From 97f9fe4b93808d2a781718399ebe70feed40e426 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Mon, 15 Apr 2019 21:47:26 +0100
Subject: [PATCH 01/24] Started optimizing SVR

---
 analysis.ipynb             | 113 +++++++++--------
 models.ipynb               | 250 +++++++++++++++++++++++++++++++++++++
 requirements.txt           |   9 ++
 utils/ContinuityImputer.py |  10 +-
 utils/DataFrameSelector.py |  11 ++
 utils/NoiseRemover.py      |  21 ++++
 6 files changed, 357 insertions(+), 57 deletions(-)
 create mode 100644 models.ipynb
 create mode 100644 utils/DataFrameSelector.py
 create mode 100644 utils/NoiseRemover.py

diff --git a/analysis.ipynb b/analysis.ipynb
index a4d93af..e70303a 100644
--- a/analysis.ipynb
+++ b/analysis.ipynb
@@ -2,9 +2,18 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 36,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "%load_ext autoreload\n",
     "import pandas as pd\n",
@@ -21,7 +30,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
@@ -247,7 +256,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -261,7 +270,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -314,7 +323,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -348,7 +357,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 4,
+     "execution_count": 39,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -368,7 +377,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
@@ -587,7 +596,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 40,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -598,7 +607,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -607,7 +616,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {
@@ -826,7 +835,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 42,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -839,7 +848,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -1058,7 +1067,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 43,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1078,7 +1087,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [
     {
@@ -1304,7 +1313,7 @@
        "[5 rows x 24 columns]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 44,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1324,7 +1333,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 45,
    "metadata": {},
    "outputs": [
     {
@@ -1357,7 +1366,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 45,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1368,7 +1377,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [
     {
@@ -1467,7 +1476,7 @@
        "[1 rows x 24 columns]"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 46,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1478,7 +1487,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 47,
    "metadata": {},
    "outputs": [
     {
@@ -1667,7 +1676,7 @@
        "[4 rows x 24 columns]"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 47,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1688,7 +1697,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [
     {
@@ -1720,7 +1729,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [
     {
@@ -1744,7 +1753,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [
     {
@@ -1775,7 +1784,7 @@
        "Name: total_cases, dtype: float64"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 50,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1797,7 +1806,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [
     {
@@ -1806,7 +1815,7 @@
        "(1451, 22)"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 51,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1817,7 +1826,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [
     {
@@ -1845,7 +1854,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [
     {
@@ -1879,7 +1888,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 53,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1895,7 +1904,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [
     {
@@ -1935,7 +1944,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -2252,7 +2261,7 @@
        "max          2.228153e+00       1.063787e+01  "
       ]
      },
-     "execution_count": 35,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2284,7 +2293,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2294,7 +2303,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -2303,7 +2312,7 @@
        "2"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2326,7 +2335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -2361,7 +2370,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -2435,7 +2444,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -2446,7 +2455,7 @@
        "            pooling_func='deprecated')"
       ]
      },
-     "execution_count": 58,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2460,7 +2469,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -2532,7 +2541,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -2541,7 +2550,7 @@
        "('1990-04-30', '2010-06-25')"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2552,7 +2561,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -2584,7 +2593,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -2647,7 +2656,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -2694,7 +2703,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2703,7 +2712,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -2751,7 +2760,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 65,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -2761,7 +2770,7 @@
        "  svd_solver='auto', tol=0.0, whiten=False)"
       ]
      },
-     "execution_count": 65,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2774,7 +2783,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 111,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
diff --git a/models.ipynb b/models.ipynb
new file mode 100644
index 0000000..ed8b50c
--- /dev/null
+++ b/models.ipynb
@@ -0,0 +1,250 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Loading the Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['city',\n",
+       " 'year',\n",
+       " 'weekofyear',\n",
+       " 'week_start_date',\n",
+       " 'ndvi_ne',\n",
+       " 'ndvi_nw',\n",
+       " 'ndvi_se',\n",
+       " 'ndvi_sw',\n",
+       " 'precipitation_amt_mm',\n",
+       " 'reanalysis_air_temp_k',\n",
+       " 'reanalysis_avg_temp_k',\n",
+       " 'reanalysis_dew_point_temp_k',\n",
+       " 'reanalysis_max_air_temp_k',\n",
+       " 'reanalysis_min_air_temp_k',\n",
+       " 'reanalysis_precip_amt_kg_per_m2',\n",
+       " 'reanalysis_relative_humidity_percent',\n",
+       " 'reanalysis_sat_precip_amt_mm',\n",
+       " 'reanalysis_specific_humidity_g_per_kg',\n",
+       " 'reanalysis_tdtr_k',\n",
+       " 'station_avg_temp_c',\n",
+       " 'station_diur_temp_rng_c',\n",
+       " 'station_max_temp_c',\n",
+       " 'station_min_temp_c',\n",
+       " 'station_precip_mm']"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train_1 = pd.read_csv('data/dengue_features_train.csv')\n",
+    "y_train = pd.read_csv('data/dengue_labels_train.csv')['total_cases']\n",
+    "attr = list(X_train_1)\n",
+    "attr"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleaning the noisy training data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_train = y_train[X_train_1['weekofyear'] != 53]\n",
+    "X_train_1 = X_train_1[X_train_1['weekofyear'] != 53]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from utils.ContinuityImputer import ContinuityImputer\n",
+    "from utils.DataFrameSelector import DataFrameSelector\n",
+    "\n",
+    "pipeline = Pipeline([\n",
+    "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
+    "    ('dataframe_selector', DataFrameSelector(attribute_names=attr[4:])),\n",
+    "    ('scaler', StandardScaler()),\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1451, 20)"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train = pipeline.fit_transform(X_train_1)\n",
+    "X_train.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Model Selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import RandomizedSearchCV\n",
+    "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n",
+    "from sklearn.neighbors import KNeighborsRegressor\n",
+    "from sklearn.svm import SVR\n",
+    "from sklearn.tree import DecisionTreeRegressor\n",
+    "from scipy.stats import randint as sp_randint\n",
+    "score_metric='neg_mean_absolute_error'\n",
+    "jobs=-1 #-1 to make it execute in parallel\n",
+    "k_folds=10\n",
+    "n_iter_search = 20\n",
+    "verbose_level = 1\n",
+    "random_n = 42"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## SVR"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "C = sp_randint(0, 10000)\n",
+    "params = {'kernel':['rbf', 'sigmoid','linear'], 'gamma':['scale'], 'C': C}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 10 folds for each of 20 candidates, totalling 200 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n"
+     ]
+    }
+   ],
+   "source": [
+    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_optimizer.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C},"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_poly_optimizer.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.DataFrame(SVR_optimizer.cv_results_)[['mean_fit_time','param_C', 'param_kernel', 'mean_test_score', 'mean_train_score']]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/requirements.txt b/requirements.txt
index f91d9ea..ef8b002 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,7 @@ appnope==0.1.0
 attrs==19.1.0
 backcall==0.1.0
 bleach==3.1.0
+cycler==0.10.0
 decorator==4.4.0
 defusedxml==0.5.0
 entrypoints==0.3
@@ -15,11 +16,15 @@ jupyter-client==5.2.4
 jupyter-core==4.4.0
 jupyterlab==0.35.4
 jupyterlab-server==0.2.0
+kiwisolver==1.0.1
 MarkupSafe==1.1.1
+matplotlib==3.0.3
 mistune==0.8.4
 nbconvert==5.4.1
 nbformat==4.4.0
 notebook==5.7.8
+numpy==1.16.2
+pandas==0.24.2
 pandocfilters==1.4.2
 parso==0.4.0
 pexpect==4.7.0
@@ -28,9 +33,13 @@ prometheus-client==0.6.0
 prompt-toolkit==2.0.9
 ptyprocess==0.6.0
 Pygments==2.3.1
+pyparsing==2.4.0
 pyrsistent==0.14.11
 python-dateutil==2.8.0
+pytz==2018.9
 pyzmq==18.0.1
+scikit-learn==0.20.3
+scipy==1.2.1
 Send2Trash==1.5.0
 six==1.12.0
 terminado==0.8.2
diff --git a/utils/ContinuityImputer.py b/utils/ContinuityImputer.py
index 58e498e..0c4d058 100644
--- a/utils/ContinuityImputer.py
+++ b/utils/ContinuityImputer.py
@@ -10,8 +10,9 @@ def fit(self, X, y=None):
         X_iq = X[X['city'] == 'iq']
         X_sj = X[X['city'] == 'sj']
 
-        self.medians_iq = {attr: np.nanmedian(X_iq[attr]) for attr in self.attributes}
-        self.medians_sj = {attr: np.nanmedian(X_sj[attr]) for attr in self.attributes}
+        medians_iq = {attr: np.nanmedian(X_iq[attr]) for attr in self.attributes}
+        medians_sj = {attr: np.nanmedian(X_sj[attr]) for attr in self.attributes}
+        self.last_values = {'sj': medians_sj, 'iq': medians_iq}
 
         return self
     
@@ -20,14 +21,13 @@ def transform(self, X):
             X = X.copy()
 
         for attr in self.attributes:
-            last_values = {'sj': self.medians_sj[attr], 'iq': self.medians_iq[attr]}
             r = []
             for _, curr in X.iterrows():
                 city = curr['city']
                 val = curr[attr]
                 if val is not None and not np.isnan(val):
-                    last_values[city] = val
-                r.append(last_values[city])
+                    self.last_values[city][attr] = val
+                r.append(self.last_values[city][attr])
             X[attr] = r
 
         return X
\ No newline at end of file
diff --git a/utils/DataFrameSelector.py b/utils/DataFrameSelector.py
new file mode 100644
index 0000000..42e46ea
--- /dev/null
+++ b/utils/DataFrameSelector.py
@@ -0,0 +1,11 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+
+class DataFrameSelector(BaseEstimator, TransformerMixin):
+    # Pipeline step that keeps only the entries of X selected by attribute_names.
+    def __init__(self, attribute_names):
+        self.attribute_names = attribute_names  # key(s) used to index X in transform()
+    
+    def fit(self, X, y=None):  # nothing is learned; X and y are ignored
+        return self
+    def transform(self, X):  # returns X[attribute_names]; presumably a column subset of a pandas DataFrame - confirm against callers
+        return X[self.attribute_names]
\ No newline at end of file
diff --git a/utils/NoiseRemover.py b/utils/NoiseRemover.py
new file mode 100644
index 0000000..6e8b0b9
--- /dev/null
+++ b/utils/NoiseRemover.py
@@ -0,0 +1,21 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+
+class NoiseRemover(BaseEstimator, TransformerMixin):  # drops rows whose 'weekofyear' equals noisy_weeks, on the first transform() call only
+    def __init__(self, noisy_weeks=53, copy=True):
+        self.noisy_weeks = noisy_weeks  # compared against X['weekofyear'] with != in transform()
+        self.train_set = True  # one-shot flag: stays True until the first transform() call has filtered its input
+        self.copy = copy  # when True, X is copied before it is filtered
+
+    def fit(self, X, y=None):  # nothing is learned; X and y are ignored and train_set is not reset
+        return self
+    # Only X is filtered below; labels aligned with X are not touched and must be filtered by the caller.
+    def transform(self, X):
+        if self.train_set:  # only the first call filters; every later call returns X unchanged
+            if self.copy:
+                X = X.copy()
+            X = X[X['weekofyear'] != self.noisy_weeks]  # keep the rows whose week differs from noisy_weeks
+            self.train_set = False  # NOTE(review): never set back to True - presumably so training data is filtered and later (test) data is not; confirm
+
+        return X
+
+

From 0f22f4238547f886ff5b0b00e42352cd214f40ea Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Tue, 16 Apr 2019 15:03:06 +0100
Subject: [PATCH 02/24] Last Weeks is working

---
 analysis.ipynb            | 153 ++++++++++++++++++++----------
 models.ipynb              | 191 +++++++++++++++++++++++++++++++-------
 utils/DataFrameDropper.py |  19 ++++
 utils/LastWeeks.py        |  41 ++++++++
 4 files changed, 321 insertions(+), 83 deletions(-)
 create mode 100644 utils/DataFrameDropper.py
 create mode 100644 utils/LastWeeks.py

diff --git a/analysis.ipynb b/analysis.ipynb
index e70303a..587a6db 100644
--- a/analysis.ipynb
+++ b/analysis.ipynb
@@ -2,18 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%load_ext autoreload\n",
     "import pandas as pd\n",
@@ -30,7 +21,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -256,7 +247,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -270,7 +261,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -323,7 +314,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -357,7 +348,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -377,7 +368,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -596,7 +587,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -607,7 +598,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -616,7 +607,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -835,7 +826,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 42,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -848,7 +839,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -1067,7 +1058,7 @@
        "[5 rows x 25 columns]"
       ]
      },
-     "execution_count": 43,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1087,7 +1078,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -1313,7 +1304,7 @@
        "[5 rows x 24 columns]"
       ]
      },
-     "execution_count": 44,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1333,7 +1324,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -1366,7 +1357,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 45,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1377,7 +1368,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -1476,7 +1467,7 @@
        "[1 rows x 24 columns]"
       ]
      },
-     "execution_count": 46,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1487,7 +1478,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -1676,7 +1667,7 @@
        "[4 rows x 24 columns]"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1729,7 +1720,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -1753,7 +1744,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -1784,7 +1775,7 @@
        "Name: total_cases, dtype: float64"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1806,7 +1797,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -1815,7 +1806,7 @@
        "(1451, 22)"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1826,7 +1817,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -1854,7 +1845,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -1888,7 +1879,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 53,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1904,7 +1895,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -1944,7 +1935,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -2261,7 +2252,7 @@
        "max          2.228153e+00       1.063787e+01  "
       ]
      },
-     "execution_count": 20,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2293,7 +2284,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2303,7 +2294,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
@@ -2312,7 +2303,7 @@
        "2"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 41,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2335,7 +2326,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 42,
    "metadata": {},
    "outputs": [
     {
@@ -2370,7 +2361,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 43,
    "metadata": {},
    "outputs": [
     {
@@ -2541,22 +2532,82 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('2000-07-01', '2010-06-25')"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "min(train_data[train_data['city'] == 'iq']['week_start_date']), max(train_data[train_data['city'] == 'iq']['week_start_date'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('2010-07-02', '2013-06-25')"
+      ]
+     },
+     "execution_count": 50,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "min(test_data[test_data['city'] == 'iq']['week_start_date']), max(test_data[test_data['city'] == 'iq']['week_start_date'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "('1990-04-30', '2008-04-22')"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "min(train_data[train_data['city'] == 'sj']['week_start_date']), max(train_data[train_data['city'] == 'sj']['week_start_date'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "('1990-04-30', '2010-06-25')"
+       "('2008-04-29', '2013-04-23')"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 52,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "min(train_data['week_start_date']), max(train_data['week_start_date'])"
+    "min(test_data[test_data['city'] == 'sj']['week_start_date']), max(test_data[test_data['city'] == 'sj']['week_start_date'])"
    ]
   },
   {
diff --git a/models.ipynb b/models.ipynb
index ed8b50c..4363ee6 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
        " 'station_precip_mm']"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -73,12 +73,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1451, 24)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "y_train = y_train[X_train_1['weekofyear'] != 53]\n",
-    "X_train_1 = X_train_1[X_train_1['weekofyear'] != 53]"
+    "def bools_to_indexes(booleans):\n",
+    "    r = []\n",
+    "    for idx, x in enumerate(booleans):\n",
+    "        if x:\n",
+    "            r.append(idx)\n",
+    "    return r\n",
+    "\n",
+    "y_train = y_train.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n",
+    "X_train_1 = X_train_1.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n",
+    "X_train_1.shape"
    ]
   },
   {
@@ -90,35 +109,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
     "%autoreload\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.decomposition import PCA\n",
     "from utils.ContinuityImputer import ContinuityImputer\n",
-    "from utils.DataFrameSelector import DataFrameSelector\n",
+    "from utils.DataFrameDropper import DataFrameDropper\n",
+    "from utils.LastWeeks import LastWeeks\n",
+    "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n",
     "\n",
     "pipeline = Pipeline([\n",
     "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
-    "    ('dataframe_selector', DataFrameSelector(attribute_names=attr[4:])),\n",
-    "    ('scaler', StandardScaler()),\n",
+    "    ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n",
+    "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
+    "    #('scaler', StandardScaler()),\n",
+    "    #('pca', PCA(n_components=9))\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(1451, 20)"
+       "(1456, 60)"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -137,7 +161,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -147,10 +171,9 @@
     "from sklearn.svm import SVR\n",
     "from sklearn.tree import DecisionTreeRegressor\n",
     "from scipy.stats import randint as sp_randint\n",
+    "from scipy.stats import uniform as sp_uniform\n",
     "score_metric='neg_mean_absolute_error'\n",
     "jobs=-1 #-1 to make it execute in parallel\n",
-    "k_folds=10\n",
-    "n_iter_search = 20\n",
     "verbose_level = 1\n",
     "random_n = 42"
    ]
@@ -159,61 +182,167 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## SVR"
+    "## SVR\n",
+    "* The results with the kernel *sigmoid* were too bad, so we removed them."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 128,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
+    "k_folds=10\n",
+    "n_iter_search = 100\n",
     "C = sp_randint(0, 10000)\n",
-    "params = {'kernel':['rbf', 'sigmoid','linear'], 'gamma':['scale'], 'C': C}"
+    "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_optimizer.fit(X_train, y_train)\n",
+    "SVR_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_poly_optimizer.fit(X_train, y_train)\n",
+    "SVR_poly_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Regression Trees"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k_folds=10\n",
+    "n_iter_search = 100\n",
+    "min_samples = sp_uniform(0.03, 0.35)\n",
+    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 5), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 10 folds for each of 20 candidates, totalling 200 fits\n"
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n"
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 319 tasks      | elapsed:    8.5s\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   22.8s finished\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "-18.391109579600275"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_optimizer.fit(X_train, y_train)"
+    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Tree_optimizer.fit(X_train, y_train)\n",
+    "Tree_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Random Forests"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 66,
    "metadata": {},
    "outputs": [],
    "source": [
-    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C},"
+    "k_folds=5\n",
+    "n_iter_search = 100\n",
+    "min_samples = sp_uniform(0.01, 0.35)\n",
+    "params = {'n_estimators': sp_randint(5,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 8), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 67,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    6.2s\n",
+      "[Parallel(n_jobs=-1)]: Done 261 tasks      | elapsed:   27.3s\n",
+      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   53.8s finished\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "-18.976912474155757"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_poly_optimizer.fit(X_train, y_train)"
+    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Forest_optimizer.fit(X_train, y_train)\n",
+    "Forest_optimizer.best_score_"
    ]
   },
   {
@@ -221,9 +350,7 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "pd.DataFrame(SVR_optimizer.cv_results_)[['mean_fit_time','param_C', 'param_kernel', 'mean_test_score', 'mean_train_score']]"
-   ]
+   "source": []
   }
  ],
  "metadata": {
diff --git a/utils/DataFrameDropper.py b/utils/DataFrameDropper.py
new file mode 100644
index 0000000..4c8c265
--- /dev/null
+++ b/utils/DataFrameDropper.py
@@ -0,0 +1,26 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+import pandas
+
+class DataFrameDropper(BaseEstimator, TransformerMixin):
+    """Pipeline step that removes the columns listed in `attribute_names`."""
+
+    def __init__(self, attribute_names, copy = True):
+        self.attribute_names = attribute_names
+        self.copy = copy
+
+    def fit(self, X, y=None):
+        """Nothing is learned from X; return self unchanged."""
+        return self
+
+    def transform(self, X):
+        """Return X without the configured columns.
+
+        Raises ValueError when X is not a pandas DataFrame.
+        """
+        # Validate before touching X so that unsupported inputs always get
+        # the ValueError below instead of failing inside `.copy()`.
+        if not isinstance(X, pandas.DataFrame):
+            raise ValueError('You try to drop some columns from something which is not a DataFrame')
+        if self.copy:
+            X = X.copy()
+        return X.drop(self.attribute_names, axis = 1)
\ No newline at end of file
diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py
new file mode 100644
index 0000000..8ed0aaf
--- /dev/null
+++ b/utils/LastWeeks.py
@@ -0,0 +1,56 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+from collections import deque
+import numpy as np
+import pandas as pd
+
+
+class LastWeeks(BaseEstimator, TransformerMixin):
+    """Append to each row the values `attributes` had in the previous rows of the same city.
+
+    For every lag in range(weeks) and every attribute a column named
+    new_attributes_prefix + str(lag) + '_' + attribute is added; lag 0 is the
+    most recent earlier row of that city. The per-city history lives on the
+    instance and keeps advancing across successive transform() calls.
+    """
+
+    def __init__(self, attributes, weeks=2, new_attributes_prefix='last_weeks_', copy=True):
+        self.attributes = attributes
+        self.weeks = weeks
+        self.new_attributes_prefix = new_attributes_prefix
+        self.copy = copy
+
+    def fit(self, X, y=None):
+        """Seed every city's history with the NaN-ignoring median of each attribute."""
+        attr_medians = [np.nanmedian(X[attr]) for attr in self.attributes]
+        # Each city needs its own deque: a shared one would leak one city's
+        # weeks into the other city's lag features.
+        self.last = {city: deque([attr_medians for _ in range(self.weeks)])
+                     for city in ('sj', 'iq')}
+
+        return self
+
+    def transform(self, X):
+        """Return X with the lag columns appended; X needs a 'city' column holding 'sj' or 'iq'."""
+        if self.copy:
+            X = X.copy()
+
+        r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)])
+
+        for idx, (_, week) in enumerate(X.iterrows()):
+            city = week['city']
+            r[idx] = self.last[city]
+            self.last[city].pop()
+            self.last[city].appendleft(week[self.attributes])
+
+        # Reuse X's index so the concat below pairs rows by position even
+        # when X does not carry a default RangeIndex (e.g. after dropping rows).
+        r = pd.DataFrame(r.reshape([X.shape[0], self.weeks * len(self.attributes)]),
+                     index=X.index,
+                     columns=[self.new_attributes_prefix + str(week) + '_' + str(attr)
+                              for week in range(self.weeks)
+                              for attr in self.attributes
+                              ])
+
+        X = pd.concat([X, r], axis=1)
+
+        return X

From fb0c69f4dc41977e53716c8c0efe22eb2a53a347 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Tue, 16 Apr 2019 16:52:32 +0100
Subject: [PATCH 03/24] Actually working now

---
 models.ipynb       | 562 +++++++++++++++++++++++++++++++++++++++++++--
 utils/LastWeeks.py |   2 +-
 2 files changed, 548 insertions(+), 16 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 4363ee6..71a7ba4 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
        " 'station_precip_mm']"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -73,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -82,7 +82,7 @@
        "(1451, 24)"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -95,8 +95,11 @@
     "            r.append(idx)\n",
     "    return r\n",
     "\n",
-    "y_train = y_train.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n",
-    "X_train_1 = X_train_1.drop(bools_to_indexes(X_train_1['weekofyear'] == 53))\n",
+    "idx = bools_to_indexes(X_train_1['weekofyear'] == 53)\n",
+    "y_train.drop(idx, inplace=True)\n",
+    "y_train.reset_index(drop=True, inplace=True)\n",
+    "X_train_1.drop(idx, inplace=True)\n",
+    "X_train_1.reset_index(drop=True, inplace=True)\n",
     "X_train_1.shape"
    ]
   },
@@ -109,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -125,24 +128,32 @@
     "pipeline = Pipeline([\n",
     "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
     "    ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n",
-    "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
+    "    #('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
     "    #('scaler', StandardScaler()),\n",
-    "    #('pca', PCA(n_components=9))\n",
+    "    #('pca', PCA(n_components=0.9))\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "X:  (1451, 24)\n",
+      "R:  (1451, 40)\n"
+     ]
+    },
     {
      "data": {
       "text/plain": [
-       "(1456, 60)"
+       "(1456, 64)"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -161,7 +172,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 55,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -192,8 +203,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=10\n",
-    "n_iter_search = 100\n",
+    "k_folds=4\n",
+    "n_iter_search = 20\n",
     "C = sp_randint(0, 10000)\n",
     "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}"
    ]
@@ -345,6 +356,527 @@
     "Forest_optimizer.best_score_"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
+       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
+       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.sum(np.isnan(X_train), axis=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "300"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "5*60"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "nan"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>city</th>\n",
+       "      <th>year</th>\n",
+       "      <th>weekofyear</th>\n",
+       "      <th>week_start_date</th>\n",
+       "      <th>ndvi_ne</th>\n",
+       "      <th>ndvi_nw</th>\n",
+       "      <th>ndvi_se</th>\n",
+       "      <th>ndvi_sw</th>\n",
+       "      <th>precipitation_amt_mm</th>\n",
+       "      <th>reanalysis_air_temp_k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>last_weeks_1_reanalysis_precip_amt_kg_per_m2</th>\n",
+       "      <th>last_weeks_1_reanalysis_relative_humidity_percent</th>\n",
+       "      <th>last_weeks_1_reanalysis_sat_precip_amt_mm</th>\n",
+       "      <th>last_weeks_1_reanalysis_specific_humidity_g_per_kg</th>\n",
+       "      <th>last_weeks_1_reanalysis_tdtr_k</th>\n",
+       "      <th>last_weeks_1_station_avg_temp_c</th>\n",
+       "      <th>last_weeks_1_station_diur_temp_rng_c</th>\n",
+       "      <th>last_weeks_1_station_max_temp_c</th>\n",
+       "      <th>last_weeks_1_station_min_temp_c</th>\n",
+       "      <th>last_weeks_1_station_precip_mm</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>139</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>24.94</td>\n",
+       "      <td>76.661429</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>15.251429</td>\n",
+       "      <td>2.642857</td>\n",
+       "      <td>26.685714</td>\n",
+       "      <td>8.385714</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>47.5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>451</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>12.80</td>\n",
+       "      <td>78.418571</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>16.564286</td>\n",
+       "      <td>2.128571</td>\n",
+       "      <td>25.928571</td>\n",
+       "      <td>6.042857</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>29.7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>763</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>21.68</td>\n",
+       "      <td>74.778571</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>14.261429</td>\n",
+       "      <td>1.957143</td>\n",
+       "      <td>24.985714</td>\n",
+       "      <td>4.900000</td>\n",
+       "      <td>28.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>23.8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1170</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>96.21</td>\n",
+       "      <td>95.530000</td>\n",
+       "      <td>93.73</td>\n",
+       "      <td>18.408571</td>\n",
+       "      <td>7.228571</td>\n",
+       "      <td>28.400000</td>\n",
+       "      <td>10.400000</td>\n",
+       "      <td>34.2</td>\n",
+       "      <td>22.5</td>\n",
+       "      <td>232.1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1430</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>...</td>\n",
+       "      <td>31.20</td>\n",
+       "      <td>87.641429</td>\n",
+       "      <td>19.04</td>\n",
+       "      <td>18.001429</td>\n",
+       "      <td>8.628571</td>\n",
+       "      <td>28.966667</td>\n",
+       "      <td>11.266667</td>\n",
+       "      <td>35.2</td>\n",
+       "      <td>22.5</td>\n",
+       "      <td>0.8</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 64 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     city  year  weekofyear week_start_date  ndvi_ne  ndvi_nw  ndvi_se  \\\n",
+       "139   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
+       "451   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
+       "763   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
+       "1170  NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
+       "1430  NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
+       "\n",
+       "      ndvi_sw  precipitation_amt_mm  reanalysis_air_temp_k  ...  \\\n",
+       "139       NaN                   NaN                    NaN  ...   \n",
+       "451       NaN                   NaN                    NaN  ...   \n",
+       "763       NaN                   NaN                    NaN  ...   \n",
+       "1170      NaN                   NaN                    NaN  ...   \n",
+       "1430      NaN                   NaN                    NaN  ...   \n",
+       "\n",
+       "      last_weeks_1_reanalysis_precip_amt_kg_per_m2  \\\n",
+       "139                                          24.94   \n",
+       "451                                          12.80   \n",
+       "763                                          21.68   \n",
+       "1170                                         96.21   \n",
+       "1430                                         31.20   \n",
+       "\n",
+       "      last_weeks_1_reanalysis_relative_humidity_percent  \\\n",
+       "139                                           76.661429   \n",
+       "451                                           78.418571   \n",
+       "763                                           74.778571   \n",
+       "1170                                          95.530000   \n",
+       "1430                                          87.641429   \n",
+       "\n",
+       "      last_weeks_1_reanalysis_sat_precip_amt_mm  \\\n",
+       "139                                        0.00   \n",
+       "451                                        0.00   \n",
+       "763                                        0.00   \n",
+       "1170                                      93.73   \n",
+       "1430                                      19.04   \n",
+       "\n",
+       "      last_weeks_1_reanalysis_specific_humidity_g_per_kg  \\\n",
+       "139                                           15.251429    \n",
+       "451                                           16.564286    \n",
+       "763                                           14.261429    \n",
+       "1170                                          18.408571    \n",
+       "1430                                          18.001429    \n",
+       "\n",
+       "      last_weeks_1_reanalysis_tdtr_k  last_weeks_1_station_avg_temp_c  \\\n",
+       "139                         2.642857                        26.685714   \n",
+       "451                         2.128571                        25.928571   \n",
+       "763                         1.957143                        24.985714   \n",
+       "1170                        7.228571                        28.400000   \n",
+       "1430                        8.628571                        28.966667   \n",
+       "\n",
+       "      last_weeks_1_station_diur_temp_rng_c  last_weeks_1_station_max_temp_c  \\\n",
+       "139                               8.385714                             32.2   \n",
+       "451                               6.042857                             30.0   \n",
+       "763                               4.900000                             28.3   \n",
+       "1170                             10.400000                             34.2   \n",
+       "1430                             11.266667                             35.2   \n",
+       "\n",
+       "      last_weeks_1_station_min_temp_c  last_weeks_1_station_precip_mm  \n",
+       "139                              21.7                            47.5  \n",
+       "451                              22.2                            29.7  \n",
+       "763                              21.1                            23.8  \n",
+       "1170                             22.5                           232.1  \n",
+       "1430                             22.5                             0.8  \n",
+       "\n",
+       "[5 rows x 64 columns]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.loc[idx,:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   a  b\n",
+       "0  1  2\n",
+       "1  3  4\n",
+       "2  5  6"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = pd.DataFrame([[1,2],[3,4],[5,6]], columns=['a','b'])\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   a  b\n",
+       "0  1  2\n",
+       "2  5  6"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.drop([1],inplace=True)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>a</th>\n",
+       "      <th>b</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   a  b\n",
+       "0  1  2\n",
+       "1  5  6"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.reset_index(drop=True, inplace=True)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0\n",
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "for idx, (_, x) in enumerate(df.iterrows()):\n",
+    "    print(idx)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py
index 8ed0aaf..1129f6f 100644
--- a/utils/LastWeeks.py
+++ b/utils/LastWeeks.py
@@ -24,7 +24,7 @@ def transform(self, X):
 
         r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)])
 
-        for idx, (_, week) in enumerate(X.iterrows()):
+        for idx, week in X.iterrows():
             city = week['city']
             r[idx] = self.last[city]
             self.last[city].pop()

From baca2ca8091ab211a43da99bb8491e8e6c95bf3e Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Tue, 16 Apr 2019 20:35:31 +0100
Subject: [PATCH 04/24] calculated optimal parameters

---
 models.ipynb | 687 ++++++++++-----------------------------------------
 1 file changed, 124 insertions(+), 563 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 71a7ba4..13271f3 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
        " 'station_precip_mm']"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -73,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -82,7 +82,7 @@
        "(1451, 24)"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -128,32 +128,24 @@
     "pipeline = Pipeline([\n",
     "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
     "    ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n",
-    "    #('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
-    "    #('scaler', StandardScaler()),\n",
-    "    #('pca', PCA(n_components=0.9))\n",
+    "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
+    "    ('scaler', StandardScaler()),\n",
+    "    ('pca', PCA(n_components=0.95))\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "X:  (1451, 24)\n",
-      "R:  (1451, 40)\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "(1456, 64)"
+       "(1451, 23)"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -199,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -211,64 +203,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_optimizer.fit(X_train, y_train)\n",
-    "SVR_optimizer.best_score_"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_poly_optimizer.fit(X_train, y_train)\n",
-    "SVR_poly_optimizer.best_score_"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Regression Trees"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "k_folds=10\n",
-    "n_iter_search = 100\n",
-    "min_samples = sp_uniform(0.03, 0.35)\n",
-    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 5), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 62,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+      "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n"
      ]
     },
     {
@@ -276,57 +218,67 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    2.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 319 tasks      | elapsed:    8.5s\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   22.8s finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 24.6min\n",
+      "[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed: 52.6min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.391109579600275"
+       "-19.17685248872835"
       ]
      },
-     "execution_count": 62,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Tree_optimizer.fit(X_train, y_train)\n",
-    "Tree_optimizer.best_score_"
+    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_optimizer.fit(X_train, y_train)\n",
+    "SVR_optimizer.best_score_"
    ]
   },
   {
-   "cell_type": "markdown",
+   "cell_type": "code",
+   "execution_count": 15,
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVR(C=769, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "## Random Forests"
+    "SVR_optimizer.best_estimator_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 66,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=5\n",
-    "n_iter_search = 100\n",
-    "min_samples = sp_uniform(0.01, 0.35)\n",
-    "params = {'n_estimators': sp_randint(5,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 8), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
+    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 67,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n"
+      "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n"
      ]
     },
     {
@@ -334,88 +286,53 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    6.2s\n",
-      "[Parallel(n_jobs=-1)]: Done 261 tasks      | elapsed:   27.3s\n",
-      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   53.8s finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  2.0min\n",
+      "[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  2.8min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.976912474155757"
+       "-25.45358085803704"
       ]
      },
-     "execution_count": 67,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Forest_optimizer.fit(X_train, y_train)\n",
-    "Forest_optimizer.best_score_"
+    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "SVR_poly_optimizer.fit(X_train, y_train)\n",
+    "SVR_poly_optimizer.best_score_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
-       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,\n",
-       "       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])"
-      ]
-     },
-     "execution_count": 34,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "np.sum(np.isnan(X_train), axis=0)"
+    "SVR_poly_optimizer.best_estimator_"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 35,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "300"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "5*60"
+    "## Regression Trees"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "nan"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "np.nan"
+    "k_folds=10\n",
+    "n_iter_search = 1000\n",
+    "min_samples = sp_uniform(0.01, 0.35)\n",
+    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
@@ -423,241 +340,35 @@
    "execution_count": 12,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 10 folds for each of 1000 candidates, totalling 10000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    4.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:   17.6s\n",
+      "[Parallel(n_jobs=-1)]: Done 876 tasks      | elapsed:   43.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 2476 tasks      | elapsed:  2.0min\n",
+      "[Parallel(n_jobs=-1)]: Done 3576 tasks      | elapsed:  2.9min\n",
+      "[Parallel(n_jobs=-1)]: Done 4876 tasks      | elapsed:  3.9min\n",
+      "[Parallel(n_jobs=-1)]: Done 6376 tasks      | elapsed:  5.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 8076 tasks      | elapsed:  6.6min\n",
+      "[Parallel(n_jobs=-1)]: Done 9976 tasks      | elapsed:  8.2min\n",
+      "[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  8.2min finished\n"
+     ]
+    },
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>city</th>\n",
-       "      <th>year</th>\n",
-       "      <th>weekofyear</th>\n",
-       "      <th>week_start_date</th>\n",
-       "      <th>ndvi_ne</th>\n",
-       "      <th>ndvi_nw</th>\n",
-       "      <th>ndvi_se</th>\n",
-       "      <th>ndvi_sw</th>\n",
-       "      <th>precipitation_amt_mm</th>\n",
-       "      <th>reanalysis_air_temp_k</th>\n",
-       "      <th>...</th>\n",
-       "      <th>last_weeks_1_reanalysis_precip_amt_kg_per_m2</th>\n",
-       "      <th>last_weeks_1_reanalysis_relative_humidity_percent</th>\n",
-       "      <th>last_weeks_1_reanalysis_sat_precip_amt_mm</th>\n",
-       "      <th>last_weeks_1_reanalysis_specific_humidity_g_per_kg</th>\n",
-       "      <th>last_weeks_1_reanalysis_tdtr_k</th>\n",
-       "      <th>last_weeks_1_station_avg_temp_c</th>\n",
-       "      <th>last_weeks_1_station_diur_temp_rng_c</th>\n",
-       "      <th>last_weeks_1_station_max_temp_c</th>\n",
-       "      <th>last_weeks_1_station_min_temp_c</th>\n",
-       "      <th>last_weeks_1_station_precip_mm</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>139</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>24.94</td>\n",
-       "      <td>76.661429</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>15.251429</td>\n",
-       "      <td>2.642857</td>\n",
-       "      <td>26.685714</td>\n",
-       "      <td>8.385714</td>\n",
-       "      <td>32.2</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>47.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>451</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>12.80</td>\n",
-       "      <td>78.418571</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>16.564286</td>\n",
-       "      <td>2.128571</td>\n",
-       "      <td>25.928571</td>\n",
-       "      <td>6.042857</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>29.7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>763</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>21.68</td>\n",
-       "      <td>74.778571</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>14.261429</td>\n",
-       "      <td>1.957143</td>\n",
-       "      <td>24.985714</td>\n",
-       "      <td>4.900000</td>\n",
-       "      <td>28.3</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>23.8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1170</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>96.21</td>\n",
-       "      <td>95.530000</td>\n",
-       "      <td>93.73</td>\n",
-       "      <td>18.408571</td>\n",
-       "      <td>7.228571</td>\n",
-       "      <td>28.400000</td>\n",
-       "      <td>10.400000</td>\n",
-       "      <td>34.2</td>\n",
-       "      <td>22.5</td>\n",
-       "      <td>232.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1430</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>...</td>\n",
-       "      <td>31.20</td>\n",
-       "      <td>87.641429</td>\n",
-       "      <td>19.04</td>\n",
-       "      <td>18.001429</td>\n",
-       "      <td>8.628571</td>\n",
-       "      <td>28.966667</td>\n",
-       "      <td>11.266667</td>\n",
-       "      <td>35.2</td>\n",
-       "      <td>22.5</td>\n",
-       "      <td>0.8</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>5 rows × 64 columns</p>\n",
-       "</div>"
-      ],
       "text/plain": [
-       "     city  year  weekofyear week_start_date  ndvi_ne  ndvi_nw  ndvi_se  \\\n",
-       "139   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
-       "451   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
-       "763   NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
-       "1170  NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
-       "1430  NaN   NaN         NaN             NaN      NaN      NaN      NaN   \n",
-       "\n",
-       "      ndvi_sw  precipitation_amt_mm  reanalysis_air_temp_k  ...  \\\n",
-       "139       NaN                   NaN                    NaN  ...   \n",
-       "451       NaN                   NaN                    NaN  ...   \n",
-       "763       NaN                   NaN                    NaN  ...   \n",
-       "1170      NaN                   NaN                    NaN  ...   \n",
-       "1430      NaN                   NaN                    NaN  ...   \n",
-       "\n",
-       "      last_weeks_1_reanalysis_precip_amt_kg_per_m2  \\\n",
-       "139                                          24.94   \n",
-       "451                                          12.80   \n",
-       "763                                          21.68   \n",
-       "1170                                         96.21   \n",
-       "1430                                         31.20   \n",
-       "\n",
-       "      last_weeks_1_reanalysis_relative_humidity_percent  \\\n",
-       "139                                           76.661429   \n",
-       "451                                           78.418571   \n",
-       "763                                           74.778571   \n",
-       "1170                                          95.530000   \n",
-       "1430                                          87.641429   \n",
-       "\n",
-       "      last_weeks_1_reanalysis_sat_precip_amt_mm  \\\n",
-       "139                                        0.00   \n",
-       "451                                        0.00   \n",
-       "763                                        0.00   \n",
-       "1170                                      93.73   \n",
-       "1430                                      19.04   \n",
-       "\n",
-       "      last_weeks_1_reanalysis_specific_humidity_g_per_kg  \\\n",
-       "139                                           15.251429    \n",
-       "451                                           16.564286    \n",
-       "763                                           14.261429    \n",
-       "1170                                          18.408571    \n",
-       "1430                                          18.001429    \n",
-       "\n",
-       "      last_weeks_1_reanalysis_tdtr_k  last_weeks_1_station_avg_temp_c  \\\n",
-       "139                         2.642857                        26.685714   \n",
-       "451                         2.128571                        25.928571   \n",
-       "763                         1.957143                        24.985714   \n",
-       "1170                        7.228571                        28.400000   \n",
-       "1430                        8.628571                        28.966667   \n",
-       "\n",
-       "      last_weeks_1_station_diur_temp_rng_c  last_weeks_1_station_max_temp_c  \\\n",
-       "139                               8.385714                             32.2   \n",
-       "451                               6.042857                             30.0   \n",
-       "763                               4.900000                             28.3   \n",
-       "1170                             10.400000                             34.2   \n",
-       "1430                             11.266667                             35.2   \n",
-       "\n",
-       "      last_weeks_1_station_min_temp_c  last_weeks_1_station_precip_mm  \n",
-       "139                              21.7                            47.5  \n",
-       "451                              22.2                            29.7  \n",
-       "763                              21.1                            23.8  \n",
-       "1170                             22.5                           232.1  \n",
-       "1430                             22.5                             0.8  \n",
-       "\n",
-       "[5 rows x 64 columns]"
+       "-18.271881461061337"
       ]
      },
      "execution_count": 12,
@@ -666,223 +377,73 @@
     }
    ],
    "source": [
-    "X_train.loc[idx,:]"
+    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Tree_optimizer.fit(X_train, y_train)\n",
+    "Tree_optimizer.best_score_"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 28,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>a</th>\n",
-       "      <th>b</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>5</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   a  b\n",
-       "0  1  2\n",
-       "1  3  4\n",
-       "2  5  6"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "df = pd.DataFrame([[1,2],[3,4],[5,6]], columns=['a','b'])\n",
-    "df"
+    "## Random Forests"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 13,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>a</th>\n",
-       "      <th>b</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>5</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   a  b\n",
-       "0  1  2\n",
-       "2  5  6"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "df.drop([1],inplace=True)\n",
-    "df"
+    "k_folds=5\n",
+    "n_iter_search = 30\n",
+    "min_samples = sp_uniform(0.01, 0.35)\n",
+    "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 5 folds for each of 30 candidates, totalling 150 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   24.6s finished\n"
+     ]
+    },
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>a</th>\n",
-       "      <th>b</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>5</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
       "text/plain": [
-       "   a  b\n",
-       "0  1  2\n",
-       "1  5  6"
+       "-18.8661842407535"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df.reset_index(drop=True, inplace=True)\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0\n",
-      "1\n"
-     ]
-    }
-   ],
-   "source": [
-    "for idx, (_, x) in enumerate(df.iterrows()):\n",
-    "    print(idx)"
+    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Forest_optimizer.fit(X_train, y_train)\n",
+    "Forest_optimizer.best_score_"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From 610dac0af98f10468b78a32abfebbe7cf023b292 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 10:46:25 +0100
Subject: [PATCH 05/24] found that the optimal number of previous weeks was 3

---
 models.ipynb | 191 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 125 insertions(+), 66 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 13271f3..74a7ebf 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -127,25 +127,25 @@
     "\n",
     "pipeline = Pipeline([\n",
     "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
-    "    ('lw', LastWeeks(attributes=attr[4:], weeks=2)),\n",
+    "    ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n",
     "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
     "    ('scaler', StandardScaler()),\n",
-    "    ('pca', PCA(n_components=0.95))\n",
+    "    #('pca', PCA(n_components=0.65))\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(1451, 23)"
+       "(1451, 80)"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 51,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -186,7 +186,7 @@
    "metadata": {},
    "source": [
     "## SVR\n",
-    "* The results with the kernel *sigmoid* were too bad, so we removed them."
+    "* The results with the *sigmoid* and *poly* kernels were too poor, so we removed them."
    ]
   },
   {
@@ -260,25 +260,42 @@
     "SVR_optimizer.best_estimator_"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Regression Trees\n",
+    "* 18.01 - with 2 previous weeks & without PCA & with (max_depth=6, min_samples_leaf=0.1611807565247405, min_samples_split=0.11193019906931466)\n",
+    "* 18.29 - With PCA at 0.9\n",
+    "* 18.27 - With PCA at 0.95\n",
+    "* 18.36 - With PCA at 0.65. PCA appears to only make the model worse.\n",
+    "* 18.38 - Without PCA and without previous weeks. Clearly the previous weeks are useful.\n",
+    "* **17.87** - Without PCA and with 3 previous weeks\n",
+    "* **17.86** - Without PCA and with 4 previous weeks"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [],
    "source": [
-    "params = {'kernel':['poly'], 'degree':sp_randint(2,8), 'gamma':['scale'], 'C': C}"
+    "k_folds=8\n",
+    "n_iter_search = 50\n",
+    "min_samples = sp_uniform(0.01, 0.35)\n",
+    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 57,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n"
+      "Fitting 8 folds for each of 50 candidates, totalling 400 fits\n"
      ]
     },
     {
@@ -286,65 +303,83 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:  2.0min\n",
-      "[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  2.8min finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.0s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   26.5s\n",
+      "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   55.3s finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-25.45358085803704"
+       "-17.86526533425224"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 57,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "SVR_poly_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_poly_optimizer.fit(X_train, y_train)\n",
-    "SVR_poly_optimizer.best_score_"
+    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Tree_optimizer.fit(X_train, y_train)\n",
+    "Tree_optimizer.best_score_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 61,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n",
+       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+       "           min_impurity_split=None, min_samples_leaf=0.286561439185922,\n",
+       "           min_samples_split=0.22208599117335398,\n",
+       "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+       "           splitter='best')"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "SVR_poly_optimizer.best_estimator_"
+    "Tree_optimizer.best_estimator_"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Regression Trees"
+    "## Random Forests\n",
+    "* 18.34 With 4 previous weeks and without PCA"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=10\n",
-    "n_iter_search = 1000\n",
+    "k_folds=5\n",
+    "n_iter_search = 100\n",
     "min_samples = sp_uniform(0.01, 0.35)\n",
-    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
+    "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 59,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 10 folds for each of 1000 candidates, totalling 10000 fits\n"
+      "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n"
      ]
     },
     {
@@ -352,98 +387,122 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  76 tasks      | elapsed:    4.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 376 tasks      | elapsed:   17.6s\n",
-      "[Parallel(n_jobs=-1)]: Done 876 tasks      | elapsed:   43.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 1576 tasks      | elapsed:  1.3min\n",
-      "[Parallel(n_jobs=-1)]: Done 2476 tasks      | elapsed:  2.0min\n",
-      "[Parallel(n_jobs=-1)]: Done 3576 tasks      | elapsed:  2.9min\n",
-      "[Parallel(n_jobs=-1)]: Done 4876 tasks      | elapsed:  3.9min\n",
-      "[Parallel(n_jobs=-1)]: Done 6376 tasks      | elapsed:  5.2min\n",
-      "[Parallel(n_jobs=-1)]: Done 8076 tasks      | elapsed:  6.6min\n",
-      "[Parallel(n_jobs=-1)]: Done 9976 tasks      | elapsed:  8.2min\n",
-      "[Parallel(n_jobs=-1)]: Done 10000 out of 10000 | elapsed:  8.2min finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   23.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.7min\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.0min\n",
+      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.4min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.271881461061337"
+       "-18.3364346427751"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 59,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Tree_optimizer.fit(X_train, y_train)\n",
-    "Tree_optimizer.best_score_"
+    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Forest_optimizer.fit(X_train, y_train)\n",
+    "Forest_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=8,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=0.15819051824722938,\n",
+       "           min_samples_split=0.1482085313614494,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=None,\n",
+       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "Forest_optimizer.best_estimator_"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Random Forests"
+    "## KNN\n",
+    "* -21.349 - with PCA at 0.65 & 2 previous weeks\n",
+    "* -20.36  - without PCA"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=5\n",
-    "n_iter_search = 30\n",
-    "min_samples = sp_uniform(0.01, 0.35)\n",
-    "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
+    "k_folds=10\n",
+    "n_iter_search = 100\n",
+    "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 5 folds for each of 30 candidates, totalling 150 fits\n"
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   24.6s finished\n"
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.5s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   19.7s\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   45.0s\n",
+      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.6min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.8661842407535"
+       "-20.359505759574677"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Forest_optimizer.fit(X_train, y_train)\n",
-    "Forest_optimizer.best_score_"
+    "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "KNN_optimizer.fit(X_train, y_train)\n",
+    "KNN_optimizer.best_score_"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From b338df149ccf47a2856cf63b2b6c3956d4832cb4 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 12:04:22 +0100
Subject: [PATCH 06/24] created the LastInfected method

---
 models.ipynb          | 1673 ++++++++++++++++++++++++++++++++++++++++-
 utils/LastInfected.py |   34 +
 utils/LastWeeks.py    |    2 +-
 3 files changed, 1669 insertions(+), 40 deletions(-)
 create mode 100644 utils/LastInfected.py

diff --git a/models.ipynb b/models.ipynb
index 74a7ebf..0f3dc6c 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,13 +130,13 @@
     "    ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n",
     "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
     "    ('scaler', StandardScaler()),\n",
-    "    #('pca', PCA(n_components=0.65))\n",
+    "    #('pca', PCA(n_components=0.95))\n",
     "])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -145,7 +145,7 @@
        "(1451, 80)"
       ]
      },
-     "execution_count": 51,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -270,32 +270,33 @@
     "* 18.27 - With PCA at 0.95\n",
     "* 18.36 - With PCA at 0.65. PCA appears to be only making the model worse.\n",
     "* 18.38 - Without PCA and with previous weeks. Clearly the previous weeks are useful\n",
-    "* **17.87** - Without PCA and with 3 previous weeks\n",
-    "* **17.86** - Without PCA and with 4 previous weeks"
+    "* 17.87 - Without PCA and with 3 previous weeks\n",
+    "* 17.86 - Without PCA and with 4 previous weeks\n",
+    "* 18.28 - Withou PCA 0.95 and 3 previous weeks fixed"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 56,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=8\n",
-    "n_iter_search = 50\n",
+    "k_folds=10\n",
+    "n_iter_search = 100\n",
     "min_samples = sp_uniform(0.01, 0.35)\n",
     "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 57,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 8 folds for each of 50 candidates, totalling 400 fits\n"
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
      ]
     },
     {
@@ -303,18 +304,20 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.0s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   26.5s\n",
-      "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:   55.3s finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.3s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   12.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   29.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:   56.1s\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.2min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-17.86526533425224"
+       "-18.274293590627153"
       ]
      },
-     "execution_count": 57,
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -327,21 +330,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 61,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n",
+       "DecisionTreeRegressor(criterion='mae', max_depth=2, max_features=None,\n",
        "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-       "           min_impurity_split=None, min_samples_leaf=0.286561439185922,\n",
-       "           min_samples_split=0.22208599117335398,\n",
+       "           min_impurity_split=None, min_samples_leaf=0.2320229706454773,\n",
+       "           min_samples_split=0.24824690804416838,\n",
        "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
        "           splitter='best')"
       ]
      },
-     "execution_count": 61,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -355,31 +358,33 @@
    "metadata": {},
    "source": [
     "## Random Forests\n",
-    "* 18.34 With 4 previous weeks and without PCA"
+    "* 18.34 With 4 previous weeks and without PCA\n",
+    "* **17.79** With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n",
+    "* **17.79** With fixed 3 previous weeks and without PCA (n_estimators= ?, max_depth = 5, min_samples_leaf=0.07, min_samples_split=0.27)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 58,
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=5\n",
-    "n_iter_search = 100\n",
+    "k_folds=10\n",
+    "n_iter_search = 30\n",
     "min_samples = sp_uniform(0.01, 0.35)\n",
     "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 49,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 5 folds for each of 100 candidates, totalling 500 fits\n"
+      "Fitting 10 folds for each of 30 candidates, totalling 300 fits\n"
      ]
     },
     {
@@ -388,46 +393,45 @@
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
       "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   23.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  1.7min\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  4.0min\n",
-      "[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:  4.4min finished\n"
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.1min\n",
+      "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  3.1min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.3364346427751"
+       "-17.740020145257915"
       ]
      },
-     "execution_count": 59,
+     "execution_count": 49,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
     "Forest_optimizer.fit(X_train, y_train)\n",
     "Forest_optimizer.best_score_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": 50,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=8,\n",
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
        "           max_features='auto', max_leaf_nodes=None,\n",
        "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=0.15819051824722938,\n",
-       "           min_samples_split=0.1482085313614494,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=None,\n",
+       "           min_samples_leaf=0.09435891310910409,\n",
+       "           min_samples_split=0.24914223158891036,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
        "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
       ]
      },
-     "execution_count": 60,
+     "execution_count": 50,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -497,6 +501,1597 @@
     "KNN_optimizer.best_score_"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from utils.LastInfected import LastInfected\n",
+    "tmp = pd.concat([LastInfected(weeks=2).fit_transform(X_train_1, y=y_train), y_train], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>city</th>\n",
+       "      <th>year</th>\n",
+       "      <th>weekofyear</th>\n",
+       "      <th>week_start_date</th>\n",
+       "      <th>ndvi_ne</th>\n",
+       "      <th>ndvi_nw</th>\n",
+       "      <th>ndvi_se</th>\n",
+       "      <th>ndvi_sw</th>\n",
+       "      <th>precipitation_amt_mm</th>\n",
+       "      <th>reanalysis_air_temp_k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>reanalysis_specific_humidity_g_per_kg</th>\n",
+       "      <th>reanalysis_tdtr_k</th>\n",
+       "      <th>station_avg_temp_c</th>\n",
+       "      <th>station_diur_temp_rng_c</th>\n",
+       "      <th>station_max_temp_c</th>\n",
+       "      <th>station_min_temp_c</th>\n",
+       "      <th>station_precip_mm</th>\n",
+       "      <th>last_infected_0</th>\n",
+       "      <th>last_infected_1</th>\n",
+       "      <th>total_cases</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>900</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2007-09-10</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.056900</td>\n",
+       "      <td>0.238543</td>\n",
+       "      <td>0.187486</td>\n",
+       "      <td>10.37</td>\n",
+       "      <td>301.117143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.720000</td>\n",
+       "      <td>3.157143</td>\n",
+       "      <td>28.871429</td>\n",
+       "      <td>6.514286</td>\n",
+       "      <td>33.9</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>10.4</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>112</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>901</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2007-09-17</td>\n",
+       "      <td>-0.013450</td>\n",
+       "      <td>0.074900</td>\n",
+       "      <td>0.152571</td>\n",
+       "      <td>0.131929</td>\n",
+       "      <td>70.39</td>\n",
+       "      <td>301.217143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>18.037143</td>\n",
+       "      <td>2.814286</td>\n",
+       "      <td>28.300000</td>\n",
+       "      <td>6.285714</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>26.9</td>\n",
+       "      <td>112.0</td>\n",
+       "      <td>71.0</td>\n",
+       "      <td>106</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>902</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2007-09-24</td>\n",
+       "      <td>-0.030700</td>\n",
+       "      <td>-0.002940</td>\n",
+       "      <td>0.152729</td>\n",
+       "      <td>0.144629</td>\n",
+       "      <td>94.37</td>\n",
+       "      <td>301.052857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.981429</td>\n",
+       "      <td>3.585714</td>\n",
+       "      <td>28.171429</td>\n",
+       "      <td>6.028571</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>21.3</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>112.0</td>\n",
+       "      <td>101</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>903</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>40</td>\n",
+       "      <td>2007-10-01</td>\n",
+       "      <td>0.096000</td>\n",
+       "      <td>0.024767</td>\n",
+       "      <td>0.185300</td>\n",
+       "      <td>0.117729</td>\n",
+       "      <td>74.50</td>\n",
+       "      <td>301.022857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>18.118571</td>\n",
+       "      <td>2.685714</td>\n",
+       "      <td>27.985714</td>\n",
+       "      <td>7.242857</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>86.6</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>170</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>904</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>41</td>\n",
+       "      <td>2007-10-08</td>\n",
+       "      <td>0.009000</td>\n",
+       "      <td>0.104000</td>\n",
+       "      <td>0.118129</td>\n",
+       "      <td>0.126343</td>\n",
+       "      <td>108.26</td>\n",
+       "      <td>300.790000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>18.375714</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>28.128571</td>\n",
+       "      <td>6.914286</td>\n",
+       "      <td>33.3</td>\n",
+       "      <td>23.9</td>\n",
+       "      <td>14.5</td>\n",
+       "      <td>170.0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>135</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>905</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>42</td>\n",
+       "      <td>2007-10-15</td>\n",
+       "      <td>0.021000</td>\n",
+       "      <td>0.132667</td>\n",
+       "      <td>0.245943</td>\n",
+       "      <td>0.189757</td>\n",
+       "      <td>17.56</td>\n",
+       "      <td>301.492857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.845714</td>\n",
+       "      <td>3.185714</td>\n",
+       "      <td>29.100000</td>\n",
+       "      <td>7.542857</td>\n",
+       "      <td>33.9</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>10.2</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>170.0</td>\n",
+       "      <td>106</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>906</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>43</td>\n",
+       "      <td>2007-10-22</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.009150</td>\n",
+       "      <td>0.191186</td>\n",
+       "      <td>0.176400</td>\n",
+       "      <td>16.48</td>\n",
+       "      <td>301.007143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.275714</td>\n",
+       "      <td>2.471429</td>\n",
+       "      <td>27.957143</td>\n",
+       "      <td>6.442857</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>8.6</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>68</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>907</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>44</td>\n",
+       "      <td>2007-10-29</td>\n",
+       "      <td>0.124300</td>\n",
+       "      <td>0.054300</td>\n",
+       "      <td>0.156814</td>\n",
+       "      <td>0.123529</td>\n",
+       "      <td>137.55</td>\n",
+       "      <td>299.458571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.502857</td>\n",
+       "      <td>2.600000</td>\n",
+       "      <td>26.200000</td>\n",
+       "      <td>5.400000</td>\n",
+       "      <td>30.6</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>89.2</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>908</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>45</td>\n",
+       "      <td>2007-11-05</td>\n",
+       "      <td>-0.251700</td>\n",
+       "      <td>-0.048600</td>\n",
+       "      <td>0.205171</td>\n",
+       "      <td>0.172883</td>\n",
+       "      <td>15.25</td>\n",
+       "      <td>300.604286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.295714</td>\n",
+       "      <td>2.257143</td>\n",
+       "      <td>27.442857</td>\n",
+       "      <td>6.857143</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>4.1</td>\n",
+       "      <td>48.0</td>\n",
+       "      <td>68.0</td>\n",
+       "      <td>48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>909</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>46</td>\n",
+       "      <td>2007-11-12</td>\n",
+       "      <td>-0.058900</td>\n",
+       "      <td>-0.062550</td>\n",
+       "      <td>0.205743</td>\n",
+       "      <td>0.202543</td>\n",
+       "      <td>42.00</td>\n",
+       "      <td>299.934286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.082857</td>\n",
+       "      <td>3.542857</td>\n",
+       "      <td>26.814286</td>\n",
+       "      <td>6.685714</td>\n",
+       "      <td>31.1</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>65.7</td>\n",
+       "      <td>48.0</td>\n",
+       "      <td>48.0</td>\n",
+       "      <td>26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>910</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>47</td>\n",
+       "      <td>2007-11-19</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.204486</td>\n",
+       "      <td>0.156286</td>\n",
+       "      <td>73.37</td>\n",
+       "      <td>299.821429</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.187143</td>\n",
+       "      <td>2.514286</td>\n",
+       "      <td>26.900000</td>\n",
+       "      <td>6.200000</td>\n",
+       "      <td>31.1</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>40.4</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>48.0</td>\n",
+       "      <td>33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>911</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>48</td>\n",
+       "      <td>2007-11-26</td>\n",
+       "      <td>-0.059500</td>\n",
+       "      <td>-0.041667</td>\n",
+       "      <td>0.090917</td>\n",
+       "      <td>0.129086</td>\n",
+       "      <td>15.95</td>\n",
+       "      <td>299.090000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.768571</td>\n",
+       "      <td>2.071429</td>\n",
+       "      <td>25.442857</td>\n",
+       "      <td>5.385714</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>36.4</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>912</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>49</td>\n",
+       "      <td>2007-12-03</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.042350</td>\n",
+       "      <td>0.095600</td>\n",
+       "      <td>0.089000</td>\n",
+       "      <td>17.85</td>\n",
+       "      <td>299.020000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.675714</td>\n",
+       "      <td>2.100000</td>\n",
+       "      <td>25.842857</td>\n",
+       "      <td>5.400000</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>34.5</td>\n",
+       "      <td>29.0</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>913</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>50</td>\n",
+       "      <td>2007-12-10</td>\n",
+       "      <td>-0.133050</td>\n",
+       "      <td>-0.045550</td>\n",
+       "      <td>0.151440</td>\n",
+       "      <td>0.143171</td>\n",
+       "      <td>31.30</td>\n",
+       "      <td>298.900000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.130000</td>\n",
+       "      <td>2.485714</td>\n",
+       "      <td>25.771429</td>\n",
+       "      <td>5.085714</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>30.2</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>29.0</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>914</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>51</td>\n",
+       "      <td>2007-12-17</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.039000</td>\n",
+       "      <td>0.173417</td>\n",
+       "      <td>0.150171</td>\n",
+       "      <td>62.11</td>\n",
+       "      <td>298.668571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.344286</td>\n",
+       "      <td>2.371429</td>\n",
+       "      <td>25.071429</td>\n",
+       "      <td>4.914286</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>108.2</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>13</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>915</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>52</td>\n",
+       "      <td>2007-12-24</td>\n",
+       "      <td>0.014800</td>\n",
+       "      <td>0.016300</td>\n",
+       "      <td>0.207267</td>\n",
+       "      <td>0.144578</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>298.602857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.318571</td>\n",
+       "      <td>2.985714</td>\n",
+       "      <td>25.085714</td>\n",
+       "      <td>6.242857</td>\n",
+       "      <td>28.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>16.8</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>916</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2008-01-01</td>\n",
+       "      <td>0.000600</td>\n",
+       "      <td>-0.309600</td>\n",
+       "      <td>0.239814</td>\n",
+       "      <td>0.195557</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>298.038571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.911429</td>\n",
+       "      <td>1.842857</td>\n",
+       "      <td>25.400000</td>\n",
+       "      <td>5.300000</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>55.5</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>917</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2008-01-08</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.108250</td>\n",
+       "      <td>0.330486</td>\n",
+       "      <td>0.244286</td>\n",
+       "      <td>37.24</td>\n",
+       "      <td>298.142857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.980000</td>\n",
+       "      <td>2.057143</td>\n",
+       "      <td>24.971429</td>\n",
+       "      <td>5.014286</td>\n",
+       "      <td>28.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>64.8</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>14</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>918</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2008-01-15</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.038300</td>\n",
+       "      <td>0.125000</td>\n",
+       "      <td>0.108843</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.627143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.488571</td>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>24.428571</td>\n",
+       "      <td>5.628571</td>\n",
+       "      <td>27.8</td>\n",
+       "      <td>20.6</td>\n",
+       "      <td>2.5</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>15</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>919</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2008-01-22</td>\n",
+       "      <td>-0.026800</td>\n",
+       "      <td>-0.215300</td>\n",
+       "      <td>0.112614</td>\n",
+       "      <td>0.160214</td>\n",
+       "      <td>81.22</td>\n",
+       "      <td>297.968571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.065714</td>\n",
+       "      <td>2.000000</td>\n",
+       "      <td>24.528571</td>\n",
+       "      <td>4.585714</td>\n",
+       "      <td>27.8</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>83.1</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>920</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2008-01-29</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.135400</td>\n",
+       "      <td>0.223300</td>\n",
+       "      <td>0.170943</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>298.021429</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.408571</td>\n",
+       "      <td>3.300000</td>\n",
+       "      <td>24.571429</td>\n",
+       "      <td>6.442857</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>3.1</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>921</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2008-02-05</td>\n",
+       "      <td>-0.111700</td>\n",
+       "      <td>-0.003200</td>\n",
+       "      <td>0.232843</td>\n",
+       "      <td>0.271171</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.237143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.225714</td>\n",
+       "      <td>2.071429</td>\n",
+       "      <td>24.214286</td>\n",
+       "      <td>5.157143</td>\n",
+       "      <td>27.2</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>35.9</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>10.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>922</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2008-02-12</td>\n",
+       "      <td>0.072000</td>\n",
+       "      <td>-0.063100</td>\n",
+       "      <td>0.150200</td>\n",
+       "      <td>0.149271</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.838571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.367143</td>\n",
+       "      <td>2.157143</td>\n",
+       "      <td>24.800000</td>\n",
+       "      <td>6.242857</td>\n",
+       "      <td>28.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>6.4</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>923</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2008-02-19</td>\n",
+       "      <td>-0.138650</td>\n",
+       "      <td>-0.095067</td>\n",
+       "      <td>0.246057</td>\n",
+       "      <td>0.228129</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.907143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.538571</td>\n",
+       "      <td>1.885714</td>\n",
+       "      <td>24.900000</td>\n",
+       "      <td>5.785714</td>\n",
+       "      <td>28.3</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>13.3</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>924</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2008-02-26</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.015420</td>\n",
+       "      <td>0.211629</td>\n",
+       "      <td>0.117343</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.765714</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.967143</td>\n",
+       "      <td>2.285714</td>\n",
+       "      <td>24.742857</td>\n",
+       "      <td>5.500000</td>\n",
+       "      <td>27.8</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>12.9</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>925</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>10</td>\n",
+       "      <td>2008-03-04</td>\n",
+       "      <td>-0.088900</td>\n",
+       "      <td>-0.090033</td>\n",
+       "      <td>0.223243</td>\n",
+       "      <td>0.154186</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.878571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.474286</td>\n",
+       "      <td>2.614286</td>\n",
+       "      <td>25.114286</td>\n",
+       "      <td>6.114286</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>926</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>11</td>\n",
+       "      <td>2008-03-11</td>\n",
+       "      <td>-0.321400</td>\n",
+       "      <td>-0.141200</td>\n",
+       "      <td>0.110643</td>\n",
+       "      <td>0.141014</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.595714</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.721429</td>\n",
+       "      <td>2.085714</td>\n",
+       "      <td>25.328571</td>\n",
+       "      <td>5.814286</td>\n",
+       "      <td>28.9</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>4.4</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>927</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>12</td>\n",
+       "      <td>2008-03-18</td>\n",
+       "      <td>0.044900</td>\n",
+       "      <td>0.024450</td>\n",
+       "      <td>0.101629</td>\n",
+       "      <td>0.088000</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.404286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.737143</td>\n",
+       "      <td>3.871429</td>\n",
+       "      <td>25.200000</td>\n",
+       "      <td>7.042857</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>20.6</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>928</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>13</td>\n",
+       "      <td>2008-03-25</td>\n",
+       "      <td>0.077850</td>\n",
+       "      <td>-0.039900</td>\n",
+       "      <td>0.310471</td>\n",
+       "      <td>0.296243</td>\n",
+       "      <td>27.19</td>\n",
+       "      <td>296.958571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.644286</td>\n",
+       "      <td>2.885714</td>\n",
+       "      <td>25.042857</td>\n",
+       "      <td>5.785714</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>1.8</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>929</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>14</td>\n",
+       "      <td>2008-04-01</td>\n",
+       "      <td>-0.038000</td>\n",
+       "      <td>-0.016833</td>\n",
+       "      <td>0.119371</td>\n",
+       "      <td>0.066386</td>\n",
+       "      <td>3.82</td>\n",
+       "      <td>298.081429</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.662857</td>\n",
+       "      <td>2.714286</td>\n",
+       "      <td>26.242857</td>\n",
+       "      <td>6.814286</td>\n",
+       "      <td>30.6</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>0.5</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>930</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>15</td>\n",
+       "      <td>2008-04-08</td>\n",
+       "      <td>-0.155200</td>\n",
+       "      <td>-0.052750</td>\n",
+       "      <td>0.137757</td>\n",
+       "      <td>0.141214</td>\n",
+       "      <td>16.96</td>\n",
+       "      <td>297.460000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.184286</td>\n",
+       "      <td>2.185714</td>\n",
+       "      <td>25.000000</td>\n",
+       "      <td>5.714286</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>30.7</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>931</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>16</td>\n",
+       "      <td>2008-04-15</td>\n",
+       "      <td>0.001800</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.203900</td>\n",
+       "      <td>0.209843</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>297.630000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.858571</td>\n",
+       "      <td>2.785714</td>\n",
+       "      <td>25.314286</td>\n",
+       "      <td>6.242857</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>11.2</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>932</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>17</td>\n",
+       "      <td>2008-04-22</td>\n",
+       "      <td>-0.037000</td>\n",
+       "      <td>-0.010367</td>\n",
+       "      <td>0.077314</td>\n",
+       "      <td>0.090586</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>298.672857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.671429</td>\n",
+       "      <td>3.957143</td>\n",
+       "      <td>27.042857</td>\n",
+       "      <td>7.514286</td>\n",
+       "      <td>31.7</td>\n",
+       "      <td>23.3</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>933</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2000-07-01</td>\n",
+       "      <td>0.192886</td>\n",
+       "      <td>0.132257</td>\n",
+       "      <td>0.340886</td>\n",
+       "      <td>0.247200</td>\n",
+       "      <td>25.41</td>\n",
+       "      <td>296.740000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.651429</td>\n",
+       "      <td>8.928571</td>\n",
+       "      <td>26.400000</td>\n",
+       "      <td>10.775000</td>\n",
+       "      <td>32.5</td>\n",
+       "      <td>20.7</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>934</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2000-07-08</td>\n",
+       "      <td>0.216833</td>\n",
+       "      <td>0.276100</td>\n",
+       "      <td>0.289457</td>\n",
+       "      <td>0.241657</td>\n",
+       "      <td>60.61</td>\n",
+       "      <td>296.634286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.862857</td>\n",
+       "      <td>10.314286</td>\n",
+       "      <td>26.900000</td>\n",
+       "      <td>11.566667</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>20.8</td>\n",
+       "      <td>55.6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>935</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>28</td>\n",
+       "      <td>2000-07-15</td>\n",
+       "      <td>0.176757</td>\n",
+       "      <td>0.173129</td>\n",
+       "      <td>0.204114</td>\n",
+       "      <td>0.128014</td>\n",
+       "      <td>55.52</td>\n",
+       "      <td>296.415714</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.120000</td>\n",
+       "      <td>7.385714</td>\n",
+       "      <td>26.800000</td>\n",
+       "      <td>11.466667</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>20.7</td>\n",
+       "      <td>38.1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>936</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>29</td>\n",
+       "      <td>2000-07-22</td>\n",
+       "      <td>0.227729</td>\n",
+       "      <td>0.145429</td>\n",
+       "      <td>0.254200</td>\n",
+       "      <td>0.200314</td>\n",
+       "      <td>5.60</td>\n",
+       "      <td>295.357143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.431429</td>\n",
+       "      <td>9.114286</td>\n",
+       "      <td>25.766667</td>\n",
+       "      <td>10.533333</td>\n",
+       "      <td>31.5</td>\n",
+       "      <td>14.7</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>937</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>30</td>\n",
+       "      <td>2000-07-29</td>\n",
+       "      <td>0.328643</td>\n",
+       "      <td>0.322129</td>\n",
+       "      <td>0.254371</td>\n",
+       "      <td>0.361043</td>\n",
+       "      <td>62.76</td>\n",
+       "      <td>296.432857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.444286</td>\n",
+       "      <td>9.500000</td>\n",
+       "      <td>26.600000</td>\n",
+       "      <td>11.480000</td>\n",
+       "      <td>33.3</td>\n",
+       "      <td>19.1</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>938</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>31</td>\n",
+       "      <td>2000-08-05</td>\n",
+       "      <td>0.205529</td>\n",
+       "      <td>0.190757</td>\n",
+       "      <td>0.231671</td>\n",
+       "      <td>0.255314</td>\n",
+       "      <td>16.24</td>\n",
+       "      <td>297.191429</td>\n",
+       "      <td>...</td>\n",
+       "      <td>13.421429</td>\n",
+       "      <td>13.771429</td>\n",
+       "      <td>25.340000</td>\n",
+       "      <td>10.940000</td>\n",
+       "      <td>32.0</td>\n",
+       "      <td>17.0</td>\n",
+       "      <td>11.5</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>939</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>32</td>\n",
+       "      <td>2000-08-12</td>\n",
+       "      <td>0.312486</td>\n",
+       "      <td>0.329986</td>\n",
+       "      <td>0.380586</td>\n",
+       "      <td>0.387271</td>\n",
+       "      <td>89.37</td>\n",
+       "      <td>297.320000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.311429</td>\n",
+       "      <td>11.471429</td>\n",
+       "      <td>27.016667</td>\n",
+       "      <td>11.650000</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>19.9</td>\n",
+       "      <td>72.9</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>940</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>33</td>\n",
+       "      <td>2000-08-19</td>\n",
+       "      <td>0.384133</td>\n",
+       "      <td>0.392240</td>\n",
+       "      <td>0.341780</td>\n",
+       "      <td>0.382750</td>\n",
+       "      <td>42.08</td>\n",
+       "      <td>297.627143</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.465714</td>\n",
+       "      <td>13.700000</td>\n",
+       "      <td>26.583333</td>\n",
+       "      <td>10.316667</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>20.5</td>\n",
+       "      <td>50.1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>941</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>34</td>\n",
+       "      <td>2000-08-26</td>\n",
+       "      <td>0.408157</td>\n",
+       "      <td>0.322157</td>\n",
+       "      <td>0.406714</td>\n",
+       "      <td>0.302714</td>\n",
+       "      <td>49.22</td>\n",
+       "      <td>298.238571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.444286</td>\n",
+       "      <td>13.771429</td>\n",
+       "      <td>26.900000</td>\n",
+       "      <td>13.400000</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>89.2</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>942</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>35</td>\n",
+       "      <td>2000-09-02</td>\n",
+       "      <td>0.332043</td>\n",
+       "      <td>0.321057</td>\n",
+       "      <td>0.314614</td>\n",
+       "      <td>0.324257</td>\n",
+       "      <td>53.65</td>\n",
+       "      <td>299.218571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.057143</td>\n",
+       "      <td>12.457143</td>\n",
+       "      <td>27.116667</td>\n",
+       "      <td>12.266667</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>78.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>943</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>36</td>\n",
+       "      <td>2000-09-09</td>\n",
+       "      <td>0.295586</td>\n",
+       "      <td>0.295683</td>\n",
+       "      <td>0.312214</td>\n",
+       "      <td>0.265929</td>\n",
+       "      <td>23.12</td>\n",
+       "      <td>300.802857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>12.652857</td>\n",
+       "      <td>14.900000</td>\n",
+       "      <td>28.366667</td>\n",
+       "      <td>12.900000</td>\n",
+       "      <td>35.8</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>56.9</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>944</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2000-09-16</td>\n",
+       "      <td>0.284657</td>\n",
+       "      <td>0.309757</td>\n",
+       "      <td>0.387883</td>\n",
+       "      <td>0.328157</td>\n",
+       "      <td>34.62</td>\n",
+       "      <td>299.858571</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.227143</td>\n",
+       "      <td>13.857143</td>\n",
+       "      <td>27.425000</td>\n",
+       "      <td>12.775000</td>\n",
+       "      <td>34.5</td>\n",
+       "      <td>20.5</td>\n",
+       "      <td>18.9</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>945</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2000-09-23</td>\n",
+       "      <td>0.348814</td>\n",
+       "      <td>0.295717</td>\n",
+       "      <td>0.404843</td>\n",
+       "      <td>0.242571</td>\n",
+       "      <td>97.55</td>\n",
+       "      <td>297.435714</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.338571</td>\n",
+       "      <td>11.314286</td>\n",
+       "      <td>27.533333</td>\n",
+       "      <td>12.566667</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>20.5</td>\n",
+       "      <td>104.2</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>946</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2000-09-30</td>\n",
+       "      <td>0.175686</td>\n",
+       "      <td>0.099483</td>\n",
+       "      <td>0.225714</td>\n",
+       "      <td>0.182786</td>\n",
+       "      <td>95.89</td>\n",
+       "      <td>299.355714</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.798571</td>\n",
+       "      <td>14.942857</td>\n",
+       "      <td>27.150000</td>\n",
+       "      <td>12.175000</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>20.5</td>\n",
+       "      <td>57.9</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>947</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>40</td>\n",
+       "      <td>2000-10-07</td>\n",
+       "      <td>0.337540</td>\n",
+       "      <td>0.276943</td>\n",
+       "      <td>0.277500</td>\n",
+       "      <td>0.255050</td>\n",
+       "      <td>46.22</td>\n",
+       "      <td>298.372857</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.148571</td>\n",
+       "      <td>11.971429</td>\n",
+       "      <td>26.700000</td>\n",
+       "      <td>11.675000</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>948</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>41</td>\n",
+       "      <td>2000-10-14</td>\n",
+       "      <td>0.223533</td>\n",
+       "      <td>0.133914</td>\n",
+       "      <td>0.349800</td>\n",
+       "      <td>0.100917</td>\n",
+       "      <td>31.10</td>\n",
+       "      <td>298.474286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.071429</td>\n",
+       "      <td>13.485714</td>\n",
+       "      <td>27.657143</td>\n",
+       "      <td>11.300000</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>21.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>949</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2000</td>\n",
+       "      <td>42</td>\n",
+       "      <td>2000-10-21</td>\n",
+       "      <td>0.274800</td>\n",
+       "      <td>0.187057</td>\n",
+       "      <td>0.373943</td>\n",
+       "      <td>0.279471</td>\n",
+       "      <td>25.21</td>\n",
+       "      <td>299.211429</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.528571</td>\n",
+       "      <td>14.928571</td>\n",
+       "      <td>27.775000</td>\n",
+       "      <td>12.275000</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>21.0</td>\n",
+       "      <td>45.2</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>50 rows × 27 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    city  year  weekofyear week_start_date   ndvi_ne   ndvi_nw   ndvi_se  \\\n",
+       "900   sj  2007          37      2007-09-10       NaN  0.056900  0.238543   \n",
+       "901   sj  2007          38      2007-09-17 -0.013450  0.074900  0.152571   \n",
+       "902   sj  2007          39      2007-09-24 -0.030700 -0.002940  0.152729   \n",
+       "903   sj  2007          40      2007-10-01  0.096000  0.024767  0.185300   \n",
+       "904   sj  2007          41      2007-10-08  0.009000  0.104000  0.118129   \n",
+       "905   sj  2007          42      2007-10-15  0.021000  0.132667  0.245943   \n",
+       "906   sj  2007          43      2007-10-22       NaN -0.009150  0.191186   \n",
+       "907   sj  2007          44      2007-10-29  0.124300  0.054300  0.156814   \n",
+       "908   sj  2007          45      2007-11-05 -0.251700 -0.048600  0.205171   \n",
+       "909   sj  2007          46      2007-11-12 -0.058900 -0.062550  0.205743   \n",
+       "910   sj  2007          47      2007-11-19       NaN       NaN  0.204486   \n",
+       "911   sj  2007          48      2007-11-26 -0.059500 -0.041667  0.090917   \n",
+       "912   sj  2007          49      2007-12-03       NaN -0.042350  0.095600   \n",
+       "913   sj  2007          50      2007-12-10 -0.133050 -0.045550  0.151440   \n",
+       "914   sj  2007          51      2007-12-17       NaN -0.039000  0.173417   \n",
+       "915   sj  2007          52      2007-12-24  0.014800  0.016300  0.207267   \n",
+       "916   sj  2008           1      2008-01-01  0.000600 -0.309600  0.239814   \n",
+       "917   sj  2008           2      2008-01-08       NaN -0.108250  0.330486   \n",
+       "918   sj  2008           3      2008-01-15       NaN  0.038300  0.125000   \n",
+       "919   sj  2008           4      2008-01-22 -0.026800 -0.215300  0.112614   \n",
+       "920   sj  2008           5      2008-01-29       NaN -0.135400  0.223300   \n",
+       "921   sj  2008           6      2008-02-05 -0.111700 -0.003200  0.232843   \n",
+       "922   sj  2008           7      2008-02-12  0.072000 -0.063100  0.150200   \n",
+       "923   sj  2008           8      2008-02-19 -0.138650 -0.095067  0.246057   \n",
+       "924   sj  2008           9      2008-02-26       NaN  0.015420  0.211629   \n",
+       "925   sj  2008          10      2008-03-04 -0.088900 -0.090033  0.223243   \n",
+       "926   sj  2008          11      2008-03-11 -0.321400 -0.141200  0.110643   \n",
+       "927   sj  2008          12      2008-03-18  0.044900  0.024450  0.101629   \n",
+       "928   sj  2008          13      2008-03-25  0.077850 -0.039900  0.310471   \n",
+       "929   sj  2008          14      2008-04-01 -0.038000 -0.016833  0.119371   \n",
+       "930   sj  2008          15      2008-04-08 -0.155200 -0.052750  0.137757   \n",
+       "931   sj  2008          16      2008-04-15  0.001800       NaN  0.203900   \n",
+       "932   sj  2008          17      2008-04-22 -0.037000 -0.010367  0.077314   \n",
+       "933   iq  2000          26      2000-07-01  0.192886  0.132257  0.340886   \n",
+       "934   iq  2000          27      2000-07-08  0.216833  0.276100  0.289457   \n",
+       "935   iq  2000          28      2000-07-15  0.176757  0.173129  0.204114   \n",
+       "936   iq  2000          29      2000-07-22  0.227729  0.145429  0.254200   \n",
+       "937   iq  2000          30      2000-07-29  0.328643  0.322129  0.254371   \n",
+       "938   iq  2000          31      2000-08-05  0.205529  0.190757  0.231671   \n",
+       "939   iq  2000          32      2000-08-12  0.312486  0.329986  0.380586   \n",
+       "940   iq  2000          33      2000-08-19  0.384133  0.392240  0.341780   \n",
+       "941   iq  2000          34      2000-08-26  0.408157  0.322157  0.406714   \n",
+       "942   iq  2000          35      2000-09-02  0.332043  0.321057  0.314614   \n",
+       "943   iq  2000          36      2000-09-09  0.295586  0.295683  0.312214   \n",
+       "944   iq  2000          37      2000-09-16  0.284657  0.309757  0.387883   \n",
+       "945   iq  2000          38      2000-09-23  0.348814  0.295717  0.404843   \n",
+       "946   iq  2000          39      2000-09-30  0.175686  0.099483  0.225714   \n",
+       "947   iq  2000          40      2000-10-07  0.337540  0.276943  0.277500   \n",
+       "948   iq  2000          41      2000-10-14  0.223533  0.133914  0.349800   \n",
+       "949   iq  2000          42      2000-10-21  0.274800  0.187057  0.373943   \n",
+       "\n",
+       "      ndvi_sw  precipitation_amt_mm  reanalysis_air_temp_k  ...  \\\n",
+       "900  0.187486                 10.37             301.117143  ...   \n",
+       "901  0.131929                 70.39             301.217143  ...   \n",
+       "902  0.144629                 94.37             301.052857  ...   \n",
+       "903  0.117729                 74.50             301.022857  ...   \n",
+       "904  0.126343                108.26             300.790000  ...   \n",
+       "905  0.189757                 17.56             301.492857  ...   \n",
+       "906  0.176400                 16.48             301.007143  ...   \n",
+       "907  0.123529                137.55             299.458571  ...   \n",
+       "908  0.172883                 15.25             300.604286  ...   \n",
+       "909  0.202543                 42.00             299.934286  ...   \n",
+       "910  0.156286                 73.37             299.821429  ...   \n",
+       "911  0.129086                 15.95             299.090000  ...   \n",
+       "912  0.089000                 17.85             299.020000  ...   \n",
+       "913  0.143171                 31.30             298.900000  ...   \n",
+       "914  0.150171                 62.11             298.668571  ...   \n",
+       "915  0.144578                  0.00             298.602857  ...   \n",
+       "916  0.195557                  0.00             298.038571  ...   \n",
+       "917  0.244286                 37.24             298.142857  ...   \n",
+       "918  0.108843                  0.00             297.627143  ...   \n",
+       "919  0.160214                 81.22             297.968571  ...   \n",
+       "920  0.170943                  0.00             298.021429  ...   \n",
+       "921  0.271171                  0.00             297.237143  ...   \n",
+       "922  0.149271                  0.00             297.838571  ...   \n",
+       "923  0.228129                  0.00             297.907143  ...   \n",
+       "924  0.117343                  0.00             297.765714  ...   \n",
+       "925  0.154186                  0.00             297.878571  ...   \n",
+       "926  0.141014                  0.00             297.595714  ...   \n",
+       "927  0.088000                  0.00             297.404286  ...   \n",
+       "928  0.296243                 27.19             296.958571  ...   \n",
+       "929  0.066386                  3.82             298.081429  ...   \n",
+       "930  0.141214                 16.96             297.460000  ...   \n",
+       "931  0.209843                  0.00             297.630000  ...   \n",
+       "932  0.090586                  0.00             298.672857  ...   \n",
+       "933  0.247200                 25.41             296.740000  ...   \n",
+       "934  0.241657                 60.61             296.634286  ...   \n",
+       "935  0.128014                 55.52             296.415714  ...   \n",
+       "936  0.200314                  5.60             295.357143  ...   \n",
+       "937  0.361043                 62.76             296.432857  ...   \n",
+       "938  0.255314                 16.24             297.191429  ...   \n",
+       "939  0.387271                 89.37             297.320000  ...   \n",
+       "940  0.382750                 42.08             297.627143  ...   \n",
+       "941  0.302714                 49.22             298.238571  ...   \n",
+       "942  0.324257                 53.65             299.218571  ...   \n",
+       "943  0.265929                 23.12             300.802857  ...   \n",
+       "944  0.328157                 34.62             299.858571  ...   \n",
+       "945  0.242571                 97.55             297.435714  ...   \n",
+       "946  0.182786                 95.89             299.355714  ...   \n",
+       "947  0.255050                 46.22             298.372857  ...   \n",
+       "948  0.100917                 31.10             298.474286  ...   \n",
+       "949  0.279471                 25.21             299.211429  ...   \n",
+       "\n",
+       "     reanalysis_specific_humidity_g_per_kg  reanalysis_tdtr_k  \\\n",
+       "900                              17.720000           3.157143   \n",
+       "901                              18.037143           2.814286   \n",
+       "902                              17.981429           3.585714   \n",
+       "903                              18.118571           2.685714   \n",
+       "904                              18.375714           3.000000   \n",
+       "905                              17.845714           3.185714   \n",
+       "906                              17.275714           2.471429   \n",
+       "907                              17.502857           2.600000   \n",
+       "908                              17.295714           2.257143   \n",
+       "909                              17.082857           3.542857   \n",
+       "910                              17.187143           2.514286   \n",
+       "911                              14.768571           2.071429   \n",
+       "912                              15.675714           2.100000   \n",
+       "913                              16.130000           2.485714   \n",
+       "914                              16.344286           2.371429   \n",
+       "915                              15.318571           2.985714   \n",
+       "916                              14.911429           1.842857   \n",
+       "917                              14.980000           2.057143   \n",
+       "918                              14.488571           3.000000   \n",
+       "919                              15.065714           2.000000   \n",
+       "920                              14.408571           3.300000   \n",
+       "921                              13.225714           2.071429   \n",
+       "922                              14.367143           2.157143   \n",
+       "923                              14.538571           1.885714   \n",
+       "924                              13.967143           2.285714   \n",
+       "925                              14.474286           2.614286   \n",
+       "926                              13.721429           2.085714   \n",
+       "927                              13.737143           3.871429   \n",
+       "928                              13.644286           2.885714   \n",
+       "929                              14.662857           2.714286   \n",
+       "930                              14.184286           2.185714   \n",
+       "931                              13.858571           2.785714   \n",
+       "932                              15.671429           3.957143   \n",
+       "933                              16.651429           8.928571   \n",
+       "934                              16.862857          10.314286   \n",
+       "935                              17.120000           7.385714   \n",
+       "936                              14.431429           9.114286   \n",
+       "937                              15.444286           9.500000   \n",
+       "938                              13.421429          13.771429   \n",
+       "939                              15.311429          11.471429   \n",
+       "940                              15.465714          13.700000   \n",
+       "941                              14.444286          13.771429   \n",
+       "942                              15.057143          12.457143   \n",
+       "943                              12.652857          14.900000   \n",
+       "944                              15.227143          13.857143   \n",
+       "945                              14.338571          11.314286   \n",
+       "946                              14.798571          14.942857   \n",
+       "947                              16.148571          11.971429   \n",
+       "948                              16.071429          13.485714   \n",
+       "949                              15.528571          14.928571   \n",
+       "\n",
+       "     station_avg_temp_c  station_diur_temp_rng_c  station_max_temp_c  \\\n",
+       "900           28.871429                 6.514286                33.9   \n",
+       "901           28.300000                 6.285714                32.8   \n",
+       "902           28.171429                 6.028571                32.2   \n",
+       "903           27.985714                 7.242857                32.8   \n",
+       "904           28.128571                 6.914286                33.3   \n",
+       "905           29.100000                 7.542857                33.9   \n",
+       "906           27.957143                 6.442857                32.2   \n",
+       "907           26.200000                 5.400000                30.6   \n",
+       "908           27.442857                 6.857143                32.2   \n",
+       "909           26.814286                 6.685714                31.1   \n",
+       "910           26.900000                 6.200000                31.1   \n",
+       "911           25.442857                 5.385714                28.9   \n",
+       "912           25.842857                 5.400000                29.4   \n",
+       "913           25.771429                 5.085714                28.9   \n",
+       "914           25.071429                 4.914286                28.9   \n",
+       "915           25.085714                 6.242857                28.3   \n",
+       "916           25.400000                 5.300000                29.4   \n",
+       "917           24.971429                 5.014286                28.3   \n",
+       "918           24.428571                 5.628571                27.8   \n",
+       "919           24.528571                 4.585714                27.8   \n",
+       "920           24.571429                 6.442857                28.9   \n",
+       "921           24.214286                 5.157143                27.2   \n",
+       "922           24.800000                 6.242857                28.3   \n",
+       "923           24.900000                 5.785714                28.3   \n",
+       "924           24.742857                 5.500000                27.8   \n",
+       "925           25.114286                 6.114286                29.4   \n",
+       "926           25.328571                 5.814286                28.9   \n",
+       "927           25.200000                 7.042857                30.0   \n",
+       "928           25.042857                 5.785714                30.0   \n",
+       "929           26.242857                 6.814286                30.6   \n",
+       "930           25.000000                 5.714286                29.4   \n",
+       "931           25.314286                 6.242857                29.4   \n",
+       "932           27.042857                 7.514286                31.7   \n",
+       "933           26.400000                10.775000                32.5   \n",
+       "934           26.900000                11.566667                34.0   \n",
+       "935           26.800000                11.466667                33.0   \n",
+       "936           25.766667                10.533333                31.5   \n",
+       "937           26.600000                11.480000                33.3   \n",
+       "938           25.340000                10.940000                32.0   \n",
+       "939           27.016667                11.650000                34.0   \n",
+       "940           26.583333                10.316667                33.0   \n",
+       "941           26.900000                13.400000                34.0   \n",
+       "942           27.116667                12.266667                34.0   \n",
+       "943           28.366667                12.900000                35.8   \n",
+       "944           27.425000                12.775000                34.5   \n",
+       "945           27.533333                12.566667                36.0   \n",
+       "946           27.150000                12.175000                34.0   \n",
+       "947           26.700000                11.675000                34.0   \n",
+       "948           27.657143                11.300000                34.0   \n",
+       "949           27.775000                12.275000                36.0   \n",
+       "\n",
+       "     station_min_temp_c  station_precip_mm  last_infected_0  last_infected_1  \\\n",
+       "900                25.0               10.4             71.0             92.0   \n",
+       "901                24.4               26.9            112.0             71.0   \n",
+       "902                24.4               21.3            106.0            112.0   \n",
+       "903                22.2               86.6            101.0            106.0   \n",
+       "904                23.9               14.5            170.0            101.0   \n",
+       "905                24.4               10.2            135.0            170.0   \n",
+       "906                24.4                8.6            106.0            135.0   \n",
+       "907                22.2               89.2             68.0            106.0   \n",
+       "908                22.8                4.1             48.0             68.0   \n",
+       "909                22.8               65.7             48.0             48.0   \n",
+       "910                22.8               40.4             26.0             48.0   \n",
+       "911                22.2               36.4             33.0             26.0   \n",
+       "912                22.8               34.5             29.0             33.0   \n",
+       "913                22.2               30.2             17.0             29.0   \n",
+       "914                21.7              108.2             12.0             17.0   \n",
+       "915                21.1               16.8             13.0             12.0   \n",
+       "916                22.2               55.5             17.0             13.0   \n",
+       "917                21.1               64.8             15.0             17.0   \n",
+       "918                20.6                2.5             14.0             15.0   \n",
+       "919                21.1               83.1             15.0             14.0   \n",
+       "920                20.0                3.1             10.0             15.0   \n",
+       "921                21.1               35.9              9.0             10.0   \n",
+       "922                21.1                6.4              2.0              9.0   \n",
+       "923                21.7               13.3              6.0              2.0   \n",
+       "924                21.1               12.9              8.0              6.0   \n",
+       "925                21.1               13.0              5.0              8.0   \n",
+       "926                22.2                4.4              1.0              5.0   \n",
+       "927                20.6                0.5              2.0              1.0   \n",
+       "928                21.1                1.8              3.0              2.0   \n",
+       "929                22.2                0.5              4.0              3.0   \n",
+       "930                21.7               30.7              3.0              4.0   \n",
+       "931                21.7               11.2              1.0              3.0   \n",
+       "932                23.3                0.3              3.0              1.0   \n",
+       "933                20.7                3.0              0.0              0.0   \n",
+       "934                20.8               55.6              0.0              0.0   \n",
+       "935                20.7               38.1              0.0              0.0   \n",
+       "936                14.7               30.0              0.0              0.0   \n",
+       "937                19.1                4.0              0.0              0.0   \n",
+       "938                17.0               11.5              0.0              0.0   \n",
+       "939                19.9               72.9              0.0              0.0   \n",
+       "940                20.5               50.1              0.0              0.0   \n",
+       "941                19.0               89.2              0.0              0.0   \n",
+       "942                20.0               78.0              0.0              0.0   \n",
+       "943                21.7               56.9              0.0              0.0   \n",
+       "944                20.5               18.9              1.0              0.0   \n",
+       "945                20.5              104.2              0.0              1.0   \n",
+       "946                20.5               57.9              0.0              0.0   \n",
+       "947                20.0               63.0              0.0              0.0   \n",
+       "948                21.0                3.0              0.0              0.0   \n",
+       "949                21.0               45.2              1.0              0.0   \n",
+       "\n",
+       "     total_cases  \n",
+       "900          112  \n",
+       "901          106  \n",
+       "902          101  \n",
+       "903          170  \n",
+       "904          135  \n",
+       "905          106  \n",
+       "906           68  \n",
+       "907           48  \n",
+       "908           48  \n",
+       "909           26  \n",
+       "910           33  \n",
+       "911           29  \n",
+       "912           17  \n",
+       "913           12  \n",
+       "914           13  \n",
+       "915           17  \n",
+       "916           15  \n",
+       "917           14  \n",
+       "918           15  \n",
+       "919           10  \n",
+       "920            9  \n",
+       "921            2  \n",
+       "922            6  \n",
+       "923            8  \n",
+       "924            5  \n",
+       "925            1  \n",
+       "926            2  \n",
+       "927            3  \n",
+       "928            4  \n",
+       "929            3  \n",
+       "930            1  \n",
+       "931            3  \n",
+       "932            5  \n",
+       "933            0  \n",
+       "934            0  \n",
+       "935            0  \n",
+       "936            0  \n",
+       "937            0  \n",
+       "938            0  \n",
+       "939            0  \n",
+       "940            0  \n",
+       "941            0  \n",
+       "942            0  \n",
+       "943            1  \n",
+       "944            0  \n",
+       "945            0  \n",
+       "946            0  \n",
+       "947            0  \n",
+       "948            1  \n",
+       "949            1  \n",
+       "\n",
+       "[50 rows x 27 columns]"
+      ]
+     },
+     "execution_count": 58,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tmp[900:950]"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
new file mode 100644
index 0000000..6c1c60c
--- /dev/null
+++ b/utils/LastInfected.py
@@ -0,0 +1,34 @@
+from sklearn.base import BaseEstimator, TransformerMixin
+from collections import deque
+import numpy as np
+import pandas as pd
+
+class LastInfected(BaseEstimator, TransformerMixin):  # Appends `weeks` columns holding the previously seen target values of the same city.
+    def __init__(self, weeks=1, new_attributes_prefix='last_infected_', copy=True):  # weeks: number of lag columns; new_attributes_prefix: lag column name prefix; copy: copy X before transforming
+        self.weeks=weeks
+        self.new_attributes_prefix = new_attributes_prefix
+        self.copy=copy
+        dq = deque([0 for _ in range(weeks)])  # history seeded with zeros, one slot per lag column
+        self.last = {'sj': dq.copy(), 'iq': dq.copy()}  # independent history per city; only the keys 'sj' and 'iq' exist. NOTE(review): state is built here, not in fit, so refitting does not reset it
+    
+    def fit(self, X, y):  # Stores y for later use by transform; X is not inspected. Returns self.
+        self.y = y
+        return self
+    
+    def transform(self, X, model=None):  # Returns X with the lag columns appended; `model` is not used in this body
+        if self.copy:
+            X = X.copy()
+        
+        r = np.ndarray(shape=[X.shape[0], self.weeks])  # allocated without initialisation; rows not written by the loop below keep arbitrary values
+
+        for idx, n_infected in enumerate(self.y):  # iterates the y stored by fit, not X. NOTE(review): presumably X must be the same rows, in the same order, that fit received - confirm
+            city = X.loc[idx, 'city']  # label-based lookup with a positional counter; assumes X has a 0..n-1 index - TODO confirm
+            r[idx] = self.last[city]  # row gets the history as it stood before this row's own count is added
+            self.last[city].pop()  # drop the right-most (oldest) entry
+            self.last[city].appendleft(n_infected)  # newest count goes to the left, so column 0 is the most recent one
+
+        r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)])  # no index passed, so r gets a default RangeIndex
+        
+        X = pd.concat([X, r], axis=1)  # column-wise concat aligns on index labels; relies on X's index matching r's default one - TODO confirm
+
+        return X  # NOTE(review): self.last was mutated above and is never reset, so a second transform call continues from this state
\ No newline at end of file
diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py
index 1129f6f..abe0379 100644
--- a/utils/LastWeeks.py
+++ b/utils/LastWeeks.py
@@ -14,7 +14,7 @@ def __init__(self, attributes, weeks=2, new_attributes_prefix='last_weeks_', cop
     def fit(self, X, y=None):
         attr_medians = [np.nanmedian(X[attr]) for attr in self.attributes]
         dq = deque([attr_medians for _ in range(self.weeks)])
-        self.last = {'sj': dq, 'iq': dq}
+        self.last = {'sj': dq.copy(), 'iq': dq.copy()}
 
         return self
 

From 9e0bfee88afac303073e9cc11312d0b640722c17 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 13:52:08 +0100
Subject: [PATCH 07/24] found 8.49 model with adaboost

---
 models.ipynb | 1735 +++-----------------------------------------------
 1 file changed, 100 insertions(+), 1635 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 0f3dc6c..f88b08d 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 68,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -123,11 +123,13 @@
     "from utils.ContinuityImputer import ContinuityImputer\n",
     "from utils.DataFrameDropper import DataFrameDropper\n",
     "from utils.LastWeeks import LastWeeks\n",
+    "from utils.LastInfected import LastInfected\n",
     "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n",
     "\n",
     "pipeline = Pipeline([\n",
     "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
     "    ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n",
+    "    ('lf', LastInfected(weeks=3)),\n",
     "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
     "    ('scaler', StandardScaler()),\n",
     "    #('pca', PCA(n_components=0.95))\n",
@@ -136,22 +138,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(1451, 80)"
+       "(1451, 83)"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 69,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "X_train = pipeline.fit_transform(X_train_1)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)\n",
     "X_train.shape"
    ]
   },
@@ -272,12 +274,14 @@
     "* 18.38 - Without PCA and with previous weeks. Clearly the previous weeks are useful\n",
     "* 17.87 - Without PCA and with 3 previous weeks\n",
     "* 17.86 - Without PCA and with 4 previous weeks\n",
-    "* 18.28 - Withou PCA 0.95 and 3 previous weeks fixed"
+    "* 18.28 - With PCA 0.95 and 3 previous weeks fixed\n",
+    "* 9.16 - Without PCA, with 3 weeks and 1 last infection (max_depth=5, min_samples_leaf=0.03, min_samples_split=0.108)\n",
+    "* **9.04** - Without PCA, with 3 weeks and 1 last infection (max_depth=5, min_samples_leaf=0.03, min_samples_split=0.108)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 70,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -289,7 +293,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 71,
    "metadata": {},
    "outputs": [
     {
@@ -304,20 +308,20 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.3s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   12.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   29.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:   56.1s\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.2min finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    8.0s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   32.2s\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  2.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-18.274293590627153"
+       "-9.041006202618883"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 71,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -330,21 +334,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 72,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "DecisionTreeRegressor(criterion='mae', max_depth=2, max_features=None,\n",
+       "DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
        "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-       "           min_impurity_split=None, min_samples_leaf=0.2320229706454773,\n",
-       "           min_samples_split=0.24824690804416838,\n",
+       "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+       "           min_samples_split=0.107526262482814,\n",
        "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
        "           splitter='best')"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 72,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -359,32 +363,34 @@
    "source": [
     "## Random Forests\n",
     "* 18.34 With 4 previous weeks and without PCA\n",
-    "* **17.79** With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n",
-    "* **17.79** With fixed 3 previous weeks and without PCA (n_estimators= ?, max_depth = 5, min_samples_leaf=0.07, min_samples_split=0.27)"
+    "* 17.79 With fixed 3 previous weeks and PCA at 0.95 (n_estimators= ?, max_depth = 2, min_samples_leaf=0.112, min_samples_split=0.224)\n",
+    "* 17.74 With fixed 3 previous weeks and without PCA (n_estimators= 13, max_depth = 5, min_samples_leaf=0.09, min_samples_split=0.24)\n",
+    "* **9.13** with 3 previous weeks and 1 last infected (n_estimators=9, max_depth = 9, min_samples_leaf=0.014, min_samples_split=0.07)\n",
+    "* 9.22 with 3 previous weeks and 3 last infected (n_estimators=9, max_depth = 9, min_samples_leaf=0.014, min_samples_split=0.08)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 73,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
-    "n_iter_search = 30\n",
+    "n_iter_search = 40\n",
     "min_samples = sp_uniform(0.01, 0.35)\n",
     "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 74,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Fitting 10 folds for each of 30 candidates, totalling 300 fits\n"
+      "Fitting 10 folds for each of 40 candidates, totalling 400 fits\n"
      ]
     },
     {
@@ -392,18 +398,18 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   23.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.1min\n",
-      "[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  3.1min finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   24.7s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.4min\n",
+      "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  4.9min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-17.740020145257915"
+       "-9.22168619342982"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 74,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -416,22 +422,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 75,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=9,\n",
        "           max_features='auto', max_leaf_nodes=None,\n",
        "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=0.09435891310910409,\n",
-       "           min_samples_split=0.24914223158891036,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
+       "           min_samples_leaf=0.014927937950279559,\n",
+       "           min_samples_split=0.0795948414310818,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=-1,\n",
        "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 75,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -440,6 +446,63 @@
     "Forest_optimizer.best_estimator_"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Adaboost of Trees\n",
+    "* 10.78 - With 3 last weeks and 3 last infected \n",
+    "* **8.49** - With 3 last weeks and 3 last infected and only max_depth tuned."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k_folds=10\n",
+    "n_iter_search = 10\n",
+    "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
+    "AdaTree_optimizer.fit(X_train, y_train)\n",
+    "AdaTree_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n",
+       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+       "           min_impurity_split=None, min_samples_leaf=1,\n",
+       "           min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
+       "           presort=False, random_state=None, splitter='best'),\n",
+       "         learning_rate=1.0, loss='linear', n_estimators=50,\n",
+       "         random_state=None)"
+      ]
+     },
+     "execution_count": 92,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "AdaTree_optimizer.best_estimator_"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -500,1604 +563,6 @@
     "KNN_optimizer.fit(X_train, y_train)\n",
     "KNN_optimizer.best_score_"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload\n",
-    "from utils.LastInfected import LastInfected\n",
-    "tmp = pd.concat([LastInfected(weeks=2).fit_transform(X_train_1, y=y_train), y_train], axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>city</th>\n",
-       "      <th>year</th>\n",
-       "      <th>weekofyear</th>\n",
-       "      <th>week_start_date</th>\n",
-       "      <th>ndvi_ne</th>\n",
-       "      <th>ndvi_nw</th>\n",
-       "      <th>ndvi_se</th>\n",
-       "      <th>ndvi_sw</th>\n",
-       "      <th>precipitation_amt_mm</th>\n",
-       "      <th>reanalysis_air_temp_k</th>\n",
-       "      <th>...</th>\n",
-       "      <th>reanalysis_specific_humidity_g_per_kg</th>\n",
-       "      <th>reanalysis_tdtr_k</th>\n",
-       "      <th>station_avg_temp_c</th>\n",
-       "      <th>station_diur_temp_rng_c</th>\n",
-       "      <th>station_max_temp_c</th>\n",
-       "      <th>station_min_temp_c</th>\n",
-       "      <th>station_precip_mm</th>\n",
-       "      <th>last_infected_0</th>\n",
-       "      <th>last_infected_1</th>\n",
-       "      <th>total_cases</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>900</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>37</td>\n",
-       "      <td>2007-09-10</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.056900</td>\n",
-       "      <td>0.238543</td>\n",
-       "      <td>0.187486</td>\n",
-       "      <td>10.37</td>\n",
-       "      <td>301.117143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.720000</td>\n",
-       "      <td>3.157143</td>\n",
-       "      <td>28.871429</td>\n",
-       "      <td>6.514286</td>\n",
-       "      <td>33.9</td>\n",
-       "      <td>25.0</td>\n",
-       "      <td>10.4</td>\n",
-       "      <td>71.0</td>\n",
-       "      <td>92.0</td>\n",
-       "      <td>112</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>901</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>38</td>\n",
-       "      <td>2007-09-17</td>\n",
-       "      <td>-0.013450</td>\n",
-       "      <td>0.074900</td>\n",
-       "      <td>0.152571</td>\n",
-       "      <td>0.131929</td>\n",
-       "      <td>70.39</td>\n",
-       "      <td>301.217143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>18.037143</td>\n",
-       "      <td>2.814286</td>\n",
-       "      <td>28.300000</td>\n",
-       "      <td>6.285714</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>26.9</td>\n",
-       "      <td>112.0</td>\n",
-       "      <td>71.0</td>\n",
-       "      <td>106</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>902</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>39</td>\n",
-       "      <td>2007-09-24</td>\n",
-       "      <td>-0.030700</td>\n",
-       "      <td>-0.002940</td>\n",
-       "      <td>0.152729</td>\n",
-       "      <td>0.144629</td>\n",
-       "      <td>94.37</td>\n",
-       "      <td>301.052857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.981429</td>\n",
-       "      <td>3.585714</td>\n",
-       "      <td>28.171429</td>\n",
-       "      <td>6.028571</td>\n",
-       "      <td>32.2</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>21.3</td>\n",
-       "      <td>106.0</td>\n",
-       "      <td>112.0</td>\n",
-       "      <td>101</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>903</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>40</td>\n",
-       "      <td>2007-10-01</td>\n",
-       "      <td>0.096000</td>\n",
-       "      <td>0.024767</td>\n",
-       "      <td>0.185300</td>\n",
-       "      <td>0.117729</td>\n",
-       "      <td>74.50</td>\n",
-       "      <td>301.022857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>18.118571</td>\n",
-       "      <td>2.685714</td>\n",
-       "      <td>27.985714</td>\n",
-       "      <td>7.242857</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>86.6</td>\n",
-       "      <td>101.0</td>\n",
-       "      <td>106.0</td>\n",
-       "      <td>170</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>904</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>41</td>\n",
-       "      <td>2007-10-08</td>\n",
-       "      <td>0.009000</td>\n",
-       "      <td>0.104000</td>\n",
-       "      <td>0.118129</td>\n",
-       "      <td>0.126343</td>\n",
-       "      <td>108.26</td>\n",
-       "      <td>300.790000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>18.375714</td>\n",
-       "      <td>3.000000</td>\n",
-       "      <td>28.128571</td>\n",
-       "      <td>6.914286</td>\n",
-       "      <td>33.3</td>\n",
-       "      <td>23.9</td>\n",
-       "      <td>14.5</td>\n",
-       "      <td>170.0</td>\n",
-       "      <td>101.0</td>\n",
-       "      <td>135</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>905</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>42</td>\n",
-       "      <td>2007-10-15</td>\n",
-       "      <td>0.021000</td>\n",
-       "      <td>0.132667</td>\n",
-       "      <td>0.245943</td>\n",
-       "      <td>0.189757</td>\n",
-       "      <td>17.56</td>\n",
-       "      <td>301.492857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.845714</td>\n",
-       "      <td>3.185714</td>\n",
-       "      <td>29.100000</td>\n",
-       "      <td>7.542857</td>\n",
-       "      <td>33.9</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>10.2</td>\n",
-       "      <td>135.0</td>\n",
-       "      <td>170.0</td>\n",
-       "      <td>106</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>906</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>43</td>\n",
-       "      <td>2007-10-22</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>-0.009150</td>\n",
-       "      <td>0.191186</td>\n",
-       "      <td>0.176400</td>\n",
-       "      <td>16.48</td>\n",
-       "      <td>301.007143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.275714</td>\n",
-       "      <td>2.471429</td>\n",
-       "      <td>27.957143</td>\n",
-       "      <td>6.442857</td>\n",
-       "      <td>32.2</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>8.6</td>\n",
-       "      <td>106.0</td>\n",
-       "      <td>135.0</td>\n",
-       "      <td>68</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>907</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>44</td>\n",
-       "      <td>2007-10-29</td>\n",
-       "      <td>0.124300</td>\n",
-       "      <td>0.054300</td>\n",
-       "      <td>0.156814</td>\n",
-       "      <td>0.123529</td>\n",
-       "      <td>137.55</td>\n",
-       "      <td>299.458571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.502857</td>\n",
-       "      <td>2.600000</td>\n",
-       "      <td>26.200000</td>\n",
-       "      <td>5.400000</td>\n",
-       "      <td>30.6</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>89.2</td>\n",
-       "      <td>68.0</td>\n",
-       "      <td>106.0</td>\n",
-       "      <td>48</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>908</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>45</td>\n",
-       "      <td>2007-11-05</td>\n",
-       "      <td>-0.251700</td>\n",
-       "      <td>-0.048600</td>\n",
-       "      <td>0.205171</td>\n",
-       "      <td>0.172883</td>\n",
-       "      <td>15.25</td>\n",
-       "      <td>300.604286</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.295714</td>\n",
-       "      <td>2.257143</td>\n",
-       "      <td>27.442857</td>\n",
-       "      <td>6.857143</td>\n",
-       "      <td>32.2</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>4.1</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>68.0</td>\n",
-       "      <td>48</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>909</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>46</td>\n",
-       "      <td>2007-11-12</td>\n",
-       "      <td>-0.058900</td>\n",
-       "      <td>-0.062550</td>\n",
-       "      <td>0.205743</td>\n",
-       "      <td>0.202543</td>\n",
-       "      <td>42.00</td>\n",
-       "      <td>299.934286</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.082857</td>\n",
-       "      <td>3.542857</td>\n",
-       "      <td>26.814286</td>\n",
-       "      <td>6.685714</td>\n",
-       "      <td>31.1</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>65.7</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>910</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>47</td>\n",
-       "      <td>2007-11-19</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.204486</td>\n",
-       "      <td>0.156286</td>\n",
-       "      <td>73.37</td>\n",
-       "      <td>299.821429</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.187143</td>\n",
-       "      <td>2.514286</td>\n",
-       "      <td>26.900000</td>\n",
-       "      <td>6.200000</td>\n",
-       "      <td>31.1</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>40.4</td>\n",
-       "      <td>26.0</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>33</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>911</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>48</td>\n",
-       "      <td>2007-11-26</td>\n",
-       "      <td>-0.059500</td>\n",
-       "      <td>-0.041667</td>\n",
-       "      <td>0.090917</td>\n",
-       "      <td>0.129086</td>\n",
-       "      <td>15.95</td>\n",
-       "      <td>299.090000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.768571</td>\n",
-       "      <td>2.071429</td>\n",
-       "      <td>25.442857</td>\n",
-       "      <td>5.385714</td>\n",
-       "      <td>28.9</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>36.4</td>\n",
-       "      <td>33.0</td>\n",
-       "      <td>26.0</td>\n",
-       "      <td>29</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>912</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>49</td>\n",
-       "      <td>2007-12-03</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>-0.042350</td>\n",
-       "      <td>0.095600</td>\n",
-       "      <td>0.089000</td>\n",
-       "      <td>17.85</td>\n",
-       "      <td>299.020000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.675714</td>\n",
-       "      <td>2.100000</td>\n",
-       "      <td>25.842857</td>\n",
-       "      <td>5.400000</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>34.5</td>\n",
-       "      <td>29.0</td>\n",
-       "      <td>33.0</td>\n",
-       "      <td>17</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>913</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>50</td>\n",
-       "      <td>2007-12-10</td>\n",
-       "      <td>-0.133050</td>\n",
-       "      <td>-0.045550</td>\n",
-       "      <td>0.151440</td>\n",
-       "      <td>0.143171</td>\n",
-       "      <td>31.30</td>\n",
-       "      <td>298.900000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.130000</td>\n",
-       "      <td>2.485714</td>\n",
-       "      <td>25.771429</td>\n",
-       "      <td>5.085714</td>\n",
-       "      <td>28.9</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>30.2</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>29.0</td>\n",
-       "      <td>12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>914</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>51</td>\n",
-       "      <td>2007-12-17</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>-0.039000</td>\n",
-       "      <td>0.173417</td>\n",
-       "      <td>0.150171</td>\n",
-       "      <td>62.11</td>\n",
-       "      <td>298.668571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.344286</td>\n",
-       "      <td>2.371429</td>\n",
-       "      <td>25.071429</td>\n",
-       "      <td>4.914286</td>\n",
-       "      <td>28.9</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>108.2</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>13</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>915</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2007</td>\n",
-       "      <td>52</td>\n",
-       "      <td>2007-12-24</td>\n",
-       "      <td>0.014800</td>\n",
-       "      <td>0.016300</td>\n",
-       "      <td>0.207267</td>\n",
-       "      <td>0.144578</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>298.602857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.318571</td>\n",
-       "      <td>2.985714</td>\n",
-       "      <td>25.085714</td>\n",
-       "      <td>6.242857</td>\n",
-       "      <td>28.3</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>16.8</td>\n",
-       "      <td>13.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>17</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>916</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2008-01-01</td>\n",
-       "      <td>0.000600</td>\n",
-       "      <td>-0.309600</td>\n",
-       "      <td>0.239814</td>\n",
-       "      <td>0.195557</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>298.038571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.911429</td>\n",
-       "      <td>1.842857</td>\n",
-       "      <td>25.400000</td>\n",
-       "      <td>5.300000</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>55.5</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>13.0</td>\n",
-       "      <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>917</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2008-01-08</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>-0.108250</td>\n",
-       "      <td>0.330486</td>\n",
-       "      <td>0.244286</td>\n",
-       "      <td>37.24</td>\n",
-       "      <td>298.142857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.980000</td>\n",
-       "      <td>2.057143</td>\n",
-       "      <td>24.971429</td>\n",
-       "      <td>5.014286</td>\n",
-       "      <td>28.3</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>64.8</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>14</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>918</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2008-01-15</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.038300</td>\n",
-       "      <td>0.125000</td>\n",
-       "      <td>0.108843</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.627143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.488571</td>\n",
-       "      <td>3.000000</td>\n",
-       "      <td>24.428571</td>\n",
-       "      <td>5.628571</td>\n",
-       "      <td>27.8</td>\n",
-       "      <td>20.6</td>\n",
-       "      <td>2.5</td>\n",
-       "      <td>14.0</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>919</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>4</td>\n",
-       "      <td>2008-01-22</td>\n",
-       "      <td>-0.026800</td>\n",
-       "      <td>-0.215300</td>\n",
-       "      <td>0.112614</td>\n",
-       "      <td>0.160214</td>\n",
-       "      <td>81.22</td>\n",
-       "      <td>297.968571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.065714</td>\n",
-       "      <td>2.000000</td>\n",
-       "      <td>24.528571</td>\n",
-       "      <td>4.585714</td>\n",
-       "      <td>27.8</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>83.1</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>14.0</td>\n",
-       "      <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>920</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2008-01-29</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>-0.135400</td>\n",
-       "      <td>0.223300</td>\n",
-       "      <td>0.170943</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>298.021429</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.408571</td>\n",
-       "      <td>3.300000</td>\n",
-       "      <td>24.571429</td>\n",
-       "      <td>6.442857</td>\n",
-       "      <td>28.9</td>\n",
-       "      <td>20.0</td>\n",
-       "      <td>3.1</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>15.0</td>\n",
-       "      <td>9</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>921</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>6</td>\n",
-       "      <td>2008-02-05</td>\n",
-       "      <td>-0.111700</td>\n",
-       "      <td>-0.003200</td>\n",
-       "      <td>0.232843</td>\n",
-       "      <td>0.271171</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.237143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.225714</td>\n",
-       "      <td>2.071429</td>\n",
-       "      <td>24.214286</td>\n",
-       "      <td>5.157143</td>\n",
-       "      <td>27.2</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>35.9</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>922</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>7</td>\n",
-       "      <td>2008-02-12</td>\n",
-       "      <td>0.072000</td>\n",
-       "      <td>-0.063100</td>\n",
-       "      <td>0.150200</td>\n",
-       "      <td>0.149271</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.838571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.367143</td>\n",
-       "      <td>2.157143</td>\n",
-       "      <td>24.800000</td>\n",
-       "      <td>6.242857</td>\n",
-       "      <td>28.3</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>6.4</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>9.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>923</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>8</td>\n",
-       "      <td>2008-02-19</td>\n",
-       "      <td>-0.138650</td>\n",
-       "      <td>-0.095067</td>\n",
-       "      <td>0.246057</td>\n",
-       "      <td>0.228129</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.907143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.538571</td>\n",
-       "      <td>1.885714</td>\n",
-       "      <td>24.900000</td>\n",
-       "      <td>5.785714</td>\n",
-       "      <td>28.3</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>13.3</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>924</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>9</td>\n",
-       "      <td>2008-02-26</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.015420</td>\n",
-       "      <td>0.211629</td>\n",
-       "      <td>0.117343</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.765714</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.967143</td>\n",
-       "      <td>2.285714</td>\n",
-       "      <td>24.742857</td>\n",
-       "      <td>5.500000</td>\n",
-       "      <td>27.8</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>12.9</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>925</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>10</td>\n",
-       "      <td>2008-03-04</td>\n",
-       "      <td>-0.088900</td>\n",
-       "      <td>-0.090033</td>\n",
-       "      <td>0.223243</td>\n",
-       "      <td>0.154186</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.878571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.474286</td>\n",
-       "      <td>2.614286</td>\n",
-       "      <td>25.114286</td>\n",
-       "      <td>6.114286</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>13.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>926</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>11</td>\n",
-       "      <td>2008-03-11</td>\n",
-       "      <td>-0.321400</td>\n",
-       "      <td>-0.141200</td>\n",
-       "      <td>0.110643</td>\n",
-       "      <td>0.141014</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.595714</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.721429</td>\n",
-       "      <td>2.085714</td>\n",
-       "      <td>25.328571</td>\n",
-       "      <td>5.814286</td>\n",
-       "      <td>28.9</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>4.4</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>927</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>12</td>\n",
-       "      <td>2008-03-18</td>\n",
-       "      <td>0.044900</td>\n",
-       "      <td>0.024450</td>\n",
-       "      <td>0.101629</td>\n",
-       "      <td>0.088000</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.404286</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.737143</td>\n",
-       "      <td>3.871429</td>\n",
-       "      <td>25.200000</td>\n",
-       "      <td>7.042857</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>20.6</td>\n",
-       "      <td>0.5</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>928</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>13</td>\n",
-       "      <td>2008-03-25</td>\n",
-       "      <td>0.077850</td>\n",
-       "      <td>-0.039900</td>\n",
-       "      <td>0.310471</td>\n",
-       "      <td>0.296243</td>\n",
-       "      <td>27.19</td>\n",
-       "      <td>296.958571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.644286</td>\n",
-       "      <td>2.885714</td>\n",
-       "      <td>25.042857</td>\n",
-       "      <td>5.785714</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>1.8</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>929</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>14</td>\n",
-       "      <td>2008-04-01</td>\n",
-       "      <td>-0.038000</td>\n",
-       "      <td>-0.016833</td>\n",
-       "      <td>0.119371</td>\n",
-       "      <td>0.066386</td>\n",
-       "      <td>3.82</td>\n",
-       "      <td>298.081429</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.662857</td>\n",
-       "      <td>2.714286</td>\n",
-       "      <td>26.242857</td>\n",
-       "      <td>6.814286</td>\n",
-       "      <td>30.6</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>0.5</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>930</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>15</td>\n",
-       "      <td>2008-04-08</td>\n",
-       "      <td>-0.155200</td>\n",
-       "      <td>-0.052750</td>\n",
-       "      <td>0.137757</td>\n",
-       "      <td>0.141214</td>\n",
-       "      <td>16.96</td>\n",
-       "      <td>297.460000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.184286</td>\n",
-       "      <td>2.185714</td>\n",
-       "      <td>25.000000</td>\n",
-       "      <td>5.714286</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>30.7</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>931</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>16</td>\n",
-       "      <td>2008-04-15</td>\n",
-       "      <td>0.001800</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.203900</td>\n",
-       "      <td>0.209843</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>297.630000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.858571</td>\n",
-       "      <td>2.785714</td>\n",
-       "      <td>25.314286</td>\n",
-       "      <td>6.242857</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>11.2</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>932</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>17</td>\n",
-       "      <td>2008-04-22</td>\n",
-       "      <td>-0.037000</td>\n",
-       "      <td>-0.010367</td>\n",
-       "      <td>0.077314</td>\n",
-       "      <td>0.090586</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>298.672857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.671429</td>\n",
-       "      <td>3.957143</td>\n",
-       "      <td>27.042857</td>\n",
-       "      <td>7.514286</td>\n",
-       "      <td>31.7</td>\n",
-       "      <td>23.3</td>\n",
-       "      <td>0.3</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>933</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>26</td>\n",
-       "      <td>2000-07-01</td>\n",
-       "      <td>0.192886</td>\n",
-       "      <td>0.132257</td>\n",
-       "      <td>0.340886</td>\n",
-       "      <td>0.247200</td>\n",
-       "      <td>25.41</td>\n",
-       "      <td>296.740000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.651429</td>\n",
-       "      <td>8.928571</td>\n",
-       "      <td>26.400000</td>\n",
-       "      <td>10.775000</td>\n",
-       "      <td>32.5</td>\n",
-       "      <td>20.7</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>934</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>27</td>\n",
-       "      <td>2000-07-08</td>\n",
-       "      <td>0.216833</td>\n",
-       "      <td>0.276100</td>\n",
-       "      <td>0.289457</td>\n",
-       "      <td>0.241657</td>\n",
-       "      <td>60.61</td>\n",
-       "      <td>296.634286</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.862857</td>\n",
-       "      <td>10.314286</td>\n",
-       "      <td>26.900000</td>\n",
-       "      <td>11.566667</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>20.8</td>\n",
-       "      <td>55.6</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>935</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>28</td>\n",
-       "      <td>2000-07-15</td>\n",
-       "      <td>0.176757</td>\n",
-       "      <td>0.173129</td>\n",
-       "      <td>0.204114</td>\n",
-       "      <td>0.128014</td>\n",
-       "      <td>55.52</td>\n",
-       "      <td>296.415714</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.120000</td>\n",
-       "      <td>7.385714</td>\n",
-       "      <td>26.800000</td>\n",
-       "      <td>11.466667</td>\n",
-       "      <td>33.0</td>\n",
-       "      <td>20.7</td>\n",
-       "      <td>38.1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>936</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>29</td>\n",
-       "      <td>2000-07-22</td>\n",
-       "      <td>0.227729</td>\n",
-       "      <td>0.145429</td>\n",
-       "      <td>0.254200</td>\n",
-       "      <td>0.200314</td>\n",
-       "      <td>5.60</td>\n",
-       "      <td>295.357143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.431429</td>\n",
-       "      <td>9.114286</td>\n",
-       "      <td>25.766667</td>\n",
-       "      <td>10.533333</td>\n",
-       "      <td>31.5</td>\n",
-       "      <td>14.7</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>937</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>30</td>\n",
-       "      <td>2000-07-29</td>\n",
-       "      <td>0.328643</td>\n",
-       "      <td>0.322129</td>\n",
-       "      <td>0.254371</td>\n",
-       "      <td>0.361043</td>\n",
-       "      <td>62.76</td>\n",
-       "      <td>296.432857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.444286</td>\n",
-       "      <td>9.500000</td>\n",
-       "      <td>26.600000</td>\n",
-       "      <td>11.480000</td>\n",
-       "      <td>33.3</td>\n",
-       "      <td>19.1</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>938</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>31</td>\n",
-       "      <td>2000-08-05</td>\n",
-       "      <td>0.205529</td>\n",
-       "      <td>0.190757</td>\n",
-       "      <td>0.231671</td>\n",
-       "      <td>0.255314</td>\n",
-       "      <td>16.24</td>\n",
-       "      <td>297.191429</td>\n",
-       "      <td>...</td>\n",
-       "      <td>13.421429</td>\n",
-       "      <td>13.771429</td>\n",
-       "      <td>25.340000</td>\n",
-       "      <td>10.940000</td>\n",
-       "      <td>32.0</td>\n",
-       "      <td>17.0</td>\n",
-       "      <td>11.5</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>939</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>32</td>\n",
-       "      <td>2000-08-12</td>\n",
-       "      <td>0.312486</td>\n",
-       "      <td>0.329986</td>\n",
-       "      <td>0.380586</td>\n",
-       "      <td>0.387271</td>\n",
-       "      <td>89.37</td>\n",
-       "      <td>297.320000</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.311429</td>\n",
-       "      <td>11.471429</td>\n",
-       "      <td>27.016667</td>\n",
-       "      <td>11.650000</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>19.9</td>\n",
-       "      <td>72.9</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>940</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>33</td>\n",
-       "      <td>2000-08-19</td>\n",
-       "      <td>0.384133</td>\n",
-       "      <td>0.392240</td>\n",
-       "      <td>0.341780</td>\n",
-       "      <td>0.382750</td>\n",
-       "      <td>42.08</td>\n",
-       "      <td>297.627143</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.465714</td>\n",
-       "      <td>13.700000</td>\n",
-       "      <td>26.583333</td>\n",
-       "      <td>10.316667</td>\n",
-       "      <td>33.0</td>\n",
-       "      <td>20.5</td>\n",
-       "      <td>50.1</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>941</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>34</td>\n",
-       "      <td>2000-08-26</td>\n",
-       "      <td>0.408157</td>\n",
-       "      <td>0.322157</td>\n",
-       "      <td>0.406714</td>\n",
-       "      <td>0.302714</td>\n",
-       "      <td>49.22</td>\n",
-       "      <td>298.238571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.444286</td>\n",
-       "      <td>13.771429</td>\n",
-       "      <td>26.900000</td>\n",
-       "      <td>13.400000</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>19.0</td>\n",
-       "      <td>89.2</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>942</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>35</td>\n",
-       "      <td>2000-09-02</td>\n",
-       "      <td>0.332043</td>\n",
-       "      <td>0.321057</td>\n",
-       "      <td>0.314614</td>\n",
-       "      <td>0.324257</td>\n",
-       "      <td>53.65</td>\n",
-       "      <td>299.218571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.057143</td>\n",
-       "      <td>12.457143</td>\n",
-       "      <td>27.116667</td>\n",
-       "      <td>12.266667</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>20.0</td>\n",
-       "      <td>78.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>943</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>36</td>\n",
-       "      <td>2000-09-09</td>\n",
-       "      <td>0.295586</td>\n",
-       "      <td>0.295683</td>\n",
-       "      <td>0.312214</td>\n",
-       "      <td>0.265929</td>\n",
-       "      <td>23.12</td>\n",
-       "      <td>300.802857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>12.652857</td>\n",
-       "      <td>14.900000</td>\n",
-       "      <td>28.366667</td>\n",
-       "      <td>12.900000</td>\n",
-       "      <td>35.8</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>56.9</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>944</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>37</td>\n",
-       "      <td>2000-09-16</td>\n",
-       "      <td>0.284657</td>\n",
-       "      <td>0.309757</td>\n",
-       "      <td>0.387883</td>\n",
-       "      <td>0.328157</td>\n",
-       "      <td>34.62</td>\n",
-       "      <td>299.858571</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.227143</td>\n",
-       "      <td>13.857143</td>\n",
-       "      <td>27.425000</td>\n",
-       "      <td>12.775000</td>\n",
-       "      <td>34.5</td>\n",
-       "      <td>20.5</td>\n",
-       "      <td>18.9</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>945</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>38</td>\n",
-       "      <td>2000-09-23</td>\n",
-       "      <td>0.348814</td>\n",
-       "      <td>0.295717</td>\n",
-       "      <td>0.404843</td>\n",
-       "      <td>0.242571</td>\n",
-       "      <td>97.55</td>\n",
-       "      <td>297.435714</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.338571</td>\n",
-       "      <td>11.314286</td>\n",
-       "      <td>27.533333</td>\n",
-       "      <td>12.566667</td>\n",
-       "      <td>36.0</td>\n",
-       "      <td>20.5</td>\n",
-       "      <td>104.2</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>946</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>39</td>\n",
-       "      <td>2000-09-30</td>\n",
-       "      <td>0.175686</td>\n",
-       "      <td>0.099483</td>\n",
-       "      <td>0.225714</td>\n",
-       "      <td>0.182786</td>\n",
-       "      <td>95.89</td>\n",
-       "      <td>299.355714</td>\n",
-       "      <td>...</td>\n",
-       "      <td>14.798571</td>\n",
-       "      <td>14.942857</td>\n",
-       "      <td>27.150000</td>\n",
-       "      <td>12.175000</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>20.5</td>\n",
-       "      <td>57.9</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>947</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>40</td>\n",
-       "      <td>2000-10-07</td>\n",
-       "      <td>0.337540</td>\n",
-       "      <td>0.276943</td>\n",
-       "      <td>0.277500</td>\n",
-       "      <td>0.255050</td>\n",
-       "      <td>46.22</td>\n",
-       "      <td>298.372857</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.148571</td>\n",
-       "      <td>11.971429</td>\n",
-       "      <td>26.700000</td>\n",
-       "      <td>11.675000</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>20.0</td>\n",
-       "      <td>63.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>948</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>41</td>\n",
-       "      <td>2000-10-14</td>\n",
-       "      <td>0.223533</td>\n",
-       "      <td>0.133914</td>\n",
-       "      <td>0.349800</td>\n",
-       "      <td>0.100917</td>\n",
-       "      <td>31.10</td>\n",
-       "      <td>298.474286</td>\n",
-       "      <td>...</td>\n",
-       "      <td>16.071429</td>\n",
-       "      <td>13.485714</td>\n",
-       "      <td>27.657143</td>\n",
-       "      <td>11.300000</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>21.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>949</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2000</td>\n",
-       "      <td>42</td>\n",
-       "      <td>2000-10-21</td>\n",
-       "      <td>0.274800</td>\n",
-       "      <td>0.187057</td>\n",
-       "      <td>0.373943</td>\n",
-       "      <td>0.279471</td>\n",
-       "      <td>25.21</td>\n",
-       "      <td>299.211429</td>\n",
-       "      <td>...</td>\n",
-       "      <td>15.528571</td>\n",
-       "      <td>14.928571</td>\n",
-       "      <td>27.775000</td>\n",
-       "      <td>12.275000</td>\n",
-       "      <td>36.0</td>\n",
-       "      <td>21.0</td>\n",
-       "      <td>45.2</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>50 rows × 27 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    city  year  weekofyear week_start_date   ndvi_ne   ndvi_nw   ndvi_se  \\\n",
-       "900   sj  2007          37      2007-09-10       NaN  0.056900  0.238543   \n",
-       "901   sj  2007          38      2007-09-17 -0.013450  0.074900  0.152571   \n",
-       "902   sj  2007          39      2007-09-24 -0.030700 -0.002940  0.152729   \n",
-       "903   sj  2007          40      2007-10-01  0.096000  0.024767  0.185300   \n",
-       "904   sj  2007          41      2007-10-08  0.009000  0.104000  0.118129   \n",
-       "905   sj  2007          42      2007-10-15  0.021000  0.132667  0.245943   \n",
-       "906   sj  2007          43      2007-10-22       NaN -0.009150  0.191186   \n",
-       "907   sj  2007          44      2007-10-29  0.124300  0.054300  0.156814   \n",
-       "908   sj  2007          45      2007-11-05 -0.251700 -0.048600  0.205171   \n",
-       "909   sj  2007          46      2007-11-12 -0.058900 -0.062550  0.205743   \n",
-       "910   sj  2007          47      2007-11-19       NaN       NaN  0.204486   \n",
-       "911   sj  2007          48      2007-11-26 -0.059500 -0.041667  0.090917   \n",
-       "912   sj  2007          49      2007-12-03       NaN -0.042350  0.095600   \n",
-       "913   sj  2007          50      2007-12-10 -0.133050 -0.045550  0.151440   \n",
-       "914   sj  2007          51      2007-12-17       NaN -0.039000  0.173417   \n",
-       "915   sj  2007          52      2007-12-24  0.014800  0.016300  0.207267   \n",
-       "916   sj  2008           1      2008-01-01  0.000600 -0.309600  0.239814   \n",
-       "917   sj  2008           2      2008-01-08       NaN -0.108250  0.330486   \n",
-       "918   sj  2008           3      2008-01-15       NaN  0.038300  0.125000   \n",
-       "919   sj  2008           4      2008-01-22 -0.026800 -0.215300  0.112614   \n",
-       "920   sj  2008           5      2008-01-29       NaN -0.135400  0.223300   \n",
-       "921   sj  2008           6      2008-02-05 -0.111700 -0.003200  0.232843   \n",
-       "922   sj  2008           7      2008-02-12  0.072000 -0.063100  0.150200   \n",
-       "923   sj  2008           8      2008-02-19 -0.138650 -0.095067  0.246057   \n",
-       "924   sj  2008           9      2008-02-26       NaN  0.015420  0.211629   \n",
-       "925   sj  2008          10      2008-03-04 -0.088900 -0.090033  0.223243   \n",
-       "926   sj  2008          11      2008-03-11 -0.321400 -0.141200  0.110643   \n",
-       "927   sj  2008          12      2008-03-18  0.044900  0.024450  0.101629   \n",
-       "928   sj  2008          13      2008-03-25  0.077850 -0.039900  0.310471   \n",
-       "929   sj  2008          14      2008-04-01 -0.038000 -0.016833  0.119371   \n",
-       "930   sj  2008          15      2008-04-08 -0.155200 -0.052750  0.137757   \n",
-       "931   sj  2008          16      2008-04-15  0.001800       NaN  0.203900   \n",
-       "932   sj  2008          17      2008-04-22 -0.037000 -0.010367  0.077314   \n",
-       "933   iq  2000          26      2000-07-01  0.192886  0.132257  0.340886   \n",
-       "934   iq  2000          27      2000-07-08  0.216833  0.276100  0.289457   \n",
-       "935   iq  2000          28      2000-07-15  0.176757  0.173129  0.204114   \n",
-       "936   iq  2000          29      2000-07-22  0.227729  0.145429  0.254200   \n",
-       "937   iq  2000          30      2000-07-29  0.328643  0.322129  0.254371   \n",
-       "938   iq  2000          31      2000-08-05  0.205529  0.190757  0.231671   \n",
-       "939   iq  2000          32      2000-08-12  0.312486  0.329986  0.380586   \n",
-       "940   iq  2000          33      2000-08-19  0.384133  0.392240  0.341780   \n",
-       "941   iq  2000          34      2000-08-26  0.408157  0.322157  0.406714   \n",
-       "942   iq  2000          35      2000-09-02  0.332043  0.321057  0.314614   \n",
-       "943   iq  2000          36      2000-09-09  0.295586  0.295683  0.312214   \n",
-       "944   iq  2000          37      2000-09-16  0.284657  0.309757  0.387883   \n",
-       "945   iq  2000          38      2000-09-23  0.348814  0.295717  0.404843   \n",
-       "946   iq  2000          39      2000-09-30  0.175686  0.099483  0.225714   \n",
-       "947   iq  2000          40      2000-10-07  0.337540  0.276943  0.277500   \n",
-       "948   iq  2000          41      2000-10-14  0.223533  0.133914  0.349800   \n",
-       "949   iq  2000          42      2000-10-21  0.274800  0.187057  0.373943   \n",
-       "\n",
-       "      ndvi_sw  precipitation_amt_mm  reanalysis_air_temp_k  ...  \\\n",
-       "900  0.187486                 10.37             301.117143  ...   \n",
-       "901  0.131929                 70.39             301.217143  ...   \n",
-       "902  0.144629                 94.37             301.052857  ...   \n",
-       "903  0.117729                 74.50             301.022857  ...   \n",
-       "904  0.126343                108.26             300.790000  ...   \n",
-       "905  0.189757                 17.56             301.492857  ...   \n",
-       "906  0.176400                 16.48             301.007143  ...   \n",
-       "907  0.123529                137.55             299.458571  ...   \n",
-       "908  0.172883                 15.25             300.604286  ...   \n",
-       "909  0.202543                 42.00             299.934286  ...   \n",
-       "910  0.156286                 73.37             299.821429  ...   \n",
-       "911  0.129086                 15.95             299.090000  ...   \n",
-       "912  0.089000                 17.85             299.020000  ...   \n",
-       "913  0.143171                 31.30             298.900000  ...   \n",
-       "914  0.150171                 62.11             298.668571  ...   \n",
-       "915  0.144578                  0.00             298.602857  ...   \n",
-       "916  0.195557                  0.00             298.038571  ...   \n",
-       "917  0.244286                 37.24             298.142857  ...   \n",
-       "918  0.108843                  0.00             297.627143  ...   \n",
-       "919  0.160214                 81.22             297.968571  ...   \n",
-       "920  0.170943                  0.00             298.021429  ...   \n",
-       "921  0.271171                  0.00             297.237143  ...   \n",
-       "922  0.149271                  0.00             297.838571  ...   \n",
-       "923  0.228129                  0.00             297.907143  ...   \n",
-       "924  0.117343                  0.00             297.765714  ...   \n",
-       "925  0.154186                  0.00             297.878571  ...   \n",
-       "926  0.141014                  0.00             297.595714  ...   \n",
-       "927  0.088000                  0.00             297.404286  ...   \n",
-       "928  0.296243                 27.19             296.958571  ...   \n",
-       "929  0.066386                  3.82             298.081429  ...   \n",
-       "930  0.141214                 16.96             297.460000  ...   \n",
-       "931  0.209843                  0.00             297.630000  ...   \n",
-       "932  0.090586                  0.00             298.672857  ...   \n",
-       "933  0.247200                 25.41             296.740000  ...   \n",
-       "934  0.241657                 60.61             296.634286  ...   \n",
-       "935  0.128014                 55.52             296.415714  ...   \n",
-       "936  0.200314                  5.60             295.357143  ...   \n",
-       "937  0.361043                 62.76             296.432857  ...   \n",
-       "938  0.255314                 16.24             297.191429  ...   \n",
-       "939  0.387271                 89.37             297.320000  ...   \n",
-       "940  0.382750                 42.08             297.627143  ...   \n",
-       "941  0.302714                 49.22             298.238571  ...   \n",
-       "942  0.324257                 53.65             299.218571  ...   \n",
-       "943  0.265929                 23.12             300.802857  ...   \n",
-       "944  0.328157                 34.62             299.858571  ...   \n",
-       "945  0.242571                 97.55             297.435714  ...   \n",
-       "946  0.182786                 95.89             299.355714  ...   \n",
-       "947  0.255050                 46.22             298.372857  ...   \n",
-       "948  0.100917                 31.10             298.474286  ...   \n",
-       "949  0.279471                 25.21             299.211429  ...   \n",
-       "\n",
-       "     reanalysis_specific_humidity_g_per_kg  reanalysis_tdtr_k  \\\n",
-       "900                              17.720000           3.157143   \n",
-       "901                              18.037143           2.814286   \n",
-       "902                              17.981429           3.585714   \n",
-       "903                              18.118571           2.685714   \n",
-       "904                              18.375714           3.000000   \n",
-       "905                              17.845714           3.185714   \n",
-       "906                              17.275714           2.471429   \n",
-       "907                              17.502857           2.600000   \n",
-       "908                              17.295714           2.257143   \n",
-       "909                              17.082857           3.542857   \n",
-       "910                              17.187143           2.514286   \n",
-       "911                              14.768571           2.071429   \n",
-       "912                              15.675714           2.100000   \n",
-       "913                              16.130000           2.485714   \n",
-       "914                              16.344286           2.371429   \n",
-       "915                              15.318571           2.985714   \n",
-       "916                              14.911429           1.842857   \n",
-       "917                              14.980000           2.057143   \n",
-       "918                              14.488571           3.000000   \n",
-       "919                              15.065714           2.000000   \n",
-       "920                              14.408571           3.300000   \n",
-       "921                              13.225714           2.071429   \n",
-       "922                              14.367143           2.157143   \n",
-       "923                              14.538571           1.885714   \n",
-       "924                              13.967143           2.285714   \n",
-       "925                              14.474286           2.614286   \n",
-       "926                              13.721429           2.085714   \n",
-       "927                              13.737143           3.871429   \n",
-       "928                              13.644286           2.885714   \n",
-       "929                              14.662857           2.714286   \n",
-       "930                              14.184286           2.185714   \n",
-       "931                              13.858571           2.785714   \n",
-       "932                              15.671429           3.957143   \n",
-       "933                              16.651429           8.928571   \n",
-       "934                              16.862857          10.314286   \n",
-       "935                              17.120000           7.385714   \n",
-       "936                              14.431429           9.114286   \n",
-       "937                              15.444286           9.500000   \n",
-       "938                              13.421429          13.771429   \n",
-       "939                              15.311429          11.471429   \n",
-       "940                              15.465714          13.700000   \n",
-       "941                              14.444286          13.771429   \n",
-       "942                              15.057143          12.457143   \n",
-       "943                              12.652857          14.900000   \n",
-       "944                              15.227143          13.857143   \n",
-       "945                              14.338571          11.314286   \n",
-       "946                              14.798571          14.942857   \n",
-       "947                              16.148571          11.971429   \n",
-       "948                              16.071429          13.485714   \n",
-       "949                              15.528571          14.928571   \n",
-       "\n",
-       "     station_avg_temp_c  station_diur_temp_rng_c  station_max_temp_c  \\\n",
-       "900           28.871429                 6.514286                33.9   \n",
-       "901           28.300000                 6.285714                32.8   \n",
-       "902           28.171429                 6.028571                32.2   \n",
-       "903           27.985714                 7.242857                32.8   \n",
-       "904           28.128571                 6.914286                33.3   \n",
-       "905           29.100000                 7.542857                33.9   \n",
-       "906           27.957143                 6.442857                32.2   \n",
-       "907           26.200000                 5.400000                30.6   \n",
-       "908           27.442857                 6.857143                32.2   \n",
-       "909           26.814286                 6.685714                31.1   \n",
-       "910           26.900000                 6.200000                31.1   \n",
-       "911           25.442857                 5.385714                28.9   \n",
-       "912           25.842857                 5.400000                29.4   \n",
-       "913           25.771429                 5.085714                28.9   \n",
-       "914           25.071429                 4.914286                28.9   \n",
-       "915           25.085714                 6.242857                28.3   \n",
-       "916           25.400000                 5.300000                29.4   \n",
-       "917           24.971429                 5.014286                28.3   \n",
-       "918           24.428571                 5.628571                27.8   \n",
-       "919           24.528571                 4.585714                27.8   \n",
-       "920           24.571429                 6.442857                28.9   \n",
-       "921           24.214286                 5.157143                27.2   \n",
-       "922           24.800000                 6.242857                28.3   \n",
-       "923           24.900000                 5.785714                28.3   \n",
-       "924           24.742857                 5.500000                27.8   \n",
-       "925           25.114286                 6.114286                29.4   \n",
-       "926           25.328571                 5.814286                28.9   \n",
-       "927           25.200000                 7.042857                30.0   \n",
-       "928           25.042857                 5.785714                30.0   \n",
-       "929           26.242857                 6.814286                30.6   \n",
-       "930           25.000000                 5.714286                29.4   \n",
-       "931           25.314286                 6.242857                29.4   \n",
-       "932           27.042857                 7.514286                31.7   \n",
-       "933           26.400000                10.775000                32.5   \n",
-       "934           26.900000                11.566667                34.0   \n",
-       "935           26.800000                11.466667                33.0   \n",
-       "936           25.766667                10.533333                31.5   \n",
-       "937           26.600000                11.480000                33.3   \n",
-       "938           25.340000                10.940000                32.0   \n",
-       "939           27.016667                11.650000                34.0   \n",
-       "940           26.583333                10.316667                33.0   \n",
-       "941           26.900000                13.400000                34.0   \n",
-       "942           27.116667                12.266667                34.0   \n",
-       "943           28.366667                12.900000                35.8   \n",
-       "944           27.425000                12.775000                34.5   \n",
-       "945           27.533333                12.566667                36.0   \n",
-       "946           27.150000                12.175000                34.0   \n",
-       "947           26.700000                11.675000                34.0   \n",
-       "948           27.657143                11.300000                34.0   \n",
-       "949           27.775000                12.275000                36.0   \n",
-       "\n",
-       "     station_min_temp_c  station_precip_mm  last_infected_0  last_infected_1  \\\n",
-       "900                25.0               10.4             71.0             92.0   \n",
-       "901                24.4               26.9            112.0             71.0   \n",
-       "902                24.4               21.3            106.0            112.0   \n",
-       "903                22.2               86.6            101.0            106.0   \n",
-       "904                23.9               14.5            170.0            101.0   \n",
-       "905                24.4               10.2            135.0            170.0   \n",
-       "906                24.4                8.6            106.0            135.0   \n",
-       "907                22.2               89.2             68.0            106.0   \n",
-       "908                22.8                4.1             48.0             68.0   \n",
-       "909                22.8               65.7             48.0             48.0   \n",
-       "910                22.8               40.4             26.0             48.0   \n",
-       "911                22.2               36.4             33.0             26.0   \n",
-       "912                22.8               34.5             29.0             33.0   \n",
-       "913                22.2               30.2             17.0             29.0   \n",
-       "914                21.7              108.2             12.0             17.0   \n",
-       "915                21.1               16.8             13.0             12.0   \n",
-       "916                22.2               55.5             17.0             13.0   \n",
-       "917                21.1               64.8             15.0             17.0   \n",
-       "918                20.6                2.5             14.0             15.0   \n",
-       "919                21.1               83.1             15.0             14.0   \n",
-       "920                20.0                3.1             10.0             15.0   \n",
-       "921                21.1               35.9              9.0             10.0   \n",
-       "922                21.1                6.4              2.0              9.0   \n",
-       "923                21.7               13.3              6.0              2.0   \n",
-       "924                21.1               12.9              8.0              6.0   \n",
-       "925                21.1               13.0              5.0              8.0   \n",
-       "926                22.2                4.4              1.0              5.0   \n",
-       "927                20.6                0.5              2.0              1.0   \n",
-       "928                21.1                1.8              3.0              2.0   \n",
-       "929                22.2                0.5              4.0              3.0   \n",
-       "930                21.7               30.7              3.0              4.0   \n",
-       "931                21.7               11.2              1.0              3.0   \n",
-       "932                23.3                0.3              3.0              1.0   \n",
-       "933                20.7                3.0              0.0              0.0   \n",
-       "934                20.8               55.6              0.0              0.0   \n",
-       "935                20.7               38.1              0.0              0.0   \n",
-       "936                14.7               30.0              0.0              0.0   \n",
-       "937                19.1                4.0              0.0              0.0   \n",
-       "938                17.0               11.5              0.0              0.0   \n",
-       "939                19.9               72.9              0.0              0.0   \n",
-       "940                20.5               50.1              0.0              0.0   \n",
-       "941                19.0               89.2              0.0              0.0   \n",
-       "942                20.0               78.0              0.0              0.0   \n",
-       "943                21.7               56.9              0.0              0.0   \n",
-       "944                20.5               18.9              1.0              0.0   \n",
-       "945                20.5              104.2              0.0              1.0   \n",
-       "946                20.5               57.9              0.0              0.0   \n",
-       "947                20.0               63.0              0.0              0.0   \n",
-       "948                21.0                3.0              0.0              0.0   \n",
-       "949                21.0               45.2              1.0              0.0   \n",
-       "\n",
-       "     total_cases  \n",
-       "900          112  \n",
-       "901          106  \n",
-       "902          101  \n",
-       "903          170  \n",
-       "904          135  \n",
-       "905          106  \n",
-       "906           68  \n",
-       "907           48  \n",
-       "908           48  \n",
-       "909           26  \n",
-       "910           33  \n",
-       "911           29  \n",
-       "912           17  \n",
-       "913           12  \n",
-       "914           13  \n",
-       "915           17  \n",
-       "916           15  \n",
-       "917           14  \n",
-       "918           15  \n",
-       "919           10  \n",
-       "920            9  \n",
-       "921            2  \n",
-       "922            6  \n",
-       "923            8  \n",
-       "924            5  \n",
-       "925            1  \n",
-       "926            2  \n",
-       "927            3  \n",
-       "928            4  \n",
-       "929            3  \n",
-       "930            1  \n",
-       "931            3  \n",
-       "932            5  \n",
-       "933            0  \n",
-       "934            0  \n",
-       "935            0  \n",
-       "936            0  \n",
-       "937            0  \n",
-       "938            0  \n",
-       "939            0  \n",
-       "940            0  \n",
-       "941            0  \n",
-       "942            0  \n",
-       "943            1  \n",
-       "944            0  \n",
-       "945            0  \n",
-       "946            0  \n",
-       "947            0  \n",
-       "948            1  \n",
-       "949            1  \n",
-       "\n",
-       "[50 rows x 27 columns]"
-      ]
-     },
-     "execution_count": 58,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tmp[900:950]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

From ad1e138d93888f3582aa24360e30ba42f670c4d7 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 18:47:38 +0100
Subject: [PATCH 08/24] refactored the pipeline

---
 OurPipeline.py    | 21 +++++++++++++++++++++
 models.ipynb      | 35 +++++++----------------------------
 utils/__init__.py |  0
 3 files changed, 28 insertions(+), 28 deletions(-)
 create mode 100644 OurPipeline.py
 create mode 100644 utils/__init__.py

diff --git a/OurPipeline.py b/OurPipeline.py
new file mode 100644
index 0000000..08454be
--- /dev/null
+++ b/OurPipeline.py
@@ -0,0 +1,21 @@
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+from utils.ContinuityImputer import ContinuityImputer
+from utils.DataFrameDropper import DataFrameDropper
+from utils.LastWeeks import LastWeeks
+from utils.LastInfected import LastInfected
+
+def create_pipeline(attr, n_weeks, pca_n_components=None,  n_non_train=4):
+    pipelist = [
+        ('imputer', ContinuityImputer(attributes=attr[n_non_train:])),
+        ('lw', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
+        ('lf', LastInfected(weeks=n_weeks)),
+        ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])),
+        ('scaler', StandardScaler()),
+    ]
+
+    if pca_n_components is not None:
+        pipelist.append(('pca', PCA(n_components=pca_n_components)))
+
+    return Pipeline(pipelist)
\ No newline at end of file
diff --git a/models.ipynb b/models.ipynb
index f88b08d..f26a577 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,33 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 68,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload\n",
-    "from sklearn.pipeline import Pipeline\n",
-    "from sklearn.preprocessing import StandardScaler\n",
-    "from sklearn.decomposition import PCA\n",
-    "from utils.ContinuityImputer import ContinuityImputer\n",
-    "from utils.DataFrameDropper import DataFrameDropper\n",
-    "from utils.LastWeeks import LastWeeks\n",
-    "from utils.LastInfected import LastInfected\n",
-    "lw = LastWeeks(attributes=['ndvi_ne', 'precipitation_amt_mm', 'reanalysis_relative_humidity_percent'], weeks=3)\n",
-    "\n",
-    "pipeline = Pipeline([\n",
-    "    ('imputer', ContinuityImputer(attributes=attr[4:])),\n",
-    "    ('lw', LastWeeks(attributes=attr[4:], weeks=3)),\n",
-    "    ('lf', LastInfected(weeks=3)),\n",
-    "    ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:4])),\n",
-    "    ('scaler', StandardScaler()),\n",
-    "    #('pca', PCA(n_components=0.95))\n",
-    "])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -147,12 +121,17 @@
        "(1451, 83)"
       ]
      },
-     "execution_count": 69,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=None)\n",
+    "\n",
     "X_train = pipeline.fit_transform(X_train_1, y_train)\n",
     "X_train.shape"
    ]
diff --git a/utils/__init__.py b/utils/__init__.py
new file mode 100644
index 0000000..e69de29

From dfa75a029613ebc28b776ffcc14a6e0ca8aaaf12 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 19:28:43 +0100
Subject: [PATCH 09/24] modeled pca

---
 analysis.ipynb | 103 ++++++++++++++++++++++++-------------------------
 models.ipynb   |   4 +-
 2 files changed, 53 insertions(+), 54 deletions(-)

diff --git a/analysis.ipynb b/analysis.ipynb
index 587a6db..76e407c 100644
--- a/analysis.ipynb
+++ b/analysis.ipynb
@@ -598,11 +598,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 56,
    "metadata": {},
    "outputs": [],
    "source": [
-    "train_data = train_data[train_data['weekofyear'] != 53]"
+    "train_data = train_data[train_data['weekofyear'] != 53]\n",
+    "train_data.reset_index(drop=True, inplace=True)"
    ]
   },
   {
@@ -1688,7 +1689,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -1720,7 +1721,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -1744,7 +1745,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -1775,7 +1776,7 @@
        "Name: total_cases, dtype: float64"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1797,7 +1798,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -1806,7 +1807,7 @@
        "(1451, 22)"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1817,7 +1818,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -1845,7 +1846,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -1879,7 +1880,7 @@
        "dtype: float64"
       ]
      },
-     "execution_count": 19,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1895,7 +1896,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
@@ -1935,7 +1936,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -2252,7 +2253,7 @@
        "max          2.228153e+00       1.063787e+01  "
       ]
      },
-     "execution_count": 21,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2284,7 +2285,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2294,7 +2295,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 22,
    "metadata": {},
    "outputs": [
     {
@@ -2303,7 +2304,7 @@
        "2"
       ]
      },
-     "execution_count": 41,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2326,7 +2327,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -2361,7 +2362,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -2532,7 +2533,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -2541,7 +2542,7 @@
        "('2000-07-01', '2010-06-25')"
       ]
      },
-     "execution_count": 47,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2552,7 +2553,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -2561,7 +2562,7 @@
        "('2010-07-02', '2013-06-25')"
       ]
      },
-     "execution_count": 50,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2572,7 +2573,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
@@ -2581,7 +2582,7 @@
        "('1990-04-30', '2008-04-22')"
       ]
      },
-     "execution_count": 48,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2592,7 +2593,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -2601,7 +2602,7 @@
        "('2008-04-29', '2013-04-23')"
       ]
      },
-     "execution_count": 52,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2612,7 +2613,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -2644,7 +2645,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -2691,7 +2692,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2707,7 +2708,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -2754,7 +2755,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2763,7 +2764,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -2811,37 +2812,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 57,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,\n",
-       "  svd_solver='auto', tol=0.0, whiten=False)"
+       "(1451, 65)"
       ]
      },
-     "execution_count": 34,
+     "execution_count": 57,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "pca = PCA()\n",
-    "pca.fit(X_train)\n",
-    "pca"
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "attr=list(train_data)[:-1]\n",
+    "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=0.999)\n",
+    "\n",
+    "X_train = pipeline.fit_transform(train_data.iloc[:,:-1].copy(), train_data.iloc[:,-1].copy())\n",
+    "X_train.shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 63,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA34AAAE/CAYAAAAZshH0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3X2czXX+//HH2xhmmBG5ylXGRhKGmEJXtNZFpYmimq2trBStpbaWrotq6cJX6GK7kESNKDJFmshVWxJF9SNr29qQcpGLYRgz5vX743NmzDA4w5xzPjOe99vt3M45n8/nvD/Pz1DOa97vz/vtzAwREREREREpu8pFOoCIiIiIiIiElgo/ERERERGRMk6Fn4iIiIiISBmnwk9ERERERKSMU+EnIiIiIiJSxqnwExERERERKeNU+ImISKnjnHvNOfdYkMd+4Jy7KQQZEpxz5pwrX9JtF3Gui5xza0N9HhERKbtC/o+ViIicvJxzPwK1gQMFNr9mZoPClcHMLg3XuULFzJYATSOdQ0RESi8VfiIiEmpXmNm8SIcorZxz5c0sJ9I5RESkdNNQTxERiQjn3AvOuXcKvH/COTffeTo55zY45+5zzm11zv3onLv+CO1Uc86975zb4pzbHnhdv8D+hc65WwKvb3bOfeKcezpw7A/OuUsLHHuKc26Cc26Tc26jc+4x51xUYF9U4HNbnXP/BS4/yrUNc869fci2sc65cYHXfZ1za5xzGc65/zrnbitwXN61D3PO/QJMzNtW4Jh7nHPfBz6/2jnXq8C+Y13jqc65ic65nwP73y2wr4dzbqVzbodz7lPnXOKRrlFEREoXFX4iIhIpdwEtA4XKRUA/4CYzs8D+04AaQD3gJuAl51xRwx3LAROBhsDpwF7g2aOctx2wNtD2k8AE55wL7HsNyAEaA+cAXYFbAvv6Az0C25OA3kc5x1TgMudcPHhFI3AN8GZg/+ZAW1WAvsAY51ybAp8/DTg1cE23FtH+98BFwCnAcGCKc65OkNc4GagENAdqAWMCGc8BXgVuA6oDLwJpzrmKR7lOEREpJVT4iYhIqL0b6EHKe/QHMLNM4E/A/wFTgL+a2YZDPvugmWWZ2SJgNl7xVIiZbTOzd8ws08wygMeBjkfJ8z8ze9nMDgCTgDpAbedcbeAy4A4z22Nmm/GKousCn7sGeMbM1pvZb8DII53AzP4HfAnk9cT9Hsg0s6WB/bPN7HvzLALS8Qq5PLnAw4Fr31tE+9PN7GczyzWzt4B1wHlBXGMd4FJggJltN7PswPnBKzBfNLPPzeyAmU0CsoD2R/lZiohIKaF7/EREJNR6HukePzP7PDBsshYw7ZDd281sT4H3/wPqHtqGc64SXoHWHagW2BzvnIsKFD6H+qXA+TMDHWFxeD1s0cCmg51jlAPWB17XLfA6L8/RvAmkAK8Df+Rgbx+BoZcPA2cGzlEJ+KbAZ7eY2b4jNeycuxH4G5AQ2BSH17sXzDX+Zmbbi2i2IXCTc+6vBbZVoIifuYiIlD7q8RMRkYhxzv0FqAj8DAw9ZHc151zlAu9PDxx3qLvwZrxsZ2ZVgIvzmi9mnPV4PVw1zKxq4FHFzJoH9m8CGhyS52imA50C9xv2IlD4BYZOvgM8DdQ2s6rAnEPyGkfgnGsIvAwMAqoHPv8twV3veuBU51zVI+x7vMC1VzWzSmaWGkS7IiLicyr8REQkIpxzZwKPATfgDfkc6pxrfchhw51zFQL3APbAK6YOFY93X98O59ypeD1pxWZmm/CGXI52zlVxzpVzzp3hnMsbNjoNGOycq++cqwbcc4z2tgAL8e4//MHM1gR2VcArdrcAOYHev67FiFoZrzDcAt5EMUCLYlzjB8DzgUlxop1zeYXyy8AA51y7wAQ7lZ1zl+fdpygiIqWbCj8REQm195xzuws8Zjpv0fMpwBNmtsrM1gH3AZMLTCbyC7Adr5f
vDbz70r4rov1ngFhgK7AUmHsCWW/EK8xWB879Nt79ceAVRh8Cq/Du35sRRHtvAn+gwDDPwH2Ig/EKye14w0DTgg1oZquB0cBnwK9AS+BfwX4er8jOBr7Dm2TmjkC7y/EmsHk2kOs/wM3FaFdERHzMHZw8TURExB+cc52AKWZW/1jHioiIyLGpx09ERERERKSMU+EnIiIiIiJSxmmop4iIiIiISBmnHj8REREREZEyToWfiIiIiIhIGVc+0gFORI0aNSwhISHSMUrUnj17qFy58rEPjAC/ZvNrLvBvNr/mAv9m82su8G825So+v2bzay7wbza/5gL/ZlOu4vNrNr/mAn9nO14rVqzYamY1j3mgmZXaR9u2ba2sWbBgQaQjHJFfs/k1l5l/s/k1l5l/s/k1l5l/sylX8fk1m19zmfk3m19zmfk3m3IVn1+z+TWXmb+zHS9guQVRO2mop4iIiIiISBmnwk9ERERERKSMU+EnIiIiUgqMHTuWFi1a0Lx5c5555hkAVq1aRYcOHWjZsiVXXHEFu3btUq5SkM2vuf785z9Tq1YtWrRokb/tt99+o0uXLjRp0oQuXbqwfft2wLtdbPDgwTRu3JjExES+/PLLkzZbaVGqJ3cpSnZ2Nhs2bGDfvn2RjnJcTjnlFNasWRPpGEXyaza/5oLgs8XExFC/fn2io6PDkEpEJPLGjh3Lyy+/jJnRv39/7rjjDlatWsWAAQPYvXs3CQkJvPHGG1SpUkW5gG+//ZaXX36ZZcuWUaFCBbp3706PHj245ZZbePrpp+nYsSOvvvoqTz31FI8++uhJn8vP2fyaC+Dmm29m0KBB3HjjjfnbRo0aRefOnbnnnnsYNWoUo0aN4oknnuCDDz5g3bp1rFu3js8//5yBAwfy+eefn5TZSosyV/ht2LCB+Ph4EhIScM5FOk6xZWRkEB8fH+kYRfJrNr/mguCymRnbtm1jw4YNNGrUKEzJREQix69ffP2aC2DNmjW0a9eOSpUqAdCxY0dmzJjBv//9by6++GIAunTpQrdu3cKaza+5/JzNr7kALr74Yn788cdC22bNmsXChQsBuOmmm+jUqRNPPPEEs2bN4sYbb8Q5R/v27dmxYwebNm2iTp06J1220qLMDfXct28f1atXL5VFn5ycnHNUr1691PZSi4i/FTWkbOXKlbRv357WrVuTlJTEsmXLwpqp4Bff8uXLH/GL7zvvvKNcAS1atGDJkiVs27aNzMxM5syZw/r162nevDmzZs0CYPr06axfv165fJ7Nr7mO5Ndff80vmE477TR+/fVXADZu3EiDBg3yj6tfvz4bN25UNh8rc4UfoKJPSh39nRWRUCjYg7Vq1Sref/99/vOf/zB06FAefvhhVq5cyYgRIxg6dGhYc/n1i69fcwE0a9aMYcOG0bVrV7p3707r1q2Jiori1Vdf5fnnn6dt27ZkZGRQoUIF5fJ5Nr/mCoZzzrffWfyczS/KZOEXaY8//jjNmzcnMTGR1q1bh3VM8XfffUfr1q0555xz+P7770Nyjptvvpm33377qMc89NBDzJs3r0TO16lTJ5YvX14ibRVUkhlF5OTmx141OHIPlnMuf+KInTt3Urdu3bDm8usXX7/mytOvXz9WrFjB4sWLqVatGmeeeSZnnXUW6enprFixgpSUFM444wzlKgXZ/JqrKLVr12bTpk0AbNq0iVq1agFQr169Qr8E2bBhA/Xq1VM2H1PhV8I+++wz3n//fb788ku+/vpr5s2bV6irOdTeffddevfuzVdffRXR/2GMGDGCP/zhDxE7/7EcOHDA9xlFpHTwa68aHLkH65lnnuHvf/87DRo04O6772bkyJFhz+bXL75+zQWwefNmAH766SdmzJjBH//4x/xtubm5PPbYYwwYMEC5SkE2v+YqSnJyMpMmTQJg0qRJXHnllfnbX3/9dcyMpUuXcsopp4T9Hjo/Z/OjkBV+zrlXnXObnXPfFth2qnPuI+fcusBztcB255w
b55z7j3Pua+dcm1DlCrVNmzZRo0YNKlasCECNGjXyf5OakJDA1q1bAVi+fDmdOnUC4JFHHuGmm27ioosuonnz5syYMYOhQ4fSsmVLunfvTnZ29mHnyftNcmJiIr169WL79u3MmTOHZ555hhdeeIFLLrnksM+kp6fToUMH2rRpQ58+fdi9ezc7d+6kadOmrF27FoCUlBRefvllAOLi4rjzzjtp3rw5nTt3zs9e0IgRIzj33HNp0aIFt956K2YGFO4VTEhI4OGHH6ZNmza0bNmS7777DoA9e/bw5z//mfPOO49zzjknfxjN3r17ue6662jWrBm9evVi7969h5137ty59OnTJ//9woUL6dGjBwADBw4kKSmJ5s2b8/DDD+cfk5CQwLBhw2jTpg3Tp08vlPFI19GpUyeGDRvGeeedx5lnnsmSJUsAr3C8++67adGiBYmJiYwfPx6AFStW0LFjR9q2bUu3bt345ZdfABg3bhxnn302iYmJXHfddYddj4gEp6ietWuvvZbWrVvTunVrEhISaN26dVgz+bVXDY7cg/XCCy8wZswY1q9fz5gxY+jXr1/Ys/n1i69fcwFcffXVnH322VxxxRU899xzVK1aldTU1PzitG7duvTt21e5SkE2v+ZKSUmhQ4cOrF27lvr16zNhwgTuuecePvroI5o0acK8efO45557ALjsssv43e9+R+PGjenfvz/PP//8SZut1DCzkDyAi4E2wLcFtj0J3BN4fQ/wROD1ZcAHgAPaA58Hc462bdvaoVavXn3YtnDKyMiwVq1aWZMmTWzgwIG2cOHC/H0NGza0LVu2mJnZF198YR07djQzs4cfftguuOAC279/v/3rX/+y2NhYmzNnjpmZ9ezZ02bOnHnYeVq2bJnf9oMPPmhDhgzJb+upp5467PgtW7bYRRddZLt37zYzs1GjRtnw4cPNzCw9Pd3at29vqamp1q1bt/zPADZlyhQzMxs+fLj179/fzMxuuukmmz59upmZbdu2Lf/4G264wdLS0g47pmHDhjZu3DgzM3vuueesX79+ZmZ277332uTJk83MbPv27dakSRPbvXu3jR492vr27WtmZqtWrbKoqCj74osvCl1Pdna2NWjQwHbv3m27du2yAQMG5LeVlyknJ8c6duxoq1atys/xxBNP5LcRzHV07NjR/va3v5mZ2ezZs61z585mZvb888/b1VdfbdnZ2fmf379/v3Xo0ME2b95sZmZTp061G264wczM6tSpY/v27cu/1qKE8+/uggULwnau4vJrNr/mMvNvtpLO9c0331jz5s1tz549lp2dbZ07d7Z169YVOuZvf/tb/v/bwpVr9erV1qRJE9u6davt2bPH2rdvb4MGDbLVq1dbgwYNrH79+la3bl378ccfj9lWqP8s7733XnvuueesSpUqlpuba2Zmubm5Fh8fH/ZcF154oTVr1swSExNt3rx5Zmb2zDPPWJMmTaxJkyY2bNiw/IzhzObXXCXJr9mUq/j8ms2vucz8ne14AcstiNopZMs5mNli51zCIZuvBDoFXk8CFgLDAttfDwRf6pyr6pyrY2abTijEHXfAypUn1MRhWreGwG+ZixIXF8eKFStYsmQJCxYs4Nprr2XUqFHcfPPNR2320ksvJTo6mubNm3PgwAG6d+8OQMuWLQ+bunbnzp3s2LGDjh07At70tQV7v4qydOlSVq9ezQUXXADA/v376dChA+DNUDZ9+nT+8pe/sGrVqvzPlCtXjmuvvRaAG264gZ49ex7W7oIFC3jyySfJzMzkt99+o3nz5lxxxRWHHXfVVVcB0LZtW2bMmAF4PZBpaWk8/fTTgDcj608//cTixYsZPHgwAImJiSQmJh7WXvny5enevTvvvfce3bp1Y/bs2Tz55JMATJs2jZdeeomcnBw2bdrE6tWr89vIu57iXEfB7Hl/FvPmzWPAgAGUL+/9J3Tqqafy7bff8u2339KlSxfA6xWsWbNm/nVcf/319OzZs8i
fo4gc25GmQM8bQmlmTJs2jY8//jisuQr2qlWuXPmwXrWrr76aadOm0a9fv4jcV7x582Zq1aqV34O1dOlSxo8fz6JFi+jUqRMff/wxTZo0CXuuvBEUBQ0ZMoQhQ4aEPUtBJZErLS2NiRMnsmvXLpKTk0sy3gkLWzYzyM0N+pH2wQe8PnUqu374geTAv6OYeY9Qvz7KvrRFi3gjLY1dV1xBcuB7V6FrPNr1l8T2o+xLW7yYN+bMYddll5F80UVH/nyw5ymhz6R98glv5uW68MLiny+E0j75hNcXLfLlf5vhEO51/GoXKOZ+AWoHXtcDCk6RtSGw7cQKvwiJioqiU6dOdOrUiZYtWzJp0iRuvvlmypcvT25uLsBhU/fnDQ0tV64c0dHR+bMSlStXjpycnBPOZGZ06dKF1NTUw/bl5uayZs0aKlWqxPbt26lfv36RbRw6U9K+ffu4/fbbWb58OQ0aNOCRRx454pIEedcXFRWVfz1mxjvvvEPTpk2P65quu+46nn32WWJiYkhKSiI+Pp4ffviBp59+mi+++IJq1apx8803F8pUuXLlw9o51nUUlb0oZkbz5s357LPP8rdlZGQAMHv2bBYvXsx7773H448/zjfffJNfNIr4TVELWD/yyCO8/PLL+b/M+Mc//sFll10W1lwtWrTg/vvvZ9u2bcTGxjJnzhySkpLy9y9ZsoTatWtHpIjp169f/nDJ++67j/r163PvvfcyduxYAPr06cMtt9wS9lzgDSnbtm0b0dHR+UPKXn75ZYYMGUJOTg4xMTG89NJLEcl2osJSxJhBdjZkZcH+/cd8Tlu8mJSnnyZz/37S58whdeBAkhMT4cAByMnxnkPxCKLttK1bSfn+ezLNSJ81i9QGDUiOiytWgRb0ozh/jkAKkAl88OGHpAJ++EpeMNf7ixb5Jhcckm3xYt9kK5jrvSVLfJMLDvl7tngxqampJ13xF7FvnmZmzrli/9rBOXcrcCt4M/nkLdqY55RTTsn/sk2oFr3Ma78I69atwzlH48aNAfj888+pU6cOGRkZNGjQgCVLltC1a1dSU1M5cOAAGRkZZGVlER0dTUZGBgcOHAicwjtHwX15ypUrxymnnMKHH37I+eefzyuvvEKHDh0Oa6ugFi1a8Mknn7By5UrOOOMM9uzZw88//0yTJk0YP348jRs35oEHHuCmm25i3rx5REdHk5uby+TJk+nduzcTJ06kXbt2ZGRkkJ2dzd69e9myZQtmRsWKFdm0aRPTpk3jyiuvLHRMRkYGZsbu3bupWLEie/bsyb/uSy65hNGjR/P000/jnGPVqlW0atWKdu3aMWnSJM4991xWr17N119/zZ49ew67pjZt2rBixQpyc3Pp1asXGRkZbNq0idjYWMqVK8f333/PnDlzaN++/WE5gKCu48CBA/nn3r17N2ZGRkYGF110Ec899xxJSUmUL1+e3377jbp16/Lrr78yb9482rVrR3Z2NmvXruXss89m/fr1JCUl0apVK1JTU9m0aRNVq1YtdD379u077O9zqOzevTts5youv2bzay4o2Ww//PBD/r3C0dHRDB06lJo1a/Ljjz+SnJxcqNf8WOcMxc/syiuvpEOHDsTGxpKQkMCmTZvyzzFmzBjOO++8iOTavn071apV49dff2XKlCk8//zzVK1albFjx9K6dWtWrFjBaaedFpFshy4Andf+6NGj87dlZGQc9bzh/PvvDhyg3P79x3ws+vpr/v722+zLyWHu++/zTNeu/KF+fcplZ1MuOxuX95yT430mJyd/W8HtLifnsM/kvw7sK450vC+WgFf8jR1brC++Vq5c/oNy5bCoqODeB57J+3wR79N27SIz0HuTacas7Gw61KiBOecdl/ec175zoXsucM5pixaRGRhxlAlMa92apoHJ1wwg7xfPzuW/LvHtBfblfTlNff99MgMzs2cCqe3b0+iQQuGoX2SPtLRAcbcXcZ43332XzMAvmjOBN88/n4bBjig6niU
PgvzMmzNnkvmvfx3MdcEFNOzVq/jnC4FC2TIzmThxIlWqVIlwqvAKd+H3a94QTudcHWBzYPtGoODUl/UD2w5jZi8BLwEkJSVZ3gQpedasWUN8fHxJ5w6amTFo0CB27NhB+fLlady4MS+99BLx8fGMGDGCfv36MXLkSDp16kRUVBTx8fFUrFiRihUrEh8fn1/c5F1DwX0FTZ48mQEDBpCZmcnvfvc7Jk6ceFhbBcXHxzNp0iT69+9PVlYWAI899hiVK1dm8uTJLFu2jPj4eD788EPGjh3L8OHDqVy5Mt988w2jR4+mVq1aTJgwgfj4eKKjo4mNjaVBgwbceuutdOjQgdNOO4127drlnzvvmPj4eJxzxMXFER8fT+XKlfOv+9FHH+WOO+7gggsuIDc3l0aNGvH+++9zxx130LdvX8477zyaNWtG27ZtqVy5cpF/rldccQWvvfYab775JpUqVeL888+nbdu2nHvuuTRo0IALL7yQmJiYw3IAQV1HVFRU/rmzsrJwzhEfH8+gQYP46aefuOCCC4iOjqZ///4MGjSIGTNmMHjwYHbu3ElOTg4DBgwgKSmJAQMGsHPnTsyMIUOGFDnTa0xMDOecc86J/yUMwsKFCzn0vx2/8Gs2v+aCks22ZcsWfv/73+cPN+/ZsycbN24kISGBuLi4Yp0nFD+zTp068dRTTwEHe9Y6depETk4O1157LStWrDjiqIVQ5rrooovye9UmTpxI586dqVq1KkOGDGHixInExMSQmppK27Ztw57tuB04ABkZpL3zDpNTU/lTjx4kt2sH+/Z5j717D74+1qM4xwY5yuVFIG9sxr6cHL6ZM4fb8naWKwcVK0KFCkd+jos7+v7jfO66dCkT77+fzH37qBQTQ9exY6FbN4iK8h7lyx98feijXDlvLbIQ/HECJKelkZqSQmZmJpUqVeLKf/6Tmj7o8bgmLY2ZBXJdM3w4TX2QKyUxkbQCuVLuvZeWPsgF8MezzuK9Atn+OGwYrXyQ7Y9NmhTONXSoL3LB4dn69u3rn//fhomz4xnrG2zj3j1+75tZi8D7p4BtZjbKOXcPcKqZDXXOXQ4MwpvkpR0wzszOO1b7SUlJduj6bmvWrKFZs2YleyFhlJGREdHCtaC4uDh2796d/95P2Qryay4oXrZw/t311ZfLQ/g1m19zQclmW7NmDVdeeSWfffYZsbGxdO7cmaSkJKpXr85rr71GlSpVSEpKYvTo0VSrVi1sufIUvF+ta9euLF26lKpVqzJ37lxGjhzJokWLjtlGmf6zzM72RqXs2uU9H+l1MPv37i00NKoSBDdsyzmIiTn4iI0t/L44j6N8Nu2zz0gZOtQrsGJjSX3lFZJ79vQKsKio4/8ZloC8Iah9+/b13VAyv2ZTruLzaza/5gJ/ZzsRzrkVZpZ0rONC1uPnnEvFm8ilhnNuA/AwMAqY5pzrB/wPuCZw+By8ou8/eP++RGbOXxGRMCrqXro8o0eP5u6772bLli3UqFEjbJmONFHJwIEDefDBB3HO8eCDD3LXXXfx6quvhi1XnqLuVwOYOnUqKSkpYc9TInJzYdcu0qZPZ0pqKruWLvXuCTuewu0I91kfJjYW4uOhShXvOT4e6taFpk0LbUv/6CMyP/kE8P5xTk9OJvmuu45erEVHH99QsmJKTkoitWFDX36JS05OpkqVKr78BYNfsylX8fk1m19zgb+zhUMoZ/U80r/AnYs41oC/hCqLHJ+CvX0iUrIKLvpdoUIFunfvTo8ePWjcuDHr168nPT2d008/PSLZipqopHbt2vn7+/fvn79uZrgVNeMiwGuvvRbeIAWZwZ49sH077Nhx8BHs+127SDPL71mbvWBB0T1rlSoVLtaqVIEGDQ7fllfIHbqt4L4gJ5fq2ro1E7/8Mn9oVNd+/eDii0v253cCTvYvcSIixaFpBUVEIuBoSxPceeedPPnkk1x55ZURyVbU9P+bNm2iTp06AMycOZMWLVpEJNuJOuIskPv
2Fa9YO/R9YGKuI4qLg6pVvUe1al7B1rJl/vv0+fMLTYiQfvXVJA8ffrBYi4sLulgrScnJyaSmpvqyV01ERIqnTBZ+ZnbY0gMifhbKe23Fn460NMGsWbOoV68erVq1ili2ooZT/vWvf2XlypU450hISODFF1+MWL6jys31CrGtW2HLlkKPtM8/J2X2bDIPHCA9LY3UOnVIzsnxjg9MenVEMTGFC7eaNaFJk4Pv8/YV9f6UU7zhj0fRtW1bJhaYdKDrjTdC8+Yl+IM5fupVExEpG8pc4RcTE8O2bduoXr26ij8pFcyMbdu2ERMTE+koEkZF3UuXlZXFP/7xD9LT0yOarajhlJMnT45AErwZHvOKuCKKucO2b916xN639PLlyQzsy8zNJT0mhuTOnQsXaUUVblWreoVfCKlnTUREQq3MFX7169dnw4YNbNmyJdJRjsu+fft8WwD4NZtfc0Hw2WJiYo45Bb2UPYfeS1e7dm3efffd/N6+DRs20KZNG5YtW8Zpp50WyajFdsQhlXv3Hr1wO3Tb9u1HPklez1vNmtC4MXTo4L2uUePg9rxHjRp0/eijwr1q//d/4KMCSz1rIiISSmWu8IuOjqZRo0aRjnHcFi5cGLa13IrLr9n8mgv8ne1kUdTMmX//+9957733qFChAmeccQYTJ07Mnx0ynIq6l27IkCH5+xMSEli+fHlYZ/U8Lvv3w6ZN8PPPsHEjaR9+SMprr5GZk+MNqfzd70jOzvYKuczMotuIiipcsLVuXahoO6yQq1692Pe8qVdNREROZmWu8BMRyXOkmTO7dOnCyJEjKV++PMOGDWPkyJE88cQTYc93pKUJfMPMu/9t48ajP7Zs8Y4NSMeboAQCQyqzskju2PHohVzVquFZAkC9aiIicpJS4SciZdbRZs7M0759e95+++2I5DvS0gR5fvzxx9CdPK+XLq94C/TWHfbYu/fwz1avDvXqeY+2bQ++Djy6fvMNE2+77eCQymef9dWQShERkZORCj8RKbOONHNmQa+++irXXntthBKeuMPupQu2l27z5sMbq1ChcEGXnFy4qKtb13sc477V5FatSK1SRUMqRUREfESFn4iUWUXNnBkVFZW///HHH6d8+fJcf/31EUxZTGZe0bZ3fWxDAAAgAElEQVR2LWlvvUXKSy8dvJeudm2Sd+woupeuRo2DBVxSklfAHdJTR/XqJTbcUkMqRURE/EWFn4iUaYfOnJk3e+prr73G+++/z/z58/259Mu+fbBuHaxde/hj506giHvpKlcmOSXl8IKubl2oWDFilyIiIiKRp8JPRMq0ombOnDt3Lk8++SSLFi3Kv/8vIsxgwwb4978PL+7+979CE6ZQvz40bQrXXw9nnglNm9J1/Xom3nHHwXvpRo/WvXQiIiJSJBV+IlIixowZwyuvvIJzjpYtWzJx4kQ+/fRT7r77bvbv30/btm2ZMGEC5Ys5Bf+JKmrmzEGDBpGVlUWXLl0Ab4KXf/7zn6ELkZFRdHH3738XXt6gcmWvuOvQAW6+2XvdtCk0aQJxcYc1mwyk1q6te+lERETkmFT4icgJ27hxI+PGjWP16tXExsZyzTXX8Oabb/Lwww8zf/58zjzzTB566CEmTZqUP+wyXIqaOfM///lPyZ/owAGvl66ooZk//3zwOOcgIcEr6Dp2PFjcNW3qDcks5rBT3UsnIiIiwVDhJyIlIicnh7179xIdHU1mZiaVK1emQoUKnHnmmQD5a+eFu/ArCYVmzrzooqKLu//8B7KyDn6oalWvmPvDHwoXd40bH3NWTBEREZGSpsJPRE5YvXr1uPvuuzn99NOJjY2la9euXHPNNQwdOpTly5eTlJTE22+/zfr16yMdNXi7dsHXX5M2ZQopEyZ4M2e++y6peEMsAShfHs44wyvoLrvMew7cf0fNmmFZkFxEREQkGCr8ROSEbd++nVmzZvHDDz9QtWpV+vTpwxtvvMHUqVO58847ycrKomvXroWWUvCN3Fz
4/nv4+mtYtergc2Dx9EIzZwLp559P8j33eMVdo0YQHR2h4CIiIiLBU+EnIids3rx5NGrUiJo1awJw1VVX8emnn3LDDTfk32OXnp7Ov//970jG9JZB+OabwgXeN98cnGClXDmvx65dO7j1VkhMpOuvvzLxr389OHPmsGFwxRWRvQ4RERGRYlLhJyIn7PTTT2fp0qVkZmYSGxvL/PnzSUpKyl9KISsriyeeeIL7778/PIEOHID//rdwgbdqlTf5Sp5q1aBVK+jfHxITvddnnw2xsYWaSgZSa9TQzJkiIiJSqqnwE5ET1q5dO3r37k2bNm0oX74855xzDrfeeisPPPAA77//Prm5uQwcOJDf//73JX/yHTu8XruCQzUP7cXLWyLhttu8Ai8x0VvYPMh78DRzpoiIiJR2KvxESpmi1surWLEiDzzwANOnTycqKoqBAwcyePDgsOYaPnw4w4cPL7Ttqaee4qmnniqZExw44N2LV7AX7+uvC/finXrqwV68vAKviF48ERERkZONCj+RUqSo9fKmTp2KmbF+/Xq+++47ypUrx+bNmyMd9bjkL5vw888k169fuMD79tuDvXhRUQd78QYMODhU8zjWwRMRERE5GajwEyllDl0vr27dujzwwAO8+eablCtXDoBatWpFOGUx7NgBX3xB2qRJpEydSuaBA4WXTcjrxQtMtpJ/L57WwhMREREJmgo/kVKkqPXyunbtSkpKCm+99RYzZ86kZs2ajBs3jiZNmkQ67uGys73eu2XL4PPPvcd33wFFLJvQowfJ//ynevFERERESoAKP5FSpKj18qZMmUJWVhYxMTEsX76cGTNm8Oc//zl/GYWIMfPWwitY5H35Jezb5+2vWdNbNuH666FdO7pu2cLE/v0PLpvQv783AYuIiIiInDAVfiKlyJHWy6tfvz5XXXUVAL169aJv377hDxcYsplf5C1bBnn3GsbEQJs2MHCgV+y1awcNGxbqyUsGUuPitGyCiIiISAio8BMpRY60Xl6VKlVYsGABjRo1YtGiRZx55pmhDZI3ZDOvyPv8c1i79uD+s86CSy89WOS1bAnR0cdsVssmiIiIiISGCj+RUuRI6+Xt3buX66+/njFjxhAXF8crr7xScifNG7JZsMj76quDQzZr1fKKuz/9yXs+91w45ZSSO7+IiIiInDAVfiKlTFHr5VWsWJHZs2eXzAl27Ch8X96yZbBli7cvJgbatoXbbz/Ym3f66Zp8RURERMTnVPiJnGTy18rbtYvk7t0PDtnMK/YKDtls1gwuv/xgkdeiRVBDNkVERETEX1T4iZxE0mbMIOWPfyQzK4v0tDRSy5UjOSfH21m7tlfc3XgjnHeehmyKiIiIlCEq/ETKupwcWLAApk0jffJkMrOyAMjMzSU9MZHke+/VkE0RERGRMk6Fn0hZdOAALF4Mb70F77wDW7dCXBxd27dn4tKlZGZleWvlDR8OWjZBREREpMwrF+kAIn40ZswYmjdvTosWLUhJSWHfvn08++yzNG7cGOccW7dujXTEw+XmesXeoEHewue//z1MmQJ/+APMmAGbN5O8cCGp06bRs2dPUlNTtVaeiIiIyElCPX4ih9i4cSPjxo1j9erVxMbGcs011zB16lQuuOACevTo4a815nJzYelSr2dv+nTYtAliY70JWa69Fi67DCpVKvQRrZUnIiIicvJR4SdShJycHPbu3Ut0dDSZmZnUrVuXc845J9KxPGbeDJx5xd6GDVCxolfkXXMN9OgBcXGRTikiIiIiPqLCT+QQ9erV4+677+b0008nNjaWrl270rVr18iGMoMVK2DaNO/xv/9BhQrQvTuMGgVXXAFVqkQ2o4iIiIj4lu7xEznE9u3bmTVrFj/88AM///wze/bsYcqUKeEPYgYrV8K990Ljxt7yCmPGQPPmMGkS/PorzJoF11+vok9EREREjko9fiKHmDdvHo0aNaJmzZoAXHXVVXz66afccMMNoT+5GXz7rder99Z
bsG4dREV5E7Tcfz/07Amnnhr6HCIiIiJSpqjwEznE6aefztKlS8nMzCQ2Npb58+eTlJQU2pOuXn1wGOeaNVCuHFxyCfz979CrF9SoEdrzi4iIiEiZpqGeIodo164dvXv3pk2bNrRs2ZLc3FxuvfVWxo0bR/369dmwYQOJiYnccsstJ3aif/8bHn0UWrb0hm+OGAG1a8Pzz3uzc86bB/37q+gTERERkROmHj+RIgwfPpzhw4cX2jZ48GAGDx58Yg1///3BYZyrVnnbLrwQxo+Hq6+GOnVOrH0RERERkSKo8BMJkbS0NCZOnMiudetI3r7dK/hWrPB2dujgTdTSp4+32LqIiIiISAhFpPBzzt0J3AIY8A3QF6gDTAWqAyuAP5nZ/kjkEzlRaZMnk9KvH5nZ2aS/+y6pQPJ558HTT3vF3umnRzqiiIiIiJxEwn6Pn3OuHjAYSDKzFkAUcB3wBDDGzBoD24F+4c4mckIOHIC5c6FPH9JvuonM7GwAMoH0P/0JPv8c7rpLRZ+IiIiIhF2kJncpD8Q658oDlYBNwO+BtwP7JwE9I5RNpHj+9z945BFo1AguvRQWLKDrFVdQKSYGgEqVKtG1d+/IZhQRERGRk1rYCz8z2wg8DfyEV/DtxBvaucPMcgKHbQB045P4V1YWTJ8O3bp5Bd+IEdCsmXcf38aNJM+aRepbb9GzZ09SU1NJTk6OdGIREREROYk5MwvvCZ2rBrwDXAvsAKbj9fQ9EhjmiXOuAfBBYCjooZ+/FbgVoHbt2m2nTp0aruhhsXv3buLi4iIdo0h+zRbOXJV++IE6c+ZwWno60bt2sa92bX7p3p1N3buTddppEc1WHH7NBf7N5tdc4N9sylV8fs3m11zg32x+zQX+zaZcxefXbH7NBf7OdrwuueSSFWZ27EWnzSysD6APMKHA+xuBF4CtQPnAtg7Ah8dqq23btlbWLFiwINIRjqiks3333XfWqlWr/Ed8fLyNGTPGHn74Yatbt27+9tmzZ4c112EyMsxeecWsfXszMIuONuvd22zuXLOcnMhmO05+zWXm32x+zWXm32zKVXx+zebXXGb+zebXXGb+zaZcxefXbH7NZebvbMcLWG5B1GGRmNXzJ6C9c64SsBfoDCwHFgC98Wb2vAmYFYFsEkZNmzZl5cqVABw4cIB69erRq1cvJk6cyJ133sndd98duXBm3mQsr7wCU6fCnj3eUM7Ro+FPf4KaNSOXTURERESkmMJe+JnZ5865t4EvgRzgK+AlYDYw1Tn3WGDbhHBnk8iZP38+Z5xxBg0bNoxskK1bYfJkr+BbvRoqV4Zrr4VbboH27cG5yOYTERERETkOEVnHz8weBh4+ZPN/gfMiEEd8YOrUqaSkpOS/f/bZZ3n99ddJSkpi9OjRVKtWLXQnz82FefO8Yu/ddyE7G9q1g5df9oq++PjQnVtEREREJAwitZyDSL79+/eTlpZGnz59ABg4cCDff/89K1eupE6dOtx1112hOfFPP8Hw4d6snN26wccfw1/+At98A0uXer18KvpEREREpAyISI+fSEEffPABbdq0oXbt2gD5zwD9+/enR48eJXey/fshLc3r3UtP97b94Q/w1FNw5ZVQsWLJnUtERERExCdU+EnEpaamFhrmuWnTJurUqQPAzJkzadHisFU9im/1apgwAV5/3buPr0EDePBB6NsXEhJOvH0RERERER9T4ScRtWfPHj766CNefPHF/G1Dhw5l5cqVOOdISEgotK9Ydu/2FlR/5RX47DOIjobkZG8IZ5cuEBVVQlchIiIiIuJvKvwkoipXrsy2bdsKbZs8efLxN2gGy5YdXIZh925vGYann/aWYahV6wQTi4iIiIiUPir8pFRLS0tj4sSJ7NqwgeStW72C7//9P6hU6eAyDB06aBkGERERETmpqfCTUistLY2U664jc+9e0t99l1QguV07eOklr+irUiXSEUVEREREfEGFn5Ra6W+8QebevQBkAunXXUd
yampkQ4mIiIiI+JDW8ZPSad48uqalUSkwhLNSpUp0LTAzqIiIiIiIHKTCT0qfqVPhsstIbtKE1FdfpWfPnqSmppKcnBzpZCIiIiIivqShnlK6PPMM3HkndOwI775LctWqVElIoFOnTpFOJiIiIiLiW+rxk9IhNxeGDvWKvquvhrlzoWrVSKcSERERESkV1OMn/pedDf36weTJcPvtMG6cFl8XERERESkGFX7ib7t3Q+/e8OGH8NhjcN99WpNPRERERKSYVPiJf23eDJdfDl99BRMmwJ//HOlEIiIiIiKlkgo/8af//he6dYONG+Hdd6FHj0gnEhEREREptVT4if989RVceql3b9/8+dChQ6QTiYiIiIiUaprVU/xl/nxvqYaKFeGTT1T0iYiIiIiUABV+4h9Tp3o9fQ0bwqefQrNmkU4kIiIiIlImqPATfxg7FlJSvB6+JUugXr1IJxIRERERKTNU+ElkmcGwYXDHHXDVVd6yDVqYXURERESkRGlyF4mc7Gy45RZ4/XUYOBDGj9fC7CIiIiIiIaDCTyJj927o0wfmzoVHH4X779fC7CIiIiIiIaLCT8JvyxZvYfYVK+Dll71ePxERERERCRkVfhJeP/zgLcy+fr23MPsVV0Q6kYiIiIhImafCT8Lnq6/gsssgK8tbr+/88yOdSERERETkpKBZPSU8Pv7YW5g9Ohr+9S8VfSIiIiIiYaTCT0Lvrbege3dvYfbPPtPC7CIiIiIiYabCT0Jr3DhvYfb27WHxYi3MLiIiIiISASr8JDTM4J57YMgQ6NnTW5i9WrVIpxIREREROSlpchcpeQUXZh8wAJ59Vguzi4iIiIhEkHr8TgI7duygd+/enHXWWTRr1ozPPvuM6dOn07x5c8qVK8fy5ctL7mR79sCVV3pF34gR8PzzKvpERERERCJMPX4ngSFDhtC9e3fefvtt9u/fT2ZmJlWrVmXGjBncdtttJXciLcwuIiIiIuJLKvzKuJ07d7J48WJee+01ACpUqECFChWoWrVqyZ6o4MLsM2dCcnLJti8iIiIiIsdNQz3LuB9++IGaNWvSt29fzjnnHG655Rb27NlTsidZtcpbl2/rVpg3T0WfiIiIiIjPqPAr43Jycvjyyy8ZOHAgX331FZUrV2bUqFEld4IFC+Dii72F2T/5BC64oOTaFhERERGREqHCr4yrX78+9evXp127dgD07t2bL7/8smQanzbNW5i9QQP49FM4++ySaVdEREREREqUCr8y7rTTTqNBgwasXbsWgPnz53N2SRRo48fDddfBeefBkiVQv/6JtykiIiIiIiERdOHnnIt1zjUNZRgJjfHjx3P99deTmJjIypUrue+++5g5cyb169fns88+4/LLL6dbt27BNWYG990Hgwd7C7Onp2thdhERERERnwtqVk/n3BXA00AFoJFzrjUwwsw0i0cp0Lp168PW6uvVqxe9evUqVjsuJwf69oVJk+C22+C557RGn4iIiIhIKRDscg6PAOcBCwHMbKVzrlGIMokPpU2bRtrgwez89VeShw+HBx8E5yIdS0REREREghBs4ZdtZjtd4S/6FoI84kNps2aRkpJCZm4uqRUqkNq6Nckq+kRERERESo1g7/H7f865PwJRzrkmzrnxwKchzCU+kv7CC2Tm5gKQuX8/6enpEU4kIiIiIiLFEWzh91egOZAFpAK7gDuO96TOuarOubedc98559Y45zo45051zn3knFsXeNaMIX6QlUXXr76iUqCHr1KlSnTt2jXCoUREREREpDiCKvzMLNPM7jezc80sKfB63wmcdyww18zOAloBa4B7gPlm1gSYH3gvkTZ+PMmbN5P6yCP07NmT1NRUkpM1p4+IiIiISGkS7Kye73H4PX07geXAi8UpAp1zpwAXAzcDmNl+YL9z7kqgU+CwSXgTyQwLtl0JgS1b4NFH4bLLSH7oIaosXEinTp0inUpERERERIrJmR17jhbn3FigJt4wT4Br8YZ7GlD
FzP4U9Am9pSBeAlbj9fatAIYAG82sauAYB2zPe3/I528FbgWoXbt226lTpwZ76lJh9+7dxMXFRToGAE3GjqVuWhpfTJhAZkKCr7IV5Ndc4N9sfs0F/s3m11zg32zKVXx+zebXXODfbH7NBf7NplzF59dsfs0F/s52vC655JIVZpZ0zAPN7JgP4IsjbQP+XzBtFPhcEpADtAu8Hws8Cuw45Ljtx2qrbdu2VtYsWLAg0hE8q1ebRUWZ3X57/ibfZDuEX3OZ+TebX3OZ+TebX3OZ+TebchWfX7P5NZeZf7P5NZeZf7MpV/H5NZtfc5n5O9vxApZbEHVYsJO7xDnnTs97E3idVyrvD7KNPBuADWb2eeD920Ab4FfnXJ1A+3WAzcVsV0rS0KFQuTI88kikk4iIiIiIyAkKdh2/u4BPnHPfAw5oBNzunKuMdz9e0MzsF+fceudcUzNbC3TGG/a5GrgJGBV4nlWcdqUEzZsH778PTz4JNWtGOo2IiIiIiJygoAo/M5vjnGsCnBXYtNYOTujyzHGc96/AG865CsB/gb54M4xOc871A/4HXHMc7cqJOnAA7roLGjWCv/410mlERERERKQEBNvjB9AEaArEAK2cc5jZ68dzUjNbiXev36E6H097UoJeew2+/hreegtiYiKdRkRERERESkCwyzk8jLfUwtnAHOBS4BPguAo/8amMDHjgAejQAfr0iXQaEREREREpIcH2+PXGW3rhKzPr65yrDUwJXSyJiCefhF9+gZkzwblIpxERERERkRIS7Kyee80sF8hxzlXBm3GzQehiSditXw+jR0NKCrRvH+k0IiIiIiJSgoLt8VvunKsKvIy34Ppu4LOQpZLwu/9+yM2FkSMjnUREREREREpYsLN63h54+U/n3Fygipl9HbpYElbLl8PkyXDvvdCwYaTTiIiIiIhICQtqqKdzbn7eazP70cy+LrhNSjEz+NvfoFYtuOeeSKcREREREZEQOGqPn3MuBqgE1HDOVcNbvB2gClAvxNkkHGbOhCVL4J//hCpVIp1GRERERERC4FhDPW8D7gDq4t3bl1f47QKeDWEuCYf9+2HoUGjeHPr1i3QaEREREREJkaMWfmY2FhjrnPurmY0PUyYJl+eeg++/h7lzoXyw8/yIiIiIiEhpE+zkLuOdc+cDCQU/Y2ZawL202rYNRoyA7t2hW7dIpxERERERkRAKqvBzzk0GzgBWAgcCmw1Q4VdajRgBu3bB009HOomIiIiIiIRYsOP7koCzzcxCGUbCZO1aeP556N/fu79PRERERETKtKCWcwC+BU4LZRAJo6FDITYWhg+PdBIREREREQmDYHv8agCrnXPLgKy8jWaWHJJUEjoLFkBaGowcCbVrRzqNiIiIiIiEQbCF3yOhDCFhcuCAt1h7w4Zwxx2RTiMiIiIiImES7Kyei5xzDYEmZjbPOVcJiAptNClxkyfDypWQmgoxMZFOIyIiIiIiYRLUPX7Ouf7A28CLgU31gHdDFUpCYM8euO8+aN8err020mlERERERCSMgh3q+RfgPOBzADNb55yrFbJUUvKeego2bYJ33gHnIp1GRERERETCKNhZPbPMbH/eG+dcebx1/KQ02LgRnnwSrrkGOnSIdBoREREREQmzYAu/Rc65+4BY51wXYDrwXuhiSYl64AFvYpdRoyKdREREREREIiDYwu8eYAvwDXAbMAd4IFShpAR9+SVMmuTN4tmoUaTTiIiIiIhIBAR7j18s8KqZvQzgnIsKbMsMVTApAWZw111Qvbo3sYuIiIiIiJyUgu3xm49X6OWJBeaVfBwpUWlpsHAhjBgBp5wS6TQiIiIiIhIhwRZ+MWa2O+9N4HWl0ESSErF/P/z979CsGfTvH+k0IiIiIiISQcEO9dzjnGtjZl8COOfaAntDF0tO2AsvwLp1MHs2lA/2j1lERERERMqiYCuCIcB059zPgANOA7QKuF/99hsMHw5dusCll0Y6jYiIiIiIRNgxCz/
nXDmgAnAW0DSwea2ZZYcymJyAxx6DnTth9Ggt1i4iIiIiIscu/Mws1zn3nJmdA3wbhkxyItatg2efhX79oGXLSKcREREREREfCHpWT+fc1c6p+8j3hg2DihW9mTxFREREREQIvvC7DZgO7HfO7XLOZTjndoUwlxyPRYtg5ky491447bRIpxEREREREZ8IanIXM4sPdRA5Qbm58Le/QYMGcOedkU4jIiIiIiI+ElThFxjieT3QyMwedc41AOqY2bKQppPgvfEGfPklTJkCsbGRTiMiIiIiIj4S7FDP54EOwB8D73cDz4UkkRRfZqY3vPPccyElJdJpRERERETEZ4Jdx6+dmbVxzn0FYGbbnXMVQphLimP0aNi4EaZOhXLB1vIiIiIiInKyCLZKyHbORQEG4JyrCeSGLJUE7+efYdQo6N0bLrww0mlERERERMSHgi38xgEzgVrOuceBT4B/hCyVBO/BByEnxyv+REREREREihDsrJ5vOOdWAJ0BB/Q0szUhTSbHtmoVTJzozeZ5xhmRTiMiIiIiIj511MLPORcDDAAaA98AL5pZTjiCyTGYeQXfqafCAw9EOo2IiIiIiPjYsXr8JgHZwBLgUqAZcEeoQ0kQZs+Gjz+G8eOhatVIpxERERERER87VuF3tpm1BHDOTQC0bp8fZGfD3XdD06Zw222RTiMiIiIiIj53rMIvO++FmeV467hLxL34IqxdC++9B9HRkU4jIiIiIiI+d6zCr5VzblfgtQNiA+8dYGZWJaTp5HA7dsAjj0DnznD55ZFOIyIiIiIipcBRCz8ziwrViQPrAi4HNppZD+dcI2AqUB1YAfzJzPaH6vyl1uOPw2+/eYu2qwdWRERERESCEOw6fqEwBCi4JMQTwBgzawxsB/pFJJWfff89jBsHfftCq1aRTiMiIiIiIqVERAo/51x94HLglcB7B/weeDtwyCSgZySy+do993j39D36aKSTiIiIiIhIKRKpHr9ngKFAbuB9dWBHgTUCNwD1IhHMtz75BN5+G4YNg7p1I51GRERERERKEWdm4T2hcz2Ay8zsdudcJ+Bu4GZgaWCYJ865BsAHZtaiiM/fCtwKULt27bZTp04NV/Sw2L17N3FxcYU35ubS5i9/oeLWrXw+eTK5MTH+yeYDfs0F/s3m11zg32x+zQX+zaZcxefXbH7NBf7N5tdc4N9sylV8fs3m11zg72zH65JLLllhZknHPNDMwvoARuL16P0I/AJkAm8AW4HygWM6AB8eq622bduaX+zdu9fOPfdcS0xMtLPPPtseeughMzO76aabLCEhwVq1amWtWrWyr7766qjtLFiw4PCNU6aYgdnrr4cgefCKzOYDfs1l5t9sfs1l5t9sfs1l5t9sylV8fs3m11xm/s3m11xm/s2mXMXn12x+zWXm72zHC1huQdRhx1rOocSZ2b3AvQB5PX5mdr1zbjrQG29mz5uAWeHOdiIqVqzIxx9/TFxcHNnZ2Vx44YVceumlADz11FP07t37+BreuxfuvRfatoXrry/BxCIiIiIicrKI5KyehxoG/M059x+8e/4mRDhPsTjn8ruNs7Ozyc7OpkQWvB8zBtavh//7Pyjnpz8uEREREREpLSJaSZjZQjPrEXj9XzM7z8wam1kfM8uKZLbjceDAAVq3bk2tWrXo0qUL7dq1A+D+++8nMTGRO++8k6ysYlzWL7/AyJHQqxdcfHGIUouIiIiISFmnLqQSFBUVxcqVK9mwYQPLli3j22+/ZeTIkXz33Xd88cUX/PbbbzzxxBPBN/jQQ5CVBcX5jIiIiIiIyCFU+IVA1apVueSSS5g7dy516tTBOUfFihXp27cvy5YtC66Rr7+GCRNg0CBo0iS0gUVEREREpExT4VdCtmzZwo4dO/j/7d19sFx1ecDx70MCMWmKQVDkTW7VUGRIjBLFah1JGCs1FnWKGGttYHRSpIx9CWPTdqQzzjAT6/iCtZ0URcVWjagQMwbRDJiKNohBAkm
IINJUAymgFWtKVBKe/nHOlWsgL7t7ds+Pk+9n5s7d93zZvZzdZ8/ZswA7d+5kzZo1nHzyyWzfvh2o9h6C0O0AAA/vSURBVJ66cuVKTj31cd9Q8XiZcPHFMGMGvOtdw8yWJEmSdBAY+V49u2r79u0sWrSI3bt38+ijj3Luuefymte8hvnz5/Pggw+SmcyZM4fly5fv/8auuw7WrIHLLoMjjhh+vCRJkqROc/BryOzZs7n11lsfd/oNN9zQ0+3E7t2wZEm1eecFFzSVJ0mSJOkg5uBXkFWrVnHVJZfw0y1bOHvlSjjssLaTJEmSJHWAg18hVq1axZsWLuThnTu55pBD+AxwdttRkiRJkjrBwW8Ixpau7vk6P15zOQ/v3AnAw48+ypvf/RGOXNf7w7N12YKeryNJkiSp29yrZyGmjr2QmDwFgJg8haljL2y5SJIkSVJXuMavENNmns5RZ7+Tk392C9/9zdOYNvP0tpMkSZIkdYSDX0GmzTydN8w6jfdt9GGRJEmS1Bw39ZQkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI5z8JMkSZKkjnPwkyRJkqSOc/CTJEmSpI6b3HaARmts6eq+r7tk1i7O6/P6W5ct6PvflSRJkjQY1/hJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUseNfPCLiBMi4msRcUdEbI6IP69Pf1pErImI79W/jxh1myRJkiR1URtr/HYBSzLzFOAlwJ9FxCnAUuD6zJwJXF8flyRJkiQNaOSDX2Zuz8zv1Id/BmwBjgNeC1xZX+xK4HWjbpMkSZKkLorMbO8fjxgDvg6cCvwgM2fUpwfwk/Hje1xnMbAY4Oijjz5txYoVI+s9UBvv/Wnf1z16Kty/s/9/e9ZxT93n+W217a9rEDt27GD69OlDu/1BlNpWaheU21ZqF5TbZlfvSm0rtQvKbSu1C8pts6t3pbaV2gVlt/Vr3rx5t2Tm3P1drrXBLyKmA/8OXJqZV0fEQxMHvYj4SWbu83N+c+fOzfXr1w87tWdjS1f3fd0ls3bxvo2T+77+1mUL9nl+W2376xrE2rVrOeOMM4Z2+4Mota3ULii3rdQuKLfNrt6V2lZqF5TbVmoXlNtmV+9KbSu1C8pu61dEHNDg18pePSPiUOALwKcy8+r65Psj4pj6/GOAB9pokyRJkqSuaWOvngFcAWzJzPdPOGsVsKg+vAj44qjbJEmSJKmL+t+msH8vA94CbIyIDfVpfwssA66KiLcC/wWc20KbJEmSJHXOyAe/zPwGEHs5+8xRtkiSJEnSwaCVz/hJkiRJkkbHwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOm5y2wESwNjS1X1fd8msXZw3wPW3LlvQ93UlSZKkJwPX+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSxzn4SZIkSVLHOfhJkiRJUsc5+EmSJElSx01uO0Aq2djS1QNdf8msXZzX521sXbZgoH9bkiRJGucaP0mSJEnqONf4SU9Sg6yNHGRNJLg2UpIk6cnGNX6SJEmS1HEOfpIkSZLUcQ5+kiRJktRxDn6SJEm
S1HHu3EVSo/wKDEmSpPI4+Ek6aLS1J1QHUkmS1DYHP0lqmWtJJUnSsPkZP0mSJEnquKLW+EXEWcBlwCTgo5m5rOUkSTqouXmsJEndUMzgFxGTgH8CXglsA74dEasy8452yyRJpWlrIIV9D6VutitJKlUxgx/wYuDuzLwHICJWAK8FHPwkSRpQqcOyJGk0Shr8jgN+OOH4NuD0llokSdIIlLyWtNRNnUu+zySVKzKz7QYAIuIc4KzMfFt9/C3A6Zl50R6XWwwsro/+NnDnSEOH7yjgR21H7EWpbaV2QbltpXZBuW2ldkG5bXb1rtS2Urug3LZSu6DcNrt6V2pbqV1Qdlu/TszMp+/vQiWt8bsXOGHC8ePr035NZl4OXD6qqFGLiPWZObftjidSalupXVBuW6ldUG5bqV1QbptdvSu1rdQuKLet1C4ot82u3pXaVmoXlN02bCV9ncO3gZkR8VsRcRiwEFjVcpMkSZIkPekVs8YvM3dFxEXAV6i+zuFjmbm55SxJkiRJetIrZvADyMxrgWvb7mhZyZuxltpWaheU21ZqF5TbVmoXlNtmV+9KbSu1C8ptK7ULym2zq3eltpXaBWW3DVUxO3eRJEmSJA1HSZ/xkyRJkiQNgYPfiEXE7ojYEBGbIuJzETGtPv2ZEbEiIr4fEbdExLURcVJ93nUR8VBEfKmUroiYExHrImJzRNweEW8sqO3EiPhOfZ3NEXFBCV0Trnd4RGyLiA8Po6vftgnX2RARQ9mxUp9dz4qIr0bEloi4IyLG2u6KiHkT7qsNEfHziHhd0139tNXn/UP9t78lIj4UEVFI13vqy29qepnRZ88TLluj2snYtyLi7oj4bFQ7HCuh66K6KSPiqH6bhtD1qYi4s76tj0XEoQW1XRERt0X1PPX5iJheQteE2/xQROzot2kYbRHxiYj4z3hs+TankK6IiEsj4q6olm3v6LdrCG03Tri/7ouIlYV0nRmPvR76RkQ8t5Cu+XXXpoi4MiIG+uhZr22xj9ew0eDyv0iZ6c8If4AdEw5/CvgrIIB1wAUTzns+8PL68JnAHwBfKqULOAmYWZ92LLAdmFFI22HAlPq06cBW4Ni2uyYcvwz4NPDhUh7PPa9TWNda4JUTHs9pJXRNOO1pwP8Mo6ufNuClwDepdpI1qb7cGQV0LQDWUH22/Deo9uR8eMt/W0+4bAWuAhbWh5cDby+k6wXAGNUy7aiC7q9X19cN4DOD3F9DaDt8wuH3A0tL6KrPmwv8Kw0sexu+zz4BnDNo0xC6zgc+CRxSH39GKW173O4XgD8poQu4C3heffhC4BNtd1GtdPohcFJ9/N3AW0f5WLKP17A0uPwv8cc1fu26EXguMA94JDOXj5+Rmbdl5o314euBn5XUlZl3Zeb36tPuAx4A9vvFkSNq+2Vm/qI+eQqjWbN9QI9lRJwGHA18dQRNPbW1YL9dEXEKMDkz19Sn78jMh9vu2uPy5wBfHkHXgbYl8BTqN0CAQ4H7C+g6Bfh6Zu7KzP8DbgfOarHnCZetERHAfODz9UlXAk2tzR1omZ+Zt2bm1oZamuy6NmvAzVTfw1tK2//Crx7XqVT/f7TeFRGTgPcC72yop7G2IRq06+3AuzPz0fpyDxTUBlRb9FAtP/pe49dwVwKH14efCtxXQNeRwC8z8676+BrgDxvqOqC2vb2GHfLyvwgOfi2pV2v/PrAROBW4pd2iSj9dEfFiqheZ3y+lLSJOiIjbqd5Vek/9P3arXRFxCPA+4OJhtfTbVntKRKyPiJtiSJst9tF1EvBQRFwdEbdGxHvrF0xtd020kGotx1AdaFtmrgO+RvUO5nbgK5m5pe0u4DbgrIiYFtVmivOAE1rs2ZsjgYcyc1d9fBtwXAFdQ9FkV1SbeL4FuK6ktoj4OPDfwMnAPxbSdRGwKjO3D9ozhDaAS+tN4D4QEVMK6XoO8Mb6eerLETFz0K4G28a9Drh+/A2HArreBlwbEduo/t9cVkDXj4DJETH
+Bern0NBzQQOvYYey/C+Jg9/oTY2IDcB64AfAFS33jOurKyKOodpU5fzxd+FKaMvMH2bmbKp3fRZFxNEFdF0IXJuZ24bQsqd+Hs8TM3Mu8EfAByPiOQV0TabaLONi4EXAs4HzCugCfvX3P4vq+0eHpae2+jMcz6Na63IcMD8iXt52V2Z+lerrev6DalBeB+xuq2eEDqauf6ZaqzvoVgSNtmXm+VSbc20BBvlsaSNdEXEs8AYaGEKbbqv9DdWQ/CKqTdn/upCuKcDP6+epjwAfG+C2mm4b9yYGfyOwya6/BF6dmccDH6fa3LnVrnrLgIXAByLiZqo1goM+F5T8GrYoRX2P30FiZ2b+2gelI2Iz1Tsebeq5q96kYTXwd5l5U0lt4zLzvojYRDU8fH5/lx9y1+8AL4+IC6k+q3ZYROzIzKUNd/XTRmbeW/++JyLWUn2eqOm1uL12bQM2ZOY99WVXAi+h+RfP/f6NnQtck5mPNNwzUa9trwduyswd9WW/TPW31/Qmvf38jV0KXFpf9tNUnz9prWcvfgzMiIjJ9bu+xwP3FtDVtEa7IuLvqTb3/9PS2gAyc3dErKDatPLjLXe9gOpNyburLcuYFhF3Z2bfO95osI0JayF/Ua8tHWQrlSYfy23A1fXha+j/cRxGG/WWDC+mWga33hURTween5nfqk/6LIOtjW/yb2wd1esyIuL3qLbuGURTr2GbXv4XxzV+ZbgBmBIRi8dPiIjZQ3qXvhd77YpqL0fXAJ/MzKYHqkHbjo+IqfVpRwC/C9zZdldmvjkzn5WZY1RPpJ8c0tDXc1tEHDG+OU/95PUy4I62u6h2ADKjfgKDatv7ErrGNfHubj/21fYD4BURMbne9O4VVGs6Wu2KiEkRceT4acBshv9Z156XrfW70V/jsRcMi4Avtt01In11RcTbgFcBbxriu+Y9t0XlueOHgbOB77bdlZmrM/OZmTlWPx88PODQ11hbfZlj6t9BteniphK6qD43N68+/AqafeNo0DaolhlfysyfF9L1E+Cp8dhexV9J888F/f6NPaP+PYVqjfLyfV2+6ba9vYYd0fK/XVnAHmYOph/2svcuqs1QrqJaw7KZ6l2I8T0O3Qg8COykesfrVW13AX8MPAJsmPAzp4T7jGrhdjvVZ4puBxaX0LXHZc5jRHv1PMD77KVU28TfVv8eaA9bTd5nEx7PjVR7mzuskK4xqncCDxnW49jnYzkJ+BeqJ/g7gPcX0vWUuucO4Kamlxd9PoZPuGyl2qT4ZuBu4HPUewkuoOsd9fFdVDtp+GghXbvqy48/F1xSwmNJ9eb2N+tlxyaqvf31vSfZJu+zA7ndFh/PGybcZ/8GTC+ka0Z9uY1Um4o/v5T7rD5vLXBWYY/l63nsuX0t8OxCut5L9Rx1J/AXo77P2MdrWBpc/pf4E/V/pCRJkiSpo9zUU5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjrOwU+SJEmSOs7BT5IkSZI6zsFPkiRJkjru/wGW3Wp2GDdpLwAAAABJRU5ErkJggg==\n",
+      "image/png": "iVBORw0KGgoAAAANSUhEUgAABJUAAAE/CAYAAAD/i/LxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3X18zfX/x/HHZ+zKbK4tzEWuks21RIXhayk6IfHdVyWR7yQX35L0+36lVCjkoiKSucpcsxPKvhj5klwUJaJLF5FrtjmY7bx+f3zOzjY759gMx3jdb7fP7Vx9Xuf9PidiT6/3+2OICEoppZRSSimllFJK5YWPtyeglFJKKaWUUkoppQoeDZWUUkoppZRSSimlVJ5pqKSUUkoppZRSSiml8kxDJaWUUkoppZRSSimVZxoqKaWUUkoppZRSSqk801BJKaWUUkoppZRSSuWZhkpKKaWUuq0ZhjHTMIy3c3nuF4Zh9LgBc6hiGIYYhlH4er+3i7GaG4ax70aPo5RSSil1w/9io5RSSimVG4Zh/AGEAulZnp4pIi/erDmIyCM3a6wbRUQ2Avd4ex5KKaWUuv1pqKSUUkqpW8ljIrLG25MoqAzDKCwiad6eh1JKKaXuDLr8TSmllFK3PMMwphiGsSTL43cNw1hrmCINwzhsGMb/GYZx0jCMPwzD6O7mfUoYhrHCMIwThmGccdwPy/L6esMwejvuP2sYxv8MwxjrOPd3wzAeyXJuMcMwPjUM46hhGH8ahvG2YRiFHK8VctSdNAzjN6C9h8/2qmEYi694bqJhGJMc93sahrHXMIxkwzB+Mwzjn1nOy/jsrxqG8RcQm/FclnOGGobxq6N+j2EYnbK8drXPWNIwjFjDMI44Xl+e5bUOhmHsNAzjrGEYmw3DqOvuMyqllFLq9qShklJKKaUKgpeBOo4QpDnQC+ghIuJ4/S6gNFAB6AFMMwzD1RIwHyAWqAxUAi4AH3oY935gn+O93wM+NQzDcLw2E0gDqgMNgCigt+O154EOjucbA108jDEfeNQwjGAwAymgKzDP8fpxx3uFAD2B8YZhNMxSfxdQ0vGZ+rh4/1+B5kAx4E1grmEY5XL5GecARYBwoCww3jHHBsAM4J9AKWAqYDUMw9/D51RKKaXUbUZDJaWUUkrdSpY7Ol8yjucBRMQGPA28D8wF+ovI4Stqh4nIJRHZAKzEDGayEZFTIrJERGwikgy8A7T0MJ8DIvKJiKQDs4ByQKhhGKHAo8AgETkvIscxA5e/O+q6AhNE5JCInAZGuRtARA4A3wIZHUStAZuIbHG8vlJEfhXTBiABMyTKYAeGOz77BRfvv0hEjoiIXUQWAD8DTXLxGcsBjwAxInJGRC47xgczvJoqIt+ISLqIzAIuAU09fJdKKaWUus3onkpKKaWUupV0dLenkoh841hKVhZYeMXLZ0TkfJbHB4DyV76HYRhFMMOfdkAJx9PBhmEUcoQqV/ory/g2RwNPUczOIF/gaGZTDz7AIcf98lnuZ8zHk3lANDAb+AeZXUo4lqMNB2o6xigC/JCl9oSIXHT3xoZhPAO8BFRxPFUUsyspN5/xtIiccfG2lYEehmH0z/KcHy6+c6WUUkrdvrRTSSmllFIFgmEY/QB/4Agw5IqXSxiGEZTlcSXHeVd6GfPKaPeLSAjQIuPt8zidQ5idOaVFpLjjCBGRcMfrR4GKV8zHk0VApGN/p044QiXHcrIlwFggVESKA6uumK/ghmEYlYFPgBeBUo763eTu8x4CShqGUdzNa+9k+ezFRaSIiMTl4n2VUkopdZvQUEkppZRStzzDMGoCbwNPYS6DG2IYRv0rTnvTMAw/x55LHTCDmisFY+6jdNYwjJKYHUB5JiJHMZehjTMMI8QwDB/DMKoZhpGxlG4hMMAwjDDDMEoAQ6/yfieA9Zj7Pf0uInsdL/lhBmkngDRH11JUHqYahBk6nQB
z028gIg+f8QtgsmODc1/DMDJCuE+AGMMw7ndslh5kGEb7jH2hlFJKKXVn0FBJKaWUUreSzw3DSMlyLDMMozDmPkrvisguEfkZ+D9gTpaNof8CzmB2J32GuQ/QTy7efwIQCJwEtgBf5mOuz2CGPnscYy/G3I8IzNBlNbALc7+kpbl4v3nA38iy9M2x79MAzJDqDObSOGtuJygie4BxwNfAMaAOsCm39ZgB3mXgJ8wNwwc53nc75mbkHzrm9QvwbB7eVymllFK3ASPzoilKKaWUUgWPYRiRwFwRCfP2XJRSSiml7iTaqaSUUkoppZRSSiml8kxDJaWUUkoppZRSSimVZ7r8TSmllFJKKaWUUkrlmXYqKaWUUkoppZRSSqk801BJKaWUUkoppZRSSuVZYW9PID9Kly4tVapU8fY0bqjz588TFBTklXpvjp3fep27zr0gjZ3fep37nTd3/d507gVp7PzW69x17gVp7PzW69zvvLnr93Znzr0g2LFjx0kRKXPVE0WkwB6NGjWS211iYqLX6r05dn7rde7eqde5e6de5+6d+jt17PzW69y9U69z9069zt079Tp379Tr3Ave2Pmt17nf3oDtkotcRpe/KaWUUkoppZRSSqk801BJKaWUUkoppZRSSuWZhkpKKaWUUkoppW57EydOJCIigvDwcCZMmADArl276NevH3Xq1OGxxx4jKSnphtR7c2ydu/fmfico0Bt1u3L58mUOHz7MxYsXvT2V66JYsWLs3bvXK/XeHDu/9QVt7gEBAYSFheHr63vNYyqllFJKKXU1EydO5JNPPkFEeP755xk0aBC7du0iJiaGY8eOER4ezmeffUZISEie6vv164ePjw9VqlS5YfX5mfvu3bv55JNP2Lp1K35+frRr144OHTrQu3dv53vNmDGDMWPG8NZbb13Xem+OrXP33tzvFLddqHT48GGCg4OpUqUKhmF4ezr5lpycTHBwsFfqvTl2fusL0txFhFOnTnH48GHuvvvuax5TKaWUUkrlnquAYufOnbzwwgv4+flRuHBhJk+eTJMmTXJdGxMTw8mTJylevLjbWnf1NyOY8fRD8tixYxERfvvttwIXEORm7nv37uX++++nSJEiALRs2ZKlS5eyf/9+6tWrB0Dbtm15+OGHr3u9N8fWuXtv7neK227528WLFylVqtRtESipO4NhGJQqVeq26a5TSiml1J3D1dKQjGCmfv36NG7cmK1bt+aptmnTpvTu3dtjbX7HzhpQ7Nq1ixUrVvDLL78wZMgQevTowc6dOxkxYgRDhgzJU+3w4cOZPn2621pP9RnByg8//ECnTp0YM2bMda/P+kNy4cKFs/2Q3KJFC8D8IXnJkiUux/ZUn/WH7BtRn9+5R0REsHHjRk6dOoXNZmPVqlUcOnSI8PBwNm3aBMCiRYs4dOjQda/35tg6d+/N/U5x24VKgAZKqsDRX7NKKaWUuhYazOR9bHAfUBiGwfnz5wE4d+4c5cuXz1Ntxt4q7mo91d+MYMbTD8nx8fFAwQwIcjP3e++9l1dffZWoqCjatWtH/fr1KVSoEDNmzCA+Pp5GjRqRnJyMn5/fda/35tg6d+/N/U5xW4ZK3vbOO+8QHh5O3bp1qV+/Pt98881NG/unn36ifv36NGjQgF9//fWGjPHss8+yePFij+e8/vrrJCYmXpfxIiMj2b59+3V5r6xef/111qxZc93fVymllFJ3FlfBTLdu3ejduzf169enSpUq1K9f/7rXazBzbWOD+4BiwoQJTJ06lYoVKzJ48GBGjRqVp9pXXnmFrl27uq31VH8zghlPPyRPnjyZPn36FMiAIDdzB+jVqxc7duzgq6++okSJEtSsWZNatWoxZswYduzYQXR0NNWqVbsh9d4cW+fuvbnfCW7YnkqGYcwAOgDHRSTC8VxJYAFQBfgD6CoiZwyzTWMi8ChgA54VkW9v1NxupK+//poVK1bw7bff4u/vz8mTJ0lNTb1p4y9fvpwuXbr
wn//8BzD39vGGESNGeG3s3EhPT2fEiBHenoZSSimlrhNXe8x069aNHTt2ULRoUc6ePUvx4sXZuXNnrmv37dtHSkoKaWlpbmvd7fOyYMEC1q9fT2RkJC+//DLFihVzOe/81Lvb7yMjmAkMDMx1MHNlfW6DmX//+9+cOnWKwMBAVq1aRePGjZkwYQKRkZHExsZit9vZvHlznmoffvhhLl68iK+vr8va/I4N2QOKoKAgZ0AxZcoUXnjhBYYPH87ChQvp1atXjn+E9FQ7fvx4SpUqxfHjx13WeqqfMWMGTz/9NMuWLcNiseQqmLmW+l69etGrVy8A/u///o+wsDBq1apFQkIC69evp3z58qxcudJlraf6MWPGEBkZyf79+29YfX7nfvz4ccqWLcvBgwdZunQpW7Zs4fjx4wDY7XbefvttYmJibki9N8fWuXtv7ncEEbkhB9ACaAjszvLce8BQx/2hwLuO+48CXwAG0BT4JjdjNGrUSK60Z8+eHM/dTEuWLJEOHTq4fK1y5cpy4sQJERHZtm2btGzZUkREhg8fLs8884w89NBDUqlSJVmyZIm88sorEhERIW3atJHU1NQc7/Xdd9/J/fffL3Xq1JGOHTvK6dOnZeXKlRIaGirly5eXyMhIERFJSkpy1qxevVqaNm0qDRo0kC5dukhycrKcPXtWatasKT/99JOIiPz973+XadOmiYhIUFCQDBo0SGrXri2tW7eW48ePi4hIjx49ZNGiRSIi8uabb0rjxo0lPDxcnn/+ebHb7c5zZs+e7fzcr7/+ujRo0EAiIiJk7969IiKSkpIiPXv2lPvuu0/q168vy5cvFxERm80m3bp1k5o1a0rHjh2lSZMmsm3btmyf/4svvpAuXbo4HycmJkr79u1FRCQmJkYaNGggtWvXltdffz3b9z9kyBBp0KCBxMXFefwc586dExGRli1bypAhQ+S+++6TGjVqyFdffSUiImlpafLyyy9LeHi41KlTRyZNmiQiItu3b5cWLVpI/fr1JSoqSo4cOSIiIhMnTpR7771X6tSpI926dXP56yPj125iYqLL13PLm/U6d+/U69y9U3+njp3fep27d+pvxtgTJkyQ8PBwqV27towfP975/KRJk6RixYpSu3ZteeWVV/JU37VrV6lWrZrUq1dPKleuLPXq1XNZ+8MPP0h4eLicP39eLl++LG3atJGff/4529xfeuklefPNN/NUm1HvrlZEZOHChfLcc885H48YMULeffddZ63dbpewsDDZv3//da/fs2eP1KhRQ06ePCnnz5+Xpk2byosvvih79uyRihUrSpkyZaR8+fLyxx9/uBzbU33ZsmUlLCzMY72IyPTp06Vhw4bSvHlziYmJkYEDB0r//v3ljTfeEBGRBQsWSJs2bfJUu3jxYklMTPRYm9+xr/Taa6/JRx99JCEhIbJu3ToREbHb7RIcHJynWrvd7vzvlpvarPUimb9e9+3bJ/fdd98NqT927JiIiBw4cEDuueceOXPmjPO5tWvXytNPPy2ffvqp2/Hc1ScmJkp6evoNrc/v3B966CG59957pW7durJmzRoRMf/fExYWJjVq1JBXX33V+fPM9a735tg6d+/NvSADtktusp/cnHStB2ZHUtZQaR9QznG/HLDPcX8qEO3qPE/HrRgqJScnS7169aRGjRrSt29fWb9+vfM1T6HSgw8+KKmpqbJz504JDAyUVatWiYhIhw4dZNmyZTnGqVOnjvO9hw0bJgMHDnS+15gxY5znZYRKJ06ckObNm0tKSoqIiIwePdr5l6OEhARp2rSpxMXFycMPP+ysBWTu3LkiYoYu/fr1E5HsodKpU6ec5z/11FNitVqd52QNlTJCl48++kh69eolIuYfgHPmzBERkTNnzkiNGjUkJSVFxo0bJz179pSkpCTZtWuXFCpUKEeodPnyZalYsaLz88TExDjf69SpU5KUlCRpaWnSsmVL2bVrl3MeGX9Ju9rnWLBggYiYodJLL70
kIiIrV650/sVk8uTJ8sQTT8jly5ed9ampqdKsWTM5fvy4JCUlyfz586Vnz54iIlKuXDm5ePGi87O6oqGSzr0gjp3fep17wRs7v/U6d+/U57bWXTDUv39/ueeee9wGQ+6CmXXr1kmbNm1k9erVIpL5A2Fu67PO/UYFO55qRUTWrVvnMRRyF8xkjL1hwwZx9XfW61Wvwcy1j+0qoKhVq5bz1/6aNWukYcOGeapNTEyUxMREj7Xu6m9WMOPuh+QaNWpIWFhYgQwIcjt3dwrC/59vxXqd++0tt6HSDVv+5kaoiBx13P8LCHXcrwBkXfR72PHcUfJj0CBw0aacL/Xrg2OtuytFixZlx44dbNy4kcTERLp168bo0aN59tlnPb7tI488gq+vL3Xq1CE9PZ127doBULt2bf74449s5547d46zZ8/SsmVLAHr06MGTTz7p8f23bNnCnj17ePDBBwFITU2lWbNmgLmR36JFi+jXrx+7du1y1vj4+NCtWzcAnnrqKTp37pzjfRMTE3nvvfew2WycPn2a8PBwHnvssRznZdQ2atSIpUuXApCQkIDVamXs2LGAeeW+gwcP8tVXXzFgwAAA6tatS926dXO8X+HChWnXrh2ff/45Xbp0YeXKlbz33nsALFy4kI8//hi73c7Ro0fZs2eP8z0yPs/VPkf16tVdzj3jv8WaNWuIiYmhcGHzt1DJkiXZvXs3u3fvpm3bttjtdkSEcuXKOT9H9+7d6dixIx07dnQ5B6WUUgpcL8UCWLp0KTExMRQqVIj27ds7/9y7XrXulmEdOnSITZs2sWvXLvz9/Z1t/1m5W0a1fft2hg4d6vzzsmzZsi4/s7v6jL18RISFCxeybt06l/XulkJl2LhxI6GhodSoUSPPtd9//73bWnC/FClDXFwc0dHRLmuvR72r5UCvvfYaEydOZMOGDTz55JP07t07z/XLly8HuGq9q6UhH3zwAbt27aJVq1asW7fO7XfnrnbDhg0AHmvzOzbAE088walTp/D19eWjjz6iePHifPLJJzz33HPExsYSEBDAtGnT8lQ7cOBAzp49S+nSpd3WuqufOHEiY8eOJTAwkM6dO9OzZ88bUr9x48Yczw0cOJCBAwc6l1x64q6+Xr16V63Nb31+5+6K1WolNjaWpKQkLBbLTa335tj5rc91rdnJAnZ7tlvr558zc/Zsko4dw/LII5nnZT3Xw3PW1auZvXAhST//jOVvf8t5vof3siYmMnf5cpJ27sTSsmXOmqzzdnFYN25k7urVJL300jV977ebmx0qOYmIGIYhea0zDKMP0AcgNDSU9evXZ3u9WLFizr18/FNT8UlPz/9ks7CnpnIpF3sFNWrUiEaNGlG9enXmzZvHE088gY+PD0lJSfj7+3P69GnS09NJTk7m0qVL+Pr6Ouft6+tLSkoKgHNNe9b9iZKTkxER53MpKSnY7XaX75Uxhs1mc64vzyo5ORm73c7u3bsJDAzk8OHD2dbsJycnU7hwYVJSUpxjXr58mQsXLnDixAn69u3Lhg0bCAsLY+TIkZw7d855TsacRITLly+TnJzMxYsXuXTpEsnJyaSnpzN79uwcf9inpaVhs9mcc7fb7Tm+AwCLxcK0adMICAhwbl75ww8/8N5777Fu3TpKlSpFTEwMZ8+edc4j6/fm6XNcuHDBOce0tDSSk5O5cOGC83NkzDHrnFJSUqhVqxZr164lPT3d+RfC5ORk5s+fz6ZNm/jiiy9466232LJli/Mv2BkuXrzI+vXrSUlJyfHrOi+8Wa9z9069zt079Xfq2PmtvxPmvnjxYlauXImI0KFDB7p06cLMmTP5/PPPKVGiBAC9e/emadOmOWp///13JkyYwJQpU/D19WXIkCGUKVOG48eP89VXXzFp0iT8/Pw4c+ZMjrnkpxZg/fr1VKpUyXmlr0qVKjFmzBj27dtHp06d+Prrr53n7tmzJ1utzWYjISGB+Ph4/P39mT9/Pvfccw/ff/89c+bMYcu
WLQQEBNC3b19q1aqVY2x39Rl/Lk6aNIkiRYrw559/8ueff7r83h9//HGaNWtGYGAgVapU4ejRo876Tz75hCZNmrj97+euFmD16tUeawGqVavGuHHjAPjkk08oU6YM69ev59y5cyxYsICpU6fesPozZ85QokQJjh07xty5c5k8ebIzYKhevTrvv/8+d911V57rt2zZgmEY7Nixw2P9gAEDSEpKolChQrzwwgvOK8dNnDiRDz74AD8/PwYNGuSy3l1tnz59uHz5MgEBAW5r8zs2wFtvvZXtccZ577//PkWLFgXMv8e5qndXO27cOFJSUihatKjbWnf19erVY8qUKc6xM8K13NanpKRQr149mjVrxoMPPuix3pVNmzbx9ddfs2nTJuc/RN+MWmf95s1s/uorHrr/fgy7HSM9HdLTnfcNuz3781me++rbb9n0/fd8FxFBZHg4ht2eWWu3Q0ZtxuP0dAwREn/6icHLl3MxLY0vV6zg/fbtaVO1qvN87HYMR5BgXPmc3c6aAwcY+NVXXEhPZ/Xnn/PBAw/Qtnx58zxHjbvb1ceO8cLOnVyw21lttfJxeDjtSpXKPA9y1GW9/+WZM/T+/XcuiLA6Pp5PK1bk0eBgl/XZxnY8XnX+PM+eOoVNhITly5lZogSPZezDdcXnvvK5FampPHXpEjYgYflyPitcmMd8fLLXZIzlghWIxtxMefWKFcQBeYlmstZ/kZCQp/qstSsTE/M1tnXLFoYNG3ZNv+ZvJzc7VDpmGEY5ETlqGEY5IOOfuv4EKmY5L8zxXA4iMg2YBtC4cWO5Mo3eu3cvwcHB5oPJk6/n3J08XTBw3759+Pj4OIOSffv2Ua1aNYKDg6latSr79u2jatWqfPHFFxQqVIjg4GD8/f3x9/fPnDc47xuG4fK1kiVLsnPnTpo3b86yZcto1aqVy/dKTk4mODiYVq1aMXjwYI4dO0b16tU5f/48f/75JzVr1mTcuHFERETw7rvv8uKLL/L111/j6+uL3W5n9erV/P3vf8dqtdKiRQuCg4Px9fUlMDAQX19fDMOgSpUqpKenO7uGMs7x8fEhODgYwzAoWrQowcHBBAUFOT/3I488wowZM/jggw8wDIPvvvuOBg0a0Lp1a5YvX07Lli05cOAAu3fvJigoKNt3AGZ314svvshnn33GU089RXBwMHa7neDgYEqUKIHNZmPNmjW0bds2xzwAj5/jscceIzg4mEKFCjnHvnTpEoZhOOc+Z84c2rdvT+HChTl9+jQNGzbk9OnT7N69m4iICAICAti/fz/33nsvBw8epH379kRFRVG5cmXn+2QVEBBAgwYNrvlfWTJ4s17n7p16nbt36u/UsfNbXxDm7qrb54033uCjjz6iQoUKAIwcOZJHH300R+3u3btZv349P/74o7PbJywsjCpVqtC1a1emTJnicewTJ07QunVrZ8dyx44d+fPPP9m+fTtPP/00UVFRN6QWzH+smzdvHnXq1CEwMJDXXnuNxo0bc/bsWX7++Wc+//xzAgICGDt2LPfdd1+O+rS0NEaMGEFQUBAtWrTA39+f/fv3ExISwscff0xQUBDdunXjt99+w7xGy9XrIyMjWb9+PT/99BN9+vTx+N8uMjKSMWPGAJkdN5GRkaxdu5YtW7awY8cOwsLC8lSblpZG586dmTZtmttayN4xs2PHDrZs2ULx4sV57733qFOnzlU7yvNT37x5c2fHSmxsLG3atKF48eLZOmbi4uJo1KhRnuqfe+455s+fT0BAgMf677//PsdzkZGR1KlT56q/19zV9u/fP1e/V/MztjsZ3Rc9e/bMV+fHtdRbrVZiZ8yg59NPY2nXDtLS4PJl8zbrfRfPWdevZ+Rbb2G7dInENWuI+9e/sDRs6P49rqzfs4eRy5ZhS0sjcfVq4qKisFSsaJ6Tnp69JuNwPG/96y9G/vgjNrudxC++IK5KFSxFi+Y4z91ja2oqI+12bEDiqlXX9EP+MMwf8r/cvDlP9ZO
Bi477F9PS+DE+nr55GHsXcMFx/0J6Ot9t3kwvf3/w8bnqsf3sWS44ApsLdjtbDx7kmcuXzdcNI/v5WR8XLgw+Pmy22bjgCG0uiLApPZ3ocuVynu/qvmGw4bvvsJ08CY7vbkPZsjx5//0eazKeW7thAzbH7z8bsCYigo5RUZ7rstwmfP45ti1bnPUJDz6IpXPn7OdmPa54LmH+fGyOK43bgIQ2bbA8/fRV6zAMEj79FNvq1Zm1jzyC5Z//zFkLOZ8zDBI++gib1QrApUuXOHr0aL7+XnI7uNmhkhXoAYx23MZnef5FwzDmA/cD57IskytQUlJS6N+/P2fPnqVw4cJUr17d2fo6fPhwevXqxbBhw/L9C2/WrFnExMRgs9moWrVqjg6kK5UpU4aZM2cSHR3NpUuXAHj77bcREaZPn87WrVsJDg6mRYsWvP3227z55psEBQWxdetW3n77bcqWLcuCBQuyvWfx4sV5/vnniYiI4K677nL5F0xPhg0bxqBBg6hbty52u527776bFStW0LdvX3r27Enjxo0JDw93+5eYQoUK0aFDB2bOnMmsWbMAqFevHg0aNKBRo0ZUrlw5V6nxtXyO3r17s3//furWrYuvry/PP/88L774IosXL2bAgAGcOXMGu93OoEGDqFmzJk899RTnzp1DRBgwYADFixfP03ellFIqb641GHK3BAygS5cuVw2F3C3jyi13S7H2799PYGAg999/v9tgJz+14H4ZVkbH7pYtW9i2bRtdu3Z1GQy5Wkb1008/0blzZwzDoEmTJvj4+HDy5EnKlCmTY3xX9WB2XS9dupQdO3Z4/O5cLYUC2LFjB7Vq1cp1KJS1ds2aNVSsWNFjLbheigTm8i1PS9euR72r5UAPPfQQO3bsyFUw465+2rRp+Q5mbtqSHLsdUlMhNRWr1crszz4j6ddfzWUtly9nHqmpHh9bt20jeupUbKmpJKxcSdw//oGlRo3sNRlHRiCTtf7QIaK3b8dmt5NgtRJXsyaWkBDX51/x2HrpEtFpaeYPufHxeQ5WEjB/QAawXbpEwujR116flkbC2rVYiheHQoXMECPrccVzCSdPYnOEIza7nYT0dCxVq7o819XjhHXrsDl+f9uAhKZNzYAh6/mFCmWL7dfvAAAgAElEQVS/n+W5hBkzsK1alVn/+ONYBg7MrMk4fHxyPBeVmEjsv/6F7eJFigQGEjV5MrRrlxmMZNRlPbI8F/X558T+4x/YbDaKFClCVFwc5PLXfJTVSmx0dGbt7Nm5rnVZP3ly/urfe+/a5/7mm3kbOyIie/2QIXmrv+suYr/5JrN+wIDcz93fn9iNGzNrY2LyNvbFi8SuWZNZf5V/sLkj5GbjpWs5gDjMPZEuY+6R1AsoBawFfgbWACUd5xrAR8CvwA9A49yMcStu1H29Zb16282uDwoK8trY+a335tjXWq8bdevcC+LY+a3XuRe8sfNbn98NoxMTE2Xs2LECOC9+cSV3mz4PHz5cYmJiPI7rbtPm3NSKuN90efjw4RIaGip16tSRnj17yunTp92+h6uNk8PDw6VTp05it9vlm2++kSpVqrjcjDY/tVfK2Pj44Ycflvfff9/5fNWqVZ1XhM3K1cbBU6ZMkWHDhkliYqLs27dPwsLC3I7tql5E5N1335UWLVpcdb6uNvAVEXn44YdlypQp11Tbo0cP+de//nXVsd3x5u+1+Ph46dixo8THx1977bJlIhcuiJw7J3L8uMihQyK//iqyZ4/Izp0i33wj8tVXImvWiKxcKbJ0qcQPHixF/PwEkCJ+fhLfq5fIu++KvPWWyLBhIq++KvLSSyIvvijSp4/Is8+K/OMfIl26iFgsEt+woRTx8THrfXwkvmpVkfBwkRo1RCpXFilfXqR0aZGQEJGAAJFChZw7ncSDFAGz1vHY/a4oOY9+jtqMo1/W1wsVMscLDhYpWVIkNFSkQgWRKlXMudWuLf1KlcpeX6GCyMMPi3ToINKxo8iTT5q
ftUcPkd69RWJiRPr3F3npJenXsGH22mbNRN5/X2TSJJHJk0U++UQkNlZk7lyR+fNFFi8WiY83v/eEBIl/6y0p4u9vfvaAAIkfP15k926Rn34S+eUXkQMHRP78U+TYMZFTp0SSkkRsNpHUVBG7XeLj46VIkSJmfZEiefp1k5/aW6X+Wn+v5Lfem2Pnt17nfu31BQXe3qhbRNz9s0obF+cK0O9GzUUppZRSN5e7DaPB3HNk8ODBnDhxgtKlS+eoddctVL16dY4fP05CQgKVKlVyO3Z+uoXcdfuUKlWKZcuWsWnTJho3bsy4ceOc+yNl5a7bp2/fvjRv3pxWrVoxbNgwXn75ZWbMmOFyDu46fpo3b37Vjp/81ILrjh0fHx82b94MwP79+0lNTXX5381Vt81zzz3Hc889x5w5cyhRogSzZs1yufTNXT3kvtvHVccNwNChQ6+pWwdg5syZ17wH1zV369jtcOkS1mXLmD1nDkm//IKleXO4dMk8Ll7MfuviOeuuXUQvXmx2nKxYQVxkJJbQ0JznZz0yapOTiU5Odu6Vkq+OmdRUEj79NHu9n5/HI+HIkexdL3Y7llq1wNc3+7kuHiesXIlt0yazFkj429+w9OplnptxfsZ9F4+jNmwgtn9/bBcumF0rs2bB4487lxtdTX46R3LUDh2ap+4JS9u2xNWte81L7ywWC3FxcddUn5/aW6U+JCTkmjvz8lPvzbHzW69zv/b6243XNupWt76jRwvkCkSllFLXiatgaNiwYcybN4/g4GDKli3LzJkzKV++fLa6GxUKDRkyhI8++ohJkybx+OOPu63PTzB0o0Kh0NBQ9u7di4+PD88//7xzSZ0r+Ql28lML7oOhJUuWEBERgZ+fn9tgyFUw4+fnx9y5c695GRbkLhRy55qCHRFzSdKFC1iXL2f2vHkk/fgjlgcegAsXzPAl662L56w//kj0F1+Yoc7nnxPXuDGWEiVyFwpdvpx9E9ovv8xfsJOWRsLXX5uhkr9/5hEQAMWKZX/s70/CN99g273brMWxFKlLl8zz/Pxy3s/yXNT//kfsK69kLieaPt0MZvz8zHDGTaiYIUe4MnFi7oOZ8PDstf375y2YqV6duDJlCmQwk/Eed+oP6fpDvlLeo6GSUkopdZu61lAI3AdDr7zyCm3atCEyMpJJkyYxYsQIPv7442y1NyoUio+Pp3Tp0tSrV8/j585vMHQjQqGs/1CzbNkyIiIi3NbnJ9jJTy24D4b+/e9/e2V/nYyNi5NOnMDSurUZ3NhsmWGOh/vWXbuIXro0M9h54AEspUrlDIJc3bfbr/3qQr6+ZqiTlgaALT2dhJ9/xlKtmhncBAVBqVLZgpwrbxNWr8bmuNqeDUho2xZLTEz2cz3cj1q9mtju3TPDlXnzrr1j5rXX8hbM1K9PXKVKGsx4IZhRSilvuC1DJRFx+5clpW5F4uZym0qpO5u3QiHwHAxlOH/+vMs/b29EKHTp0iVGjhzJsGHDcvXd5ScYuhGhUP/+/dm0aRNFixalSpUqTJ061W19foKdmx4KpaWZYY7NBufPZ7+12bBu2ED0pEmZGx8/8QSWSpUyazKOjFAoy2E9ezZzGVZ+Ny5OTyfh+++xVK4MgYHmUbJk5v2AgBz3E1atyr6U6tFHzc1/M851dRsQYG7+e2UwExubt41gGzTIXv/ii3kLdjp21GBGgxmllLopbrtQKSAggFOnTlGqVCkNllSBICKcOnWKgIAAb09FKXUL8WYoBO6DIYDp06fz9NNPU6xYMRIdl/TN6kaEQqGhoSxfvpzevXsTEBDA4cOHadiwIVu3buWuu+7KUZ+fYOhGhEJz5szJ1RIwd6652yctDeuiRcyePZukffuwNG1qhj5ZA6CsxxXPWX/9NfNqVvHxxN11FxbDyAx/UlM9Dp8t2Ll8mYT587H4+0ORIq6PkiWd9xO2bcu+DKt5c/OS0UWKmCFOxm3W+1mei/ryy+xXZcrrlZWuvDrRP/8Jubz
Kj7f3iMl4Dw1mlFJK3Wi3XagUFhbG4cOHOXHihLencl1cvHgxX2FDfuq9OXZ+6wva3AMCAq56uWKlVMHkqtvolVdeYcGCBRQvXpxq1aoRGxvr3JQ4gzdDIXC/hAygd+/ezJ07l1GjRvHhhx/y5ptv5qi/EaHQwIEDncFMlSpV2L59e572BsptMHTTQ6G0tMwgJyUlx33rxo1ET5mS2e3z6KNYypfPGQi5OKypqXnbm6dQIXN5luNIOHMmc9NkERKCgszLtGeEQEFB2W+vuB/1zTfEDhmSufHxvHnQsWOuvrMc3T6DB+etW+fxx+/YzX+VUkqpm+W2C5V8fX25++67vT2N62b9+vU0aNDAK/XeHDu/9QV57kqpW4urUGjRokW88sorHDx4kK1btzrDmiu56zZq27Yt7dq1o02bNrz66quMGjWKd999N1utt0MhcL2ELKvu3bvz6KOPuqy/EaFQXlzvYMgZCp07h+XhhyE52fORlOS8b927l+hNm8wlWFYrcRUqmN0+GcHRpUsex87R7bNiBZaSJbOFPwQFQblyOZ5LSEzEtnWrWQsktGuHZdCgzHMyQqCMw9c320bKOYKdcePyFuzcd98176/j7W6d61GvlFJK3e5uu1BJKaWUul7chUIRERGMGDHC7dW/MnjqNsq4THnTpk1ZvHhxjlpvh0LgOhj6+eefna/Hx8dTq1Ytl7U3IhTK6o8//sjT+yGCdfFiZs+YQdLevVgaNzaDn6QkOHcu837Ww/G89dAhog8eNDt18nKJ9cBACA4m4eJFbOnpgOMS6X5+WB56CIoWNYOcq9xGbdlC7MCBZrdPkSJExcVd+6bLffvCww/n+mvzdrCjoY5SSil1a9NQSSml1G3NXafRG2+8wd69ez12GnkKhY4dO3bVsT11G2WYMWMG3bp1c1nvzVAIXAdDvXr14ttvvyU4OJjKlSu73M8JbkAoBFiXL2fm9Okk/fyzGQqdOQNnz5q3V96/4rH19Gmi09KuvgwsMBBCQrIdCT4+2BwXVHBeYr17dwgOznmEhJi3RYual0/HRbDz/vt56/apU4e40NAC2+2jlFJKqduXhkpKKaVuea6CodOnTzN48GDOnTtHlSpVWLhwISVKlMhW56nTaOnSpW7DnAy5CYU88dRtBPDOO+9QuHBhunfv7rLem6EQuA6GlixZkr8lZDNmkHTkCJZmzbIHQFc5rH/9RfT589iA1StXug6FfHygeHEoUSLzqFQJihcnYft2bN9+CziCofbtsbz6ao4ACV/fHPPO9yXWtdtHKaWUUrcpDZWUUkrd0twFQ9OmTaNhw4ZMmzaN0aNHM3r06Bz7EuVms2tPrhYK5Ya7bqMvv/ySDRs2sHbtWrebbd9qoRBk2VvozBkszZvD6dO5OqwHDxJ99OjVLw9fuHD2UKh0aahRg4Qffsh+JbB27bAMGZL93ODgbPsBZZUjGOrTB5o3z9Vn9nYopJRSSil1q9JQSSml1A03fvx4pk+fjmEY1KlTh9jYWDZv3kxMTAx+fn40atSITz/9lMKFc/6x5C4Yio+PZ+TIkQD06NGDyMjIPG12nVtXW4J2Na66jb788kvmz5/Ptm3bnJ/LlRsaCmVchUzE3DD61Kmcx8mT2R5bf/2V6F9/zd3eQsWLm5eHdxwJQUGZm00DCa1bY+nfP3soVKKEuZeQi2DI5d5ArVrl+nN7+0peSimllFK3Iw2VlFJK3VB//vknkyZNYs+ePQQGBtK1a1fmzZvH8OHDGTlyJE8//TSvv/46s2bNcoY3WbkLho4dO0apUqUAuOuuu1zucXQ9Oo1chUJ54arb6MUXX8Rms9G2bVvA3KzbU8dRVh4vTS8CNpsZBrk4rNu3E/3f/5pXIYuPJ65ECSwpKZCa6n7AYsWgVCkoVYqE1NTsews1b47ln//MFh5RsqQZKF3xPecIhQYOLFBLyJRSSimlVE4aKimllLoqd51GgwcP5syZM7Ro0cJtpxFAWloaFy5cwNf
XF5vNRlBQEH5+flSsWBGAtm3bMmrUKJehUm6CIcMw3C4hy2+nkatQaNmyZfTp04ekpCTat29P/fr1Wb16tct6V91Gv/zyS+66jUTMzaaPH4fjx7GuXEn0+PHYUlNJWLGCuGbNsPj7Zw+PLl50/V4+PiT4+WVehUyEhNBQLL16mUvMHMFRtqNkyWx7DOUIhgYPznUwpKGQUkoppdTtR0MlpZRSHnnqNFq7di1Hjhxh3bp1bjuNKlSowODBg6lUqRKBgYFERUXRtWtXhgwZwr59+4iMjGTx4sUcOnTI7RxcBUOhoaGcOnUKgKNHj1K2bFmXtfntNHIVCnXq1IkSJUpcW7hx6RLWuXP5bM4cklq0wHL33c7QyOWRluYsTYDMJWRpaSR8/z2W2rWhYkVo0MAMh9wdxYsTtWJF9lBo9Oib2i2koZBSSiml1O1FQyWllLpDuOo22rRpE3379qVIkSIULVqUmTNnUr169Ry17jqNatasyZEjRzx2Gp05c4b4+Hh+//13ihcvzpNPPslnn33G/Pnz+ec//8mnn35KVFSUx2VproKh33//ndWrV/PEE08wa9YsHn/8cZe17jqN+vfvz/Hjx6/aaeSKy32Jzp6Fv/6Co0fd3x49ivXMGaIxw6EVGzZk7ksUFARly5pHxYrQqFHmY8cRtWcPsUOHYrtwwQyFZs/WJWRKKaWUUsprNFRSSqk7gKtuo/nz5zNy5Ej+/e9/06NHDyZPnszbb7/NzJkzs9V66jTavn07gMdOozVr1nD33XdTpkwZADp37szmzZt56qmnmDRpEpGRkSQkJLB//36383cVDA0dOpS2bdtSo0YNKleuzMKFC13Wuus06tSp09WXoImYl7Q/cgT+/BOOHMG6di3RCxaYnUJWK3GlSmFJSoJLl3LWBwRAuXLmUasWtGplXtr+m28Ax75EzzyDZfJkM1S6Csvf/kZclSoaCimllFJKqVuChkpKKVVAuOo0atu2LUePHqVo0aIcP36cJk2asHz5cpf1V3YblS9fHsMwOH/+PADnzp2jfPnyOeo8dRr961//4uTJkzzxxBNuO40qVarEli1bsNlsBAYGsnbtWho3bszx48cBuHTpEu+++y7//ve/3X52V8FQqVKleP/996/9KmgLFjDnk09I+t//zCVoR45kHo4AiSNHcoRF2Zag2e0klCyJ5dlnzeDorruy34aE5LiSWY59iZ54IleBUgYNhZRSSiml1K1CQyWllCoA3HUabdy40dlt88QTT7hdAuaq2ygqKorp06fToUMHRo4cSUhIiMv9hjx1GmWMn5qa6rbT6P7776dLly40bNiQwoUL06BBA/r06cN//vMfFi5cSEBAAH379qV169Z5+k7cXgUtLQ2OHTODoYwjIyhyHNYDB4i+cAEbsGrt2uxL0CpUMI8HHoDy5TOPChWgfHmiduwgtkePzFDovfdu+hI0pZRSSimlbgUaKiml1E3iqtPI39+f//znP8yePZuiRYvSt29fBgwY4LLeVadRhqSkJNatW0dsbKzLWlfdRnPnzmXp0qWMGjWKF154gTFjxvDSSy8xffr0bLWeOo3Kli1LamrqVTuN3nzzTd58881sz40ZM4b27dvnrePGZjNDoQULiH7rrcyroDVqhMVuN0Ojv/4Cuz17XeHCZudQhQoQHm5eBe377823BBK6d8cyZQoEB191Cpa77ybOz0+XoCmllFJKqTuehkpKKXUTuOs0EhEOHTrErFmzaN26tXNJ2JXcdRplWL58OW3atCEkJMRlvatuo02bNrFr1y5niNWtWzfatWuXo9ZTp9GKFSs4f/48L730Uv47jc6fh8OHM49Dh3LeP30acHEVtP37sdx3H0REZHYaZRzly5sbXfv4OMfOsQSta9dcBUoZNBRSSimllFJKQyWllLppXHUa/ec//2HevHkcPnwYgLJly7qsdddp9NRTTwEQFxdH79693Y7trtto0aJFzg22//vf/3Lvvfe6rHfXaTR
mzJirb3YNkJpqdhEdPAgHD2JdvTpzs+v4eOKKFMHi2NspmzJlICwMKleGBx8071esSNSBA8S+8w62ixfNUGjmTF2CppRSSiml1E2moZJSSuWBqyVsMTExrF69mtDQUABmzpxJ/fr1s9W56zSKjo5mwYIFzJ49m6pVqzJp0iRq1KiRY1xP+xqdO3eOrVu3smzZMrfzdtdtFBYWxssvv8zYsWMpUaIEM2bMyNP34ew2OngQS3i4MzTi4EGzuyjj/l9/mVdSc8jWaSRCwt13Y+neHSpWNIOjsDCzyyggwOW4FiCubl1dgqaUUkoppZQXaaiklFK55G4JG0BMTAzDhw93W+uu0+jSpUsEBAQwdepUTp8+zXPPPefySmfuOo0ANmzYQIcOHQhwE8BkcNVt1KlTJ0qUKOE5WLl40Vx6dvAgHDiQ2W20fTvRP/xghkLLl2dudg0QGGgGRJUqwSOPmLdZjqidO4l99tnM5WfvvJOnTiPQUEgppZRSSilv01BJKaXywNNm2Z646zQKCwujc+fOHDhwgE6dOtGzZ0+X9e46jQDWrVvH6NGjr+nzWOPjmTVtGknffYfl7rtzBEccOGBeSe1K5cqRYLdjc3Qf2YCERx7B8vbbZnBUqhQYhttxLTVqEOfvr8vPlFJKKaWUKsA0VFJK3VFcLV/L6PAZMGAAM2bMICUlxWWtuyVs8+bN49NPP2XJkiW0adOG0aNH4+/vn63WXadRSEgIiYmJVK1alQ0bNlCzZk23c3fVaQQwYcIEz906Fy7AH3/Ab79lHr//jnXnTqIPHMAGfLlqVWanUUCAuYdRpUrQoYN5m/G4UiVzaZq/f87NrmNioGFDj99/VtpppJRSSimlVMGmoZJS6o7hbvnas88+y759+zhz5ozHendL2EaNGsVPP/3EAw88QJ8+fXj33Xd5/fXXs9W66zS6cOEC3bt3Z+/evYSGhjJ9+vQ8fSar1UrsjBkk/f47lurVs4VGzvtHj2YvKlIEqlYlwTAy9zUCErp2xfLhh1C6tMcuowy62bVSSimllFJ3Ng2VlFIFiqtOo379+rF9+3ZSUlJo0KABM2fOpGjRoi7rXS1fS09P5+OPP2bVqlUeN7v2tFn2vn378Pf3p2fPnowdO9ZlvatOI39/f1auXHn1K6iJmMvQ9u83j59/xrphA9Fbt5p7GsXHZ3YaGYbZTVS1KrRrZ95WrQp3323eli0LhpGz06h7d/Nqa3mg3UZKKaWUUkrduTRUUkoVGO46jcaPH09ISAjr16/HarXy4YcfMnTo0Bz17pavTZw4kQceeIBy5cp5HN/dErajjk4gEWH58uVERETk6XM5r6CWlISlZctswZHz/v79kJycWeTrS0LRotn3NHrsMSzjxplL1K5YfueKdhoppZRSSiml8kNDJaVUgeKq0ygkJAQwQ50LFy5guFm65Wr52uzZs1m0aJHLvYqu5G4J2yOPPMIff/xBkSJFqF+/Ph9//LHnNxKBQ4fgxx+xLlpE9OzZ2NLTc15BzTDMvYxq1oQePczbmjWhRg2oXJmolSuzdxr17m2+lgfaaaSUUkoppZS6VhoqKaUKDHedRgA9e/YkPj6eevXqMW7cOJf1rpavDR8+3LmvUUBAADabjerVq/PLL7+4fA9XS9jWrVvnevmaiHkFtT174Mcfs986NgNPgOz7GjVrhmXIEDM8qlrV3DTbDe00UkoppZRSSnmTj7cnoJS684wfP57w8HAiIiKIjo7m4sWLdO/enWeeeYaIiAiee+45Ll++nKMua6fRkSNHOH/+PHPnzgUgNjaWRYsWce+997JgwQKX42ZdviYirF27lpdeeom//vqL+fPnO7uN3AVK7ljj4/lw1Cisr78OY8dCz55w//0QEgJVqsCjj8Irr8CqVVC0KDz7LEyZAhs2EDVnDkWKFAEwu42GDoWOHaF2bY+BUgaLxcLAgQM1UFJKKaWUUkrddNqppJS6qdzti9S9e3d69+5NZGQk//jHP5g
+fTp9+/bNVutpo2yAQoUK8fe//5333nuPnj175hjb3fK1XBMxr6T244+wezfs3o31f/8jev9+bMAXCQnm8rW77jJDoZ49zdvwcPO2VKkcb2kB4kJCtNtIKaWUUkopVeBoqKSUuulc7YsUFRXF+vXrMQyDJk2acPjw4Rx17jbK/uWXX6hevToigtVqpVatWm7HdrV8LetG2SmOZWmcPJktPHLeP3Mms7BsWRJ8fbMvX+vVC8v06Xn6PnRfI6WUUkoppVRBpKGSUirPxo8fz/Tp0zEMgzp16hAbG8v06dMZNWoUR44c4cSJE5QuXdplrad9kQAuX77MnDlzmDhxYo5ad51GrVu3dgZCDzzwAFOmTMn1Z7EuWED0s89iu3iRhBUriKtdG8uxY3DsWOZJxYub3UZdu0JEhHk/PBzKliXKas2+WbZ2GimllFJKKaXuEBoqKaXyxN3ytQcffJBx48YxdOhQj/WursA2d+5c5xK2F154gRYtWtC8eXOX9a46jTZt2gTgerPsDGlp8PPP8MMP8P335u0PP5Dw+++ZnUZpaST89ReW9u3N8CgjQCpf3rwSmwu6WbZSSimllFLqTqWhklIqz1wtX2vQoAHnzp27aq2nfZFmzZrFmTNnmDp1ap7n5FzCdu4clvvuc4ZGzhBp7164dMk8uVAh8+pqTZoQ9cADxC5ejO3SJbPT6JNPII/BkC5fU0oppZRSSt2JNFRSSuXJ1ZavXY27fZGmT5/Otm3b2LZtGz4+ubwwpQj88gvWKVOI/uADs9No+XJzs+yMc8qXhzp14G9/g7p1zfu1ajmvrGYB4rp21U4jpZRSSimllMojDZWUugPt27ePbt26OR//9ttvjBgxglatWtGvXz98fHyoUqUKn332GSEhIdlqr7Z87Wrc7YsUFBREaGgozZo1A8wOptdffz178cmTsHUrfPONebt1K5w+TQJk3yy7eXMsI0aYAZKLK65dSTuNlFJKKaWUUirvNFRS6g50zz33sHPnTgDS09OpUKECnTp1okuXLjz//PMMGjSIGTNmMGbMGN56661stZ6Wr+WWq32R0tLSsu+JdPEibNliBkgZIdKvv5qv+fiYex117mwuYbtwgdjXXsvcLHvwYNCASCmllFJKKaVuKA2VlLrDrV27lmrVqlG5cmX2799PvXr1AGjbti0PP/xwjlDJ3fK1fBHB+vHHLPj0U5JCQ7EcPw67dsHly+brFSrA/fdDnz7mbaNGULSos9wCxFWpokvYlFJKKaWUUuom0lBJqTvc/PnziY6OBiA8PJxNmzbRqlUrFi1axKFDh3Kc72752qRJk3jrrbc4c+YMdevW5dFHH2X69OmuB01ONjuPtmyBr7/G+tVXRCcnYwOWA3F16mB56SUzQGrSxAyVrkKXsCmllFJKKaXUzaWhklIFkLs9kSIjI3nhhRfw8/OjcOHCTJ48mSZNmrh9n9TUVKxWK6NGjQJgxowZPP300yxbtgyLxYKfn5/LOlfL1wYMGEDdunVzhjoisH+/M0Di669h926w283X772XhLAwbHv3Ao49kVq0wDJ6dN6+FKWUUkoppZRSN5WGSkoVQO72RHr++efp0aMHr776KqtWrWLIkCGsX7/e7ft88cUXNGzYkNDQUABq1arFmDFjiIyMZP/+/axcuTLXc7JarcTGxpL0119YypTJDJC2bIHTp82TihUzu486dYJmzcwupBIliLJaiY2OztwTKQ9Xk1NKKaWUUkop5R1eCZUMw/gX0BsQ4AegJ1AOmA+UAnYAT4tIqjfmp1RBknVPJMMwOH/+PADnzp2jfPnyHmvj4uKcS98Ajh8/DoDdbuftt98mJibm6hM4cgTr++8TPXEitrQ0EpYvJw5znyNq184MkJo2hXvvNTfZvoLFYiEuLk73RFJKKaWUUkqpAuSmh0qGYVQABgC1ReSCYRgLgb8DjwLjRWS+YRgfA72AKTd7fkoVNFn3RJowYQKRkZHExsZit9vZvHmz27r
z58/z3//+l6lTpzqfi4uLY+zYsQQGBtK5c2d69uyZvUgEfv8dvvoq8/j1VxIwl63huE147DEss2dD8UPbaPkAACAASURBVOK5/hy6J5JSSimllFJKFSw5WwZujsJAoGEYhYEiwFGgNbDY8fosoKOX5qbUDbdv3z7q16/vPEJCQpgwYQIAS5cupVatWoSHhzNkyBCP75OxJ9KTTz4JwJQpU3jhhRc4dOgQ48ePp1evXm5rg4KCOHXqFMWKFXM+N3DgQObMmcP+/fsZPXo0BsCePfDxx/CPf0DFilCtGvTsCfHxUKcOvP8+UWPHUqRIEQBz+Vrv3nkKlJRSSimllFJKFTw3vVNJRP40DGMscBC4ACRgLnc7KyJpjtMOA1e/3JNSBZS7PZESExPZtGkTu3btwt/f37kczZ0r90SaNWsWy5cvB+DJJ5+kd+/eeZqXddky5k6cSFK1alhOn4b//Q9OnjRfLFcOWraEFi3MI8tSNgsQV6OGLl9TSimllFJKqTuIISI3d0DDKAEsAboBZ4FFmB1Kb4hIdcc5FYEvRCTCRX0foA9AaGhoo/nz59+sqXtFSkoKRYsW9Uq9N8fOb31Bmvu2bduYNWsWH374IW+88QZ/+9vfeOihh3JVO2LECO677z4eeeQRAHr06EFMTAzNmjVjx44dTJ06lWnTprl/A7udor/9RvFvv2XLunX8c98+bJjtgzNLluShJk04V7cuZ+vW5WL58mAY1+1z32r1Onedu459c+p17jr3gjR2fut17jr3gjR2fut17gVv7PzW69yvvb4gaNWq1Q4RaXzVE0Xkph7Ak8CnWR4/g7l30kmgsOO5ZsDqq71Xo0aN5HaXmJjotXpvjp3f+oI09549e8oHH3wgIiL16tWTZ555Rpo0aSItWrSQrVu3uq1LSUmRkiVLytmzZ53Pbdy4UWrUqCF169aVJk2ayPbt27MX2e0ie/aIfPihSOfOIiVLipg7JUm/YsUEc/N8AaRfv355+swiBet7v5XGzm+9zt079Xfq2Pmt17l7p17n7p16nbt36nXu3qnXuRe8sfNbr3O/vQHbJRcZjzeu/nYQaGoYRhHM5W9tgO1AItAF8wpwPYB4L8xNqZsqY0+kUaNGAZCWlkZycjJbtmxh27ZtdO3ald9++w3DRYdQxp5IWT300ENMmzYtc7NrEfj1V0hMhHXrzNu//jJfq1QJLBZo3RpatSLq22+JjY7GZrOZ+yJFRd3Ij66UUkoppZRSqoDzxp5K3xiGsRj4FkgDvgOmASuB+YZhvO147tObPTel8mLfvn1069bN+fi3335jxIgRDBo0CIBx48YxePBgTpw4QenSpV2+x5V7IoWFhdG8eXMMw6BJkyb4+Phw8uRJypQpk6s5Wa1WZk+eTNLy5VjOnjVDpIMHzRfvussZING6Ndx9d7blbJawMOLi4nRfJKWUUkoppZRSueKNTiVEZDgw/IqnfwOaeGE6Sl0Td5ttAxw/fpyEhAQqVark8T3i4uKIjo52Pu7YsSObN28GYP/+/aSmproNpJwuXIANG7BOnkz0ihXYRPhi9WrigoOxREXBq6+aIdI991x1TySLxUJISEhmp5NSSimllFJKKeWGV0IlpW43a9eupVq1alSuXBmAjz76iEmTJvH444+7rTl//jz//e9/mTp1qvO55557jiVLlvD/7d17nFV1ufjxz1cQZUDEBEEFRRT1mIqCRpYlZiBpjdjRzCiVIsuORaV1TLvYRaMLntSsoyWoXUDjlzJHU2dC8XIsjZuKF9ITlJiKlYA6Kox8f3+sBQyw195rzTCzZ4bP+/Xar9m3Z32fvfcze6397LW+++CDD6ZHjx5cf/31Wx76FiM89RTccQfcfjvMnQuvv059t240phPvNwL1Z5xB7Y9/vLUfqiRJkiRJgE0laauYOXPmhj2OZs+eTb9+/Rg+fHjZmFJzIvXo0YOLLrpoyz2FXn01OZTt9tuTZtJf/pJcv//+8KlPwbhxjH3
5ZaafdZZzIkmSJEmS2oVNJamVmk+23djYyKWXXsrXvva1Fi2rrq6O6dOns3rVKmqHDdvYRLr3XlizBnr1Sg5lO+88GDcOhg7dEFsLzNhhB+dEkiRJkiS1C5tKUis1n2z70UcfZenSpUyaNIkdd9yR5cuXM2LECB566CEGDhxYdjl1N93E6WecQeMbb1A/ezYzYqQW4K1vhc99LmkiHX007LBD5jKcE0mSJEmS1F5sKmmblfXrbc8++yw33ngjffv2Zd9992X69On07ds3cznNJ9s+5JBDWLFiBXPnzmX06NEMGTKEefPmZU+2vWwZ3HYb3Hor9fX1NK5bB0BjjNSPHk3tDTfA4MFb7TFLkiRJkrS1bFftBKRqWf/rbYsWLWL+/PnU1NRw8sknM2bMGKZPn84jjzzC/vvvz3e/+93MZayfbPuDH/xgvkGbmuD+++GCC+Dgg2GffeDcc+Evf2HsBz5ATboXUk1NDWO/8AUbSpIkSZKkDss9lSQ2/fW2vffem7lz5wLw9re/nVmzZmXGlZpsu7lly5bBv/4FM2bArbcmcyS99BJ07w7HHAOTJsGJJ8KwYcmcSOmcSs6JJEmSJEnq6GwqSWz6623NTZs2bZND5PKomz2bX1x5Jauvu47av/wFHngA3nwT+veH2lp4//thzBjYeectYp0TSZIkSZLUWdhU0jav+a+3NXfJJZfQvXt3JkyYkG9BS5ZQ9/Wvc/pvfkNjjPwOmLHPPtR+5StJI+nII2E7jziVJEmSJHUNNpXUaWVNtH3GGWdw/vnns2rVKoYMGcJNN93ELrvskrmc5r/ett4dd9zBPffcw5w5cwghZCexYgXMnAm//CX86U/UA43pTY1A/QknUPvtb7fugUqSJEmS1AG524Q6rayJtqdMmcKIESN46qmnOO6445gyZUrZ5TT/9TZIGkozZ86krq6OmpqaLQMaG5NG0oknwh57wOTJsHYtTJ3K2OnTN8TU1NQwduzYrfqYJUmSJEnqKGwqqUtoPtH27NmzOf744wE488wzueWWWzLjSv1627nnnktjYyNjxozhsMMO49Of/nQyJ9KcOXDWWTBgAJx+Ojz6KHzpS8nfhQvhi1+k9qyzmDFjBuPHj2fGjBlOti1JkiRJ6rI8/E1dQvOJtl944QV23XVXAAYOHMgLL7yQGVfq19uefvpp5s6dm0yW/eij8ItfwN57w7PPQp8+8KEPwUc/mvx6W4k5kpxsW5IkSZK0LbCppE4va6JtgBBC+TmRSqi77jp+88MfsvqVV6j961+he3d43/vgssvgAx+Anj23VuqSJEmSJHVaNpXU6W0+0faAAQM27H303HPPsdtuu1VeSGMj3HwzdT/8IacvWkQj8NvttmPG2WdT+53vQP/+bfgIJEmSJEnqfJxTSZ3e5hNt19bWcueddwJw/fXXc9JJJ5UOXLcO7rkHPvEJGDgQPvpR6p9+euOvt61bR/3229tQkiRJkiSpBPdUUtWtXLmSSZMmsXjxYkIITJs2jR/96EfMnz+f3r17s3LlSvr27cuiRYu2iF0/0fbVV1+94boLLriAMWPGMGzYMPbee29uuummTYOefhpuuCGZK2nZMujdG049Fc44g7ErVzJ9wgQaGxv99TZJkiRJksqwqaSqmzx5MuPGjWPWrFmsWbOGxsZGbrzxxg2TZZ933nnsvPPOJWNLTbS96667ctlll206UfbKlXDjjUkz6YEHIAQYMwa+8x04+WSoqQGglmTPp+nTpzNx4kR/vU2SJEmSpAw2lVRVq1at4t577+W6664DoEePHvTo0WPD7TFGbrrpJu66667cy6yrq2P69Omsfuklanv0gOuvh7o6eOMNOOgg+N73YMIE2HPPkvH+epskSZIkSZXZVFJVLV26lP79+zNx4kQefvhhRo4cyeWXX06vXr0AuO+++xgwYADDhg3Ltby6ujpOP+00Gl9/nfpbbmEGULvrrnD22XDmmTBiRLKXkiRJkiRJahUn6lZ
VNTU1sWDBAs455xwWLlxIr169mDJlyobbN5+EO1OM0NBA/ec+R+PrrwPQCNSfcAL8/e9wxRUwcqQNJUmSJEmSthKbSqqqQYMGMWjQIEaNGgXAKaecwoIFCwB48803+e1vf8tpp52WvYDXXoOf/QwOPhjGjmXsypXUbL89QDLR9qc+Bc0Op5MkSZIkSVuHTSVV1cCBAxk8eDBLliwBYM6cORx00EEAzJ8/nwMPPJBBgwZtGfjss3DRRTB4cHJoWzp3Uu0LLzBj1izGjx/PjBkznGhbkiRJkqQ24pxKqrorr7ySCRMmsGbNGoYOHcr06dMBuOuuu7Y89O1Pf4If/QhuugnefBPGj4fPfx7e9a4Nh7Y50bYkSZIkSW3PppJabeXKlUyaNInFixcTQmDatGkcddRRAEydOpXzzz+fF198kX79+pWMP+yww5g3b94W119wwQVJY6ipCW6+OWkmPfAA7LQTfPazcO65MHRoWz40SZIkSZKUwaaSWm3y5MmMGzeOWbNmsWbNGhobGwFYsWIF9fX17LXXXoWXWVdXx/XXXMPq666j9q674JlnkgbS5ZfDWWdBnz5b+VFIkiRJkqQibCqpVVatWsW9997LddddB0CPHj3okU6MfdVVV3HFFVdw0kknFVpm3bXXcvqnP01jUxN3ADMOOYTaH/8YTjwRunXbyo9AkiRJkiS1hBN1q1WWLl1K//79mThxIocffjiTJk3i1VdfZfbs2fTr14/hw4fnX9gzz8A551D/yU/S2NQEQCNQ/+53Q22tDSVJkiRJkjoQm0pqlaamJhYsWMA555zDwoUL6dWrFxdffDGXXnopEydOzLeQZ56Bz3wG9t0Xrr2WsccfT03PngDU1NQwduzYNnwEkiRJkiSpJWwqqVUGDRrEoEGDGDVqFACnnHIKCxYsYOnSpUyaNIkhQ4awfPlyRowYwfPPP79p8PLl8B//AfvtBz//OXz84/DUU9TefjszZs5k/PjxzJgxg9ra2io8MkmSJEmSVI5zKqlVBg4cyODBg1myZAkHHHAAc+bMYcSIEcyZM4e5c+cyevRohgwZwrx58zb++tuzz8J3vws/+xmsW5c0ky68EPbee8Nya2tr6dOnT/Lrb5IkSZIkqcOxqaRWu/LKK5kwYQJr1qxh6NChTJ8+vfQdn30WpkyBa65JmkkTJybNpCFD2jVfSZIkSZLUejaVxMqVK5k0aRKLFy8mhMC0adNYvnw5X/rSl/jb3/7GQw89xBFHHJEZf9hhhzFv3rzM25c98AB861tJM+nNN+Gss+Cii2wmSZIkSZLUidlUEpMnT2bcuHHMmjWLNWvW0NjYSN++ffnWt77FtGnTWrTMuro6bvjJT1g9dSq1DQ3Q1LSxmbTPPlv3AUiSJEmSpHZnU2kbt2rVKu69916uu+46AHr06EGPHj3o27cvL7zwQouWWXfDDZz+iU/Q2NTE7cCM976X2quvhqFDt17ikiRJkiSpqvz1t23c0qVL6d+/PxMnTuTwww9n0qRJvPrqqy1b2MqV8NWvUp82lAAagfoDDrChJEmSJElSF2NTaRvX1NTEggULOOecc1i4cCG9evViypQpxRby6qvJr7ntsw9ccglj3/52anbcEYCamhrGjh3bBplLkiRJkqRqsqm0jRs0aBCDBg1i1KhRAJxyyiksWLAgX/Abb8AVVyR7IV14IRx9NCxcSO199zHjxhsZP348M2bMoLa2tg0fgSRJkiRJqobccyqFEHoCe8UYl7RhPmpnAwcOZPDgwSxZsoQDDjiAOXPmcNBBB5UPamqC66+Hb34TnnkGRo+GW26Bo47acJfa2lr69OnD6NGj2zR/SZIkSZJUHbn2VAohfABYBNyRXj4shFDXlomp/Vx55ZVMmDCBQw89lEWLFnHhhRdy8803c+qpp/KHP/yBE088keOPPx7WrYOZM+Ggg2DSJNh9d2hogLvu2qShJEmSJEmSur68eypdDLwNmAsQY1wUQvB34buIww4
7jHnz5m1y3cknn8wuu+yS7GkUI9x6Kxx+ODzyCBx8MMyeDR/4AIRQnaQlSZIkSVJV5W0qrY0xrgqbNhBiG+SjDqKuro7p06ez+v77qb31VnjwQdhvP/j1r+G002A7p+OSJEmSJGlblrep9FgI4SNAtxDCMOBzwAMtHTSE0Bf4OXAwSXPq48AS4EZgCLAM+FCM8aWWjqGWq6ur4/QPf5jG116j/pZbmLHrrtT+7Gdw5pmw/fbVTk+SJEmSJHUAeXc3+SzwVuANYAawGvh8K8a9HLgjxnggMBx4ArgAmBNjHAbMSS+rCurr6mh87TUAGoH6U09N5lCyoSRJkiRJklK5mkoxxsYY40UxxiNjjEek519vyYAhhJ2BdwPXpsteE2NcCZwEXJ/e7XpgfEuWr1Z6/XXG/u//UpNerKmpYez73lfVlCRJkiRJUseT6/C3EML/sOUcSquAecDVBRtM+wAvAtNDCMOB+cBkYECM8bn0Ps8DAwosU1vDunXw0Y9S++STzPjyl5n+5z8zceJEamtrq52ZJEmSJEnqYEKMlefbDiFcDvQnOfQN4DSSQ+Ai0CfG+LHcA4ZwBPBH4J0xxgfTZa8GPhtj7Nvsfi/FGHcpEX82cDbAgAEDRs6cOTPv0J3SK6+8Qu/evdslft+rrmLwrFk8/ZnPsPzUU9t17K0db+7m3pnGbm28uW97ufu8mXtnGru18eZu7p1p7NbGm/u2l7vP27aZe2dw7LHHzo8xHlHxjjHGiifgT1nXAY/lWUazuIHAsmaX3wXcRjJR9+7pdbsDSyota+TIkbGru/vuu9sn/rLLYoQYP//59h+7DeLNvTrx5l6deHOvTvy2OnZr4829OvHmXp14c69OvLlXJ97cO9/YrY03964NmBdz9HjyTtTdO4Sw1/oL6fn1bbk1OZexvon1PPBMCOGA9KrjgMeBOuDM9LozgdlFlqtWuOkm+OIX4ZRTYOrUamcjSZIkSZI6gVxzKgHnAfeHEP4PCCTzIn0mhNCLjZNrF/FZ4FchhB7AX4CJJJOG3xRC+ATwV+BDLViuirr3XvjYx+Doo+EXv4Dt8vYZJUmSJEnStixXUynG+LsQwjDgwPSqJXHj5Nw/KjpojHERUOrYvOOKLkuJIUOGsNNOO9GtWze6d+/OvHnzNtw2depUzj//fF588UX69eu3Mejxx+Gkk2DoUJg9G3bcsQqZS5IkSZKkzijvnkoAw4ADgB2B4SEEYow3tE1aaom7775706YRsGLFCurr69lrr702vfPf/w7ve1/SSLr9dnjLW9oxU0mSJEmS1NnlOtYphPAN4Mr0dCzwfcDfme8ErrrqKr7//e8TQth45erVcMIJ8K9/we9+B0OGVC0/SZIkSZLUOeWdQOcUkkPTno8xTgSGAzu3WVYqLITA2LFjGTlyJNdccw0As2fPpl+/fgwfPnzjHdeuTSbkXrwYZs2Cww+vUsaSJEmSJKkzy3v422sxxnUhhKYQQh9gBTC4DfNSQffffz977rknK1asYMyYMRx44IFceumlfO1rX9t4pxjhk5+EhgaYPh2OP756CUuSJEmSpE4tb1NpXgihL/AzYD7wCvCHNstKhe25554A7Lbbbpx88sncc889LF26lEmTJrHjjjuyfPlyRuy3Hw+tXs3Ab30LzjqruglLkiRJkqROLdfhbzHGz8QYV8YY/xsYA5yZHganDuDVV1/l5Zdf3nC+vr6eI488khUrVjBz5kyWLVvGoL59WbB6NQMnTYKvfrXKGUuSJEmSpM4u155KIYQ5McbjAGKMyza/TtX1wgsvcPLJJwPQ1NTERz7yEcaNG7fxDrfdBv/8J7z3vfDTn0LzSbslSZIkSZJaoGxTKYSwI1AD9Ash7AKs70b0AfZs49yU09ChQ3n44YdL3rbTk0/CeeexbORIuPlm6J73iEdJkiRJkqRslToMnwI+D+xBMpfS+qbSauDHbZiXtoK6q6/mjsmTebZvX2pvuw169652SpIkSZI
kqYsoO6dSjPHyGOM+wPkxxqExxn3S0/AYo02lDqzu5ps5/Zxz+OmaNZy+ejV1Dz5Y7ZQkSZIkSVIXkutYqBjjlSGEdwBDmsfEGG9oo7zUSvW/+hWNMQLQ+Prr1NfXU1tbW+WsJEmSJElSV5F3ou5fAPsCi4A306sjYFOpgxrbowfTgUagpqaGsWPHVjslSZIkSZLUheSdtfkI4KAY011f1OHVPvMMM/bdl+mHHMLEiRPdS0mSJEmSJG1VeZtKi4GBwHNtmIu2lpdfhj/+kdrzz6fP8cczevToamckSZIkSZK6mLxNpX7A4yGEh4A31l8ZY3T3l45o7lxoaoIxY6qdiSRJkiRJ6qLyNpUubssktJU1NEDPnvDOd8If/lDtbCRJkiRJUheU99ff7gkh7A0MizH+PoRQA3Rr29TUYg0N8O53ww47VDsTSZIkSZLURW2X504hhE8Cs4Cr06v2BG5pq6TUCsuXw5NPeuibJEmSJElqU7maSsB/AO8EVgPEGJ8CdmurpNQKDQ3J37Fjq5uHJEmSJEnq0vI2ld6IMa5ZfyGE0B2IbZOSWqWhAQYOhIMPrnYmkiRJkiSpC8vbVLonhHAh0DOEMAb4DfA/bZeWWmTdOvj97+G974UQqp2NJEmSJEnqwvI2lS4AXgQeBT4F/A74alslpRZ65BF48UXnU5IkSZIkSW0u16+/AT2BaTHGnwGEELql1zW2VWJqgfXzKb33vdXNQ5IkSZIkdXl591SaQ9JEWq8n8Putn45apaEB3vpW2GOPamciSZIkSZK6uLxNpR1jjK+sv5Cer2mblNQir78O993noW+SJEmSJKld5G0qvRpCGLH+QghhJPBa26SkFrn//qSxZFNJkiRJkiS1g7xzKk0GfhNC+DsQgIHAaW2WlYpraIDtt4djjql2JpIkSZIkaRtQsakUQtgO6AEcCByQXr0kxri2LRNTQQ0N8I53QK9e1c5EkiRJkiRtAyoe/hZjXAdcFWNcG2NcnJ5sKHUkK1bAwoUe+iZJkiRJktpN7l9/CyH8ewghtGk2apk5c5K/NpUkSZIkSVI7ydtU+hTwG2BNCGF1COHlEMLqNsxLRTQ0wC67wMiR1c5EkiRJkiRtI3JN1B1j3KmtE1ELxZg0ld7zHujWrdrZSJIkSZKkbUSuPZVC4qMhhK+llweHEN7WtqkplyVLYPlyD32TJEmSJEntKu/hbz8BjgI+kl5+BbiqTTJSMQ0NyV+bSpIkSZIkqR3lOvwNGBVjHBFCWAgQY3wphNCjDfNSXg0NMHRocpIkSZIkSWonefdUWhtC6AZEgBBCf2Bdm2WlfNauhblz3UtJkiRJkiS1u7xNpSuAm4HdQgiXAPcDl7ZZVsrnwQfh5ZdtKkmSJEmSpHaX99fffhVCmA8cBwRgfIzxiTbNTJU1NMB22yW//CZJkiRJktSOyjaVQgg7Ap8G9gMeBa6OMTa1R2LKoaEBjjgCdtml2plIkiRJkqRtTKXD364HjiBpKL0P+GGbZ6R8Vq2Chx7y0DdJkiRJklQVlQ5/OyjGeAhACOFa4KG2T0m53H03vPmmTSVJkiRJklQVlfZUWrv+jIe9dTANDdCrFxx1VLUzkSRJkiRJ26BKeyoNDyGsTs8HoGd6OQAxxtinTbNTtoYGOOYY6NGj2plIkiRJkqRtUNk9lWKM3WKMfdLTTjHG7s3Ot6qhFELoFkJYGEK4Nb28TwjhwRDC0yGEG0MIdkuy/PWv8NRTHvomSZIkSZKqptLhb21pMvBEs8vfA/4rxrgf8BLwiapk1Rk0NCR/bSpJkiRJkqQqqUpTKYQwCDgR+Hl6OQDvAWald7keGF+N3DqFhgbYYw846KBqZyJJkiRJkrZR1dpT6UfAl4F16eVdgZXNJgNfDuxZjcQ6vHXrYM6cZC+lEKqdjSRJkiRJ2kaFGGP7DhjC+4ETYoyfCSGMBs4HzgL+mB76RghhMHB7jPHgEvFnA2cDDBgwYOTMmTPbK/WqeOWVV+j
du/eGy72XLOGIT3+axy+8kBU5Dn/bPL41Y3emeHM39840dmvjzX3by93nzdw709itjTd3c+9MY7c23ty3vdx93rbN3DuDY489dn6M8YiKd4wxtusJ+C7JnkjLgOeBRuBXwD+A7ul9jgLurLSskSNHxq7u7rvv3vSK7343Rojx+edbFt+asTtRvLlXJ97cqxNv7tWJ31bHbm28uVcn3tyrE2/u1Yk39+rEm3vnG7u18ebetQHzYo4eT7sf/hZj/EqMcVCMcQjwYeCuGOME4G7glPRuZwKz2zu3TqGhAQ49FAYMqHYmkiRJkiRpG1bNX3/b3H8CXwwhPE0yx9K1Vc6n42lshPvv91ffJEmSJElS1XWv5uAxxrnA3PT8X4C3VTOfDu/ee2HNGptKkiRJkiSp6jrSnkqqpKEBevSAd72r2plIkiRJkqRtnE2lzqShAY4+Gmpqqp2JJEmSJEnaxtlU6iyefx4efdRD3yRJkiRJUodgU6mz+P3vk782lSRJkiRJUgdgU6mzaGiAXXeFww+vdiaSJEmSJEk2lTqC119/nbe97W0MHz6ct771rXzjG98A4Mc//jETJkwghMA/7rwTjjsOtvMlkyRJkiRJ1WeHogPYYYcduOuuu3j44YdZtGgRd9xxB3/84x955zvfydSpU9l7jz3ghRc89E2SJEmSJHUY3audgCCEQO/evQFYu3Yta9euJYTA4YcfzqpVq+C115I72lSSJEmSJEkdhHsqdRBvvvkmhx12GLvtthtjxoxh1KhRG298/XUYOhT23rt6CUqSJEmSJDVjU6mD6NatG4sWLWL58uU89NBDLF68GICwdm3SVDrmmCpnKEmSJEmStJFNpQ6mb9++HHvssdxxxx0A9HnsMYgRRo+ubmKSJEmSJEnN2FTqAF588UVWrlwJwGuvvUZDQwMHHnggAG+ZPz+509FHVys9SZIkSZKkLdhU6gCee+45jj32WA499FCOPPJIxowZw/vf/36uuOIK3jFzJsuBQ48+mkmTWy2M7AAAF59JREFUJlU7VUmSJEmSJMBff+sQDj30UBYuXLjF9Z/72Mf47Be+QPj61+Gb36xCZpIkSZIkSaW5p1IHVve97/HZdeuo69mz2qlIkiRJkiRtwqZSB1VXV8fpU6dyFXD6t79NXV1dtVOSJEmSJEnawMPfOoghF9y2yeV/NlxDY1MTAI2NjUz45jXs+kC3Dbcvm3Jiu+YnSZIkSZLUnHsqdVA9h4wgdN8BgNB9B3oOGVHljCRJkiRJkjZyT6UOqmbYKPrVfpkDX57PkzuNpGbYqGqnJEmSJEmStIFNpQ6sZtgoTj1kJFMf9WWSJEmSJEkdi4e/SZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwmwqSZIkSZIkqTCbSpIkSZIkSSrMppIkSZIkSZIKs6kkSZIkSZKkwrpXOwG13pALbsu87bxDmjirxO3LppzYlilJkiRJkqQuzj2VJEmSJEmSVFi7N5VCCINDCHeHEB4PITwWQpicXv+WEEJDCOGp9O8u7Z2bJEmSJEmS8qnGnkpNwHkxxoOAtwP/EUI4CLgAmBNjHAbMSS9LkiRJkiSpA2r3plKM8bkY44L0/MvAE8CewEnA9endrgfGt3dukiRJkiRJyqeqcyqFEIYAhwMPAgNijM+lNz0PDKhSWpIkSZIkSaogxBirM3AIvYF7gEtijL8NIayMMfZtdvtLMcYt5lUKIZwNnA0wYMCAkTNnzmy3nNvSo8+uKnn9gJ7wwmtbXn/InjtXjM0bn+WVV16hd+/eFe/XEePN3dw709itjTf3bS93nzdz70xjtzbe3M2
9M43d2nhz3/Zy93nbNnPvDI499tj5McYjKt4xxtjuJ2B74E7gi82uWwLsnp7fHVhSaTkjR46MXcXe/3lrydMVv7yl5PV5YvPGZ7n77rtb9ZiqGW/u1Yk39+rEm3t14rfVsVsbb+7ViTf36sSbe3Xizb068ebe+cZubby5d23AvJijv1ONX38LwLXAEzHGy5rdVAecmZ4/E5jd3rlJkiRJkiQpn+5VGPOdwMeAR0MIi9LrLgSmADeFED4B/BX4UBVykyRJkiRJUg7t3lSKMd4PhIybj2vPXCRJkiRJktQyVf31N0mSJEmSJHVONpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJhNpUkSZIkSZJUmE0lSZIkSZIkFWZTSZIkSZIkSYXZVJIkSZIkSVJh3audgKpvyAW3lbz+vEOaOCvjtmVTTmzLlCRJkiRJUgfnnkqSJEmSJEkqzKaSJEmSJEmSCrOpJEmSJEmSpMJsKkmSJEmSJKkwm0qSJEmSJEkqzKaSJEmSJEmSCrOpJEmSJEmSpMJsKkmSJEmSJKkwm0qSJEmSJEkqrHu1E1DnN+SC20pef94hTZxV4rZlU06sGJs3XpIkSZIkVYdNJXVqRRtaYFNKkiRJkqStwaaStmnuZSVJkiRJUsvYVJKqpLV7WbWmISZJkiRJUmt1qKZSCGEccDnQDfh5jHFKlVOSuqTW7mVVzYaYe4hJkiRJUsfQYZpKIYRuwFXAGGA58KcQQl2M8fHqZiapK+nMDTH3TpMkSZLUkXSYphLwNuDpGONfAEIIM4GTAJtKkrQVdOaGmLnniy01viRJktRWOlJTaU/gmWaXlwOjqpSLJEmdXmduiHWmRmJr4829ZWO3Nt7cWzZ2a+O7Su6SpESIMVY7BwBCCKcA42KMk9LLHwNGxRjP3ex+ZwNnpxcPAJa0a6Ltrx/wjyrFV3Ps1sabe3Xizb068eZenfhtdezWxpt7deLNvTrx5l6deHOvTry5d76xWxtv7l3b3jHG/hXvFWPsECfgKODOZpe/Anyl2nlV+wTMq1Z8Ncc2d3M3984xtrlve2Obu7mbe+cY29zN3dwd29w75thd7bQdHcefgGEhhH1CCD2ADwN1Vc5JkiRJkiRJJXSYOZVijE0hhHOBO4FuwLQY42NVTkuSJEmSJEkldJimEkCM8XfA76qdRwdzTRXjqzl2a+PNvTrx5l6deHOvTvy2OnZr4829OvHmXp14c69OvLlXJ97cO9/YrY03d3WcibolSZIkSZLUeXSkOZUkSZIkSZLUSdhU6qBCCNNCCCtCCItbEDs4hHB3COHxEMJjIYTJBeN3DCE8FEJ4OI3/Zgty6BZCWBhCuLUFsctCCI+GEBaFEOa1IL5vCGFWCOHJEMITIYSjCsQekI67/rQ6hPD5AvFfSJ+zxSGEGSGEHQvmPjmNfSzPuKXqJITwlhBCQwjhqfTvLgViT03HXhdCOKIFY/8gfd4fCSHcHELoWzD+22nsohBCfQhhj7yxzW47L4QQQwj9Co59cQjh2Wav/QlF4tPrP5s+/sdCCN8vMPaNzcZdFkJYVDD3w0IIf1z/PxNCeFvB+OEhhD+k/3f/E0LokxFb8r2lQM1lxVesuzKxuWquTHzemiv7vlqu7sqMnavmyo2ds+ayxq9Yd2Vic9Vcmfi8NVdyfRSSH/V4MITwdPo4ehSIPTeNq/Q+kRX/qxDCkpC8V08LIWxfMP7a9Lp
HQrKu6p03ttntV4QQXmlB7teFEJY2e90PKxAbQgiXhBD+HJJ16+cKjn1fs3H/HkK4pWD8cSGEBWn8/SGE/QrEvieNXRxCuD6EUHb6h7DZNkyeeisTm6veysTnqrcy8RXrLSu22fVl663M2BXrrUJ8rprLiM1Vb2XiK9ZbhfjcNRdKbPeG/OvVUrFFtuVKxRfZlisVn3e9mrm9H/Jty5Uau8i2XMnxQ771aqmxi2zLlYovsi1XKj7venWLz0l5661MfK6ay4gtUm+l4nPVW1Z8s9vK1lzG2Lnrrcur9s/PeSp9At4NjAAWtyB2d2BEen4n4M/AQQXiA9A7Pb898CDw9oI5fBH4NXBrC/JfBvRrxXN3PTApPd8D6NvC5XQDngf2znn/PYGlQM/08k3AWQXGOxhYDNSQzHf2e2C/onUCfB+4ID1/AfC9ArH/BhwAzAWOaMHYY4Hu6fnvZY1dJr5Ps/OfA/47b2x6/WCSyf7/Wq6GMsa+GDg/52tVKv7Y9DXbIb28W5Hcm90+Ffh6wbHrgfel508A5haM/xNwTHr+48C3M2JLvrcUqLms+Ip1VyY2V82Vic9bc5nvq5XqrszYuWquTHzemqu4TsiquzJj56q5MvF5a67k+ojk/fXD6fX/DZxTIPZwYAgV1jVl4k9IbwvAjFJjV4hvXnOXkf7v5IlNLx8B/AJ4pQW5XwecUqHesmInAjcA21Wot4rbEMD/A84oOP6fgX9Lr/8McF3O2HcAzwD7p9d/C/hEhedgk22YPPVWJjZXvZWJz1VvZeIr1ltWbN56KzN2xXqrEJ+r5rJyz1NvZcauWG9Z8SRf2ueuuVK1Qf71aqnYIttypeKLbMuVis+7Xi35P0H+bblSY19M/m25UvF516slc292e6VtuVJjF9mWKxWfd726xeekvPVWJj5XzWXEFqm3UvG56i0rPm/NZYydu966+sk9lTqoGOO9wL9aGPtcjHFBev5l4AmShkfe+BhjXP+N1PbpKeaNDyEMAk4Efp476a0khLAzyYfmawFijGtijCtbuLjjgP+LMf61QEx3oGdIvpGqAf5eIPbfgAdjjI0xxibgHuCD5QIy6uQkkjc+0r/j88bGGJ+IMS7Jk2xGfH2aO8AfgUEF41c3u9iLjLor8//xX8CXs+JyxOeSEX8OMCXG+EZ6nxVFxw4hBOBDJB8ciowdgfXfSO1MmbrLiN8fuDc93wD8e0Zs1ntL3porGZ+n7srE5qq5MvF5a67c+2rZutsK78lZ8Xlrruz45equTGyumisTn7fmstZH7wFmpdeXrLms2BjjwhjjslLj5Yz/XXpbBB4iu+ay4lfDhue9JyXqJis2hNAN+AFJvRXOvdJjrhB7DvCtGOO69H5Z9VZ27PTb8/cAJfccKRNfseYyYt8E1sQY/5xen1lvaX6bbMOkr1PFeisVm+aUq97KxOeqtzLxFestKzZvvWXFF5ERn6vmyo1dqd7KxOder5aI35UCNZch13q1lDzr1ArxubflMuJzrVfLyLUt10ZyrVfLybMtlyF3zWWouF4t8zkpV71lxeepuTKxueqtTHyueqvwGbFszW3lz5ddkk2lLi6EMITkW7IHC8Z1S3fbXAE0xBiLxP+I5B9zXZExm4lAfQhhfgjh7IKx+wAvAtNDshvyz0MIvVqYx4cpsEKIMT4L/BD4G/AcsCrGWF9gvMXAu0IIu4YQaki+pRhcIH69ATHG59LzzwMDWrCMreHjwO1Fg0Kyq/szwATg6wXiTgKejTE+XHTMZs5Nd6GdVm7X3wz7k7x+D4YQ7gkhHNmC8d8FvBBjfKpg3OeBH6TP2w+BrxSMf4xkgwLgVHLU3WbvLYVrrqXvTRVic9Xc5vFFa655fNG6K5F7oZrbLL5wzWU8d7nqbrPYwjW3WXzumtt8fQT8H7Cy2UbocjKadK1cl5WND8lhSB8D7igaH0KYTvK/ciBwZYHYc4G6Zv9vLcn9krTm/iuEsEOB2H2B00JyWMbtIYR
hLRgbkg8rczb7IJAnfhLwuxDCcpLnfUqeWJJGTPew8ZCMUyj/Hrf5Nsyu5Ky3ErFFZcbnqbes+Dz1lhGbu97K5F6x3srE5625cs97xXrLiM9Vbxnx/6BYzZXa7s27Xm3NNnOe+Err1ZLxOderW8QWXKdm5Z53vVoqPu96tdzzlmedWiq+yHq1VHye9WrW56S89daaz1l5YsvVW2Z8znorGZ+z5srl3prPDl1H7AC7S3kqfSLZXbrw4W/N4nsD84EPtmIZfYG7gYNz3v/9wE/S86Np2eFve6Z/dwMeBt5dIPYIoAkYlV6+nIzdPysspwfJRsGAAjG7AHcB/Um+Hb0F+GjBcT+Rvmb3Aj8FflS0Tkg2fpvf/lLRGiPHLtMV4i8CbobkFyaLxqe3fQX4Zp5Ykr3CHgR2Ti8vo8JhBiWetwEkhzxuB1wCTCsYv5hkYz0AbyM5FLLk4y/zvP0UOK8Fr/kVwL+n5z8E/L5g/IEku13PB74B/LNC/CbvLUVqrlR8kborE5u35jLfFyvV3ObxReuuxPNWtOY2j89dcxWeu4p1V2LsojW3eXyhmktj1q+Pjgaebnb94FL/TxmxBze7ruzrlSP+Z+R4jy4T3w34CTAxZ+y7gfvZeJhAxcORNh+b5HDEAOxA8m105uEZJWJfWV8naf3f18LHffv62imY+2/ZuG7/EvDzArFHAfeRNJi+AyzKiNliGwbol6feSsVudnvZessRX7becsRn1lvG494jb71ljZ233srEV6y5HI+7bL2VGTtXvZWJz1Vz6X232O4l53q1VGyz2+ZSeZ1aLr7ierVcfHp95no143EXWaeWis+9Xs2Iz7VerfC85Vmnlho793o1I77iepWMz0kF6q3s56xyNZcjtmy9VYrPUW+l4n+Qp+bKPG+FtuO68qnqCXgq8+K0oqlE0tS4E/jiVsjj6+Q/Pvm7JN/iLSPpdDcCv2zF2BfnHTu9/0BgWbPL7wJua8G4JwH1BWNOBa5tdvkM0g2NFj72S4HPFK0TYAmwe3p+d2BJ0Rort1KoFA+cBfwBqGlJfLPb9ipX/2zaVDqE5FvpZempiWSPsYEtHLvi/16J5/0O4Nhml/8P6F/geesOvAAMasFrvop0JUyyIbS6Fc/7/sBDZWK3eG8pWHOZ702V6i4rNm/NlRs7Z81tEl+k7nKMXbbmMp73IjWX9dxVrLuMsXPXXI7HXrbmNrvv10k+4P2DjR92jwLuzBl7frPLyygwf1/zeJIN9ltI53opGt/suneT48uXNPYbJOvV9fW2jmbNjhaMPbrA2OcDTwL7NHvNV7XgeesH/BPYseDz9iWSw9Gb/68+3sLHPRa4KeP+pbZhfpWn3jJif9ns9rL1Vi4+T71VGr9cvWXEvpS33nKOnVlvWfF5aq7C81ax3jLib8tbbzkfe2bNlVjexST/b7nXq5vHNrs8lxzbcqXiKbAtlzV+s+eu4meZNPZrFNyWqzD2kDxjb/a8516vZjxvubflSoxdaFuuwmMvuV4l43NS3nrLis9Tc+Vi89RbpbEr1VtG/Jw8NZdz7Nz11hVPHv7WBYUQAskxn0/EGC9rQXz/kM68H0LoCYwhWbFXFGP8SoxxUIxxCMnhY3fFGD9aYOxeIYSd1p8nWRHn/gW8GOPzwDMhhAPSq44DHs8b38zpFD8W+m/A20MINelrcBzJ/CG5hRB2S//uRfKt3K8L5gBQB5yZnj8TmN2CZbRICGEcyS7gtTHGxhbEN9+1/STy192jMcbdYoxD0tpbTjI58PMFxt692cWTKVB3qVtIJngkhLA/G/d2y+u9wJMxxuUFx4XkuPtj0vPvAQodPtes7rYDvkoyEW2p+2W9t+Squda8N2XF5q25MvG5aq5UfN66KzN2rpor87zlqrkKz3vZuisTm6vmyjz2vDVXan30BMkeKKekdytZc61Zl5WLDyFMAo4HTo/pXC8F4peE9Fek0uemtlROGbHzY4wDm9VbY4yx5C9Slcl992Z
jj6dEzZV53jbUG8lr/+fNYyvEQ/Ka3RpjfL30s1b2Nd85rXOaXZf3ca+vtx2A/ySj3jK2YSaQo95au/2TFZ+33krFAx/LU28ZY++St97K5F6x3srFk6PmKjzvFest43k7iRz1VuGx56q5Mtu9Fderrd1mzoovsF7Niq+4Xs2I/VPebbkyY+ddr2Y9dxXXqxWe94rbcmXi865Xsx57xfVqmc9JubbjWvM5Kys2b72Vic+1HZcRvyBPzZUZu7WfHbqOana0PGWfSBoazwFrSQq87C+VbBZ7NMmxto8Ai9LTCQXiDwUWpvGLqbB7fJnljKbg4W/AUJLdOB8mOTb4ohaMexgwL83/FmCXgvG9SL7Z2rkFY3+T5M1sMcmvpexQMP4+kjfnh4HjWlInJPM/zCFZGf0eeEuB2JPT82+QfNOS+e1/RvzTJL94sr7uyv0CQ6n4/5c+d48A/0O6e2/R/w8qfyNcauxfAI+mY9eRfmNTIL4HyTeri4EFwHuK5E7yKzmfbuFrfjTJ7s4Pk+zGO7Jg/GSSjfU/k8wbkbXrccn3lgI1lxVfse7KxOaquTLxeWuu4vtqVt2VGTtXzZWJz1tzmblXqrsyY+equTLxeWuu5PqIZF3xUPr6/4YS77VlYj+X1lsTyUZ81mEtWfFNJN9er388WYf0bBFPsov8/6av+2KSvWD65B17s/uUOxwpK/e7mo39S9JfSssZ25fkG+1HSb5RHl5k7PS2ucC4Cu9xWeOfnI79cLqcoQVif0DSFFgCfL7c+M2WNZqNhzJVrLcysbnqrUx8rnorFZ+33rLGzltvZXKvWG8V4nPVXFbueeqtzNgV661CfK6aI2O7lxzr1TKxubblysTnXa9mxVdcr2bFbnafZWT/ElfW2HnXq1nxFder5XInx7ZcmbHzrlez4vOuV7f4nJSn3irE5625UrFFPjuUis+1HZcVX6DmSo2d+7NDVz+t38VOkiRJkiRJys3D3yRJkiRJklSYTSVJkiRJkiQVZlNJkiRJkiRJhdlUkiRJkiRJUmE2lSRJkiRJklSYTSVJkiRJkiQVZlNJkiRJkiRJhdlUkiRJkiRJUmH/H8GgfaEKBa7NAAAAAElFTkSuQmCC\n",
       "text/plain": [
-       "<Figure size 1080x360 with 1 Axes>"
+       "<Figure size 1440x360 with 1 Axes>"
       ]
      },
      "metadata": {
@@ -2851,10 +2856,11 @@
     }
    ],
    "source": [
+    "pca = pipeline.named_steps['pca']\n",
     "m = len(pca.explained_variance_ratio_)\n",
-    "plt.figure(figsize=(15,5))\n",
+    "plt.figure(figsize=(20,5))\n",
     "plt.bar(x=range(m), height=pca.explained_variance_ratio_ * 100)\n",
-    "plt.xticks(range(m), ['PC'+str(i) for i in range(1,m+1)])\n",
+    "plt.xticks(range(m), [str(i) for i in range(1,m+1)])\n",
     "plt.title(\"Explained variance\")\n",
     "plt.ylabel(\"Percentage\")\n",
     "\n",
@@ -2871,13 +2877,6 @@
     "plt.grid()\n",
     "plt.show()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/models.ipynb b/models.ipynb
index f26a577..27db8e7 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -121,7 +121,7 @@
        "(1451, 83)"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }

From 2db5d1172cc075ad263887cbe16a277d3b536ee7 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Wed, 17 Apr 2019 23:13:12 +0100
Subject: [PATCH 10/24] optimizing......

---
 OurPipeline.py        |  17 +-
 analysis.ipynb        |   3 +-
 models.ipynb          | 545 +++++++++++++++++++++---------------------
 utils/LastInfected.py |   2 +-
 utils/LastWeeks.py    |   2 +-
 5 files changed, 282 insertions(+), 287 deletions(-)

diff --git a/OurPipeline.py b/OurPipeline.py
index 08454be..35531c0 100644
--- a/OurPipeline.py
+++ b/OurPipeline.py
@@ -6,16 +6,15 @@
 from utils.LastWeeks import LastWeeks
 from utils.LastInfected import LastInfected
 
-def create_pipeline(attr, n_weeks, pca_n_components=None,  n_non_train=4):
-    pipelist = [
+def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, n_non_train=4):
+
+    return Pipeline([
         ('imputer', ContinuityImputer(attributes=attr[n_non_train:])),
-        ('lw', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
-        ('lf', LastInfected(weeks=n_weeks)),
+        ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
+        ('l_infected', LastInfected(weeks=n_weeks_infected)),
         ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])),
         ('scaler', StandardScaler()),
+        ('pca', pca),
+        ('est_opt', estimator_optimizer),
     ]
-
-    if pca_n_components is not None:
-        pipelist.append(('pca', PCA(n_components=pca_n_components)))
-
-    return Pipeline(pipelist)
\ No newline at end of file
+)
\ No newline at end of file
diff --git a/analysis.ipynb b/analysis.ipynb
index 76e407c..cea2bd7 100644
--- a/analysis.ipynb
+++ b/analysis.ipynb
@@ -2829,9 +2829,10 @@
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
+    "from sklearn.decomposition import PCA\n",
     "\n",
     "attr=list(train_data)[:-1]\n",
-    "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=0.999)\n",
+    "pipeline = create_pipeline(attr, n_weeks=3, pca=PCA(0.999))\n",
     "\n",
     "X_train = pipeline.fit_transform(train_data.iloc[:,:-1].copy(), train_data.iloc[:,-1].copy())\n",
     "X_train.shape"
diff --git a/models.ipynb b/models.ipynb
index 27db8e7..cde0b58 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -103,39 +103,6 @@
     "X_train_1.shape"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Data Pipeline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(1451, 83)"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%autoreload\n",
-    "from OurPipeline import create_pipeline\n",
-    "\n",
-    "pipeline = create_pipeline(attr, n_weeks=3, pca_n_components=None)\n",
-    "\n",
-    "X_train = pipeline.fit_transform(X_train_1, y_train)\n",
-    "X_train.shape"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -145,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 77,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -158,8 +125,9 @@
     "from scipy.stats import uniform as sp_uniform\n",
     "score_metric='neg_mean_absolute_error'\n",
     "jobs=-1 #-1 to make it execute in parallel\n",
-    "verbose_level = 1\n",
-    "random_n = 42"
+    "verbose_level = 0\n",
+    "random_n = 42\n",
+    "base_args = {'estimator': None, 'param_distributions': None, 'n_iter': None, 'scoring': score_metric, 'n_jobs': jobs, 'cv': None, 'verbose': verbose_level, 'random_state': random_n, 'return_train_score': True, 'iid': True}"
    ]
   },
   {
@@ -172,73 +140,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 78,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=4\n",
+    "k_folds=10\n",
     "n_iter_search = 20\n",
     "C = sp_randint(0, 10000)\n",
-    "params = {'kernel':['rbf', 'linear'], 'gamma':['scale'], 'C': C}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 4 folds for each of 20 candidates, totalling 80 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 24.6min\n",
-      "[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed: 52.6min finished\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "-19.17685248872835"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "SVR_optimizer.fit(X_train, y_train)\n",
-    "SVR_optimizer.best_score_"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SVR(C=769, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "SVR_optimizer.best_estimator_"
+    "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n",
+    "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
   {
@@ -260,80 +170,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
     "n_iter_search = 100\n",
     "min_samples = sp_uniform(0.01, 0.35)\n",
-    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    8.0s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   32.2s\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:  1.3min\n",
-      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  2.3min\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.9min finished\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "-9.041006202618883"
-      ]
-     },
-     "execution_count": 71,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Tree_optimizer.fit(X_train, y_train)\n",
-    "Tree_optimizer.best_score_"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-       "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-       "           min_samples_split=0.107526262482814,\n",
-       "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-       "           splitter='best')"
-      ]
-     },
-     "execution_count": 72,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Tree_optimizer.best_estimator_"
+    "params = {'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}\n",
+    "Tree_optimizer = RandomizedSearchCV(estimator=DecisionTreeRegressor(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
   {
@@ -350,161 +195,265 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
     "n_iter_search = 40\n",
-    "min_samples = sp_uniform(0.01, 0.35)\n",
-    "params = {'n_estimators': sp_randint(2,30), 'criterion':['mae'], 'max_depth': sp_randint(2, 10), 'min_samples_split': min_samples, 'min_samples_leaf': min_samples}"
+    "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n",
+    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 74,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 10 folds for each of 40 candidates, totalling 400 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:   24.7s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:  2.4min\n",
-      "[Parallel(n_jobs=-1)]: Done 400 out of 400 | elapsed:  4.9min finished\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "-9.22168619342982"
-      ]
-     },
-     "execution_count": 74,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "Forest_optimizer.fit(X_train, y_train)\n",
-    "Forest_optimizer.best_score_"
+    "## Adaboost of Trees\n",
+    "* 10.78 - With 3 last weeks a 3 last infected \n",
+    "* **8.49** - With 3 last weeks a 3 last infected and only max_depth tuned."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=9,\n",
-       "           max_features='auto', max_leaf_nodes=None,\n",
-       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=0.014927937950279559,\n",
-       "           min_samples_split=0.0795948414310818,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=9, n_jobs=-1,\n",
-       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
-      ]
-     },
-     "execution_count": 75,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "Forest_optimizer.best_estimator_"
+    "k_folds=10\n",
+    "n_iter_search = 20\n",
+    "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7), 'base_estimator__n_estimators': sp_randint(40, 100)}\n",
+    "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Adaboost of Trees\n",
-    "* 10.78 - With 3 last weeks a 3 last infected \n",
-    "* **8.49** - With 3 last weeks a 3 last infected and only max_depth tuned."
+    "## KNN\n",
+    "* 21.349 - with PCA at 0.65 & 2 previous weeks\n",
+    "* 20.36  - without PCA"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 97,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
-    "n_iter_search = 10\n",
-    "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}"
+    "n_iter_search = 100\n",
+    "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}\n",
+    "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "AdaTree_optimizer.fit(X_train, y_train)\n",
-    "AdaTree_optimizer.best_score_"
+    "# Optimization"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 92,
+   "execution_count": 76,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=4, max_features=None,\n",
-       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-       "           min_impurity_split=None, min_samples_leaf=1,\n",
-       "           min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
-       "           presort=False, random_state=None, splitter='best'),\n",
-       "         learning_rate=1.0, loss='linear', n_estimators=50,\n",
-       "         random_state=None)"
-      ]
-     },
-     "execution_count": 92,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 239 tasks      | elapsed:   11.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 739 tasks      | elapsed:   36.9s\n",
+      "[Parallel(n_jobs=-1)]: Done 993 out of 1000 | elapsed:   50.0s remaining:    0.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   50.1s finished\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Best score of 12.7829083390765 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]\n",
+      "1/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]]\n",
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.1s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   16.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   41.2s\n",
+      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.6min finished\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Best score of 8.982081323225362 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+      "           min_samples_split=0.107526262482814,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, None, -8.982081323225362]\n",
+      "2/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+      "           min_samples_split=0.107526262482814,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, None, -8.982081323225362]]\n",
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    6.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 420 tasks      | elapsed:   18.9s\n",
+      "[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   45.4s\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   49.9s finished\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "3/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+      "           min_samples_split=0.107526262482814,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 3, None, -8.982081323225362], [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best'), 1, 4, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+      "  svd_solver='auto', tol=0.0, whiten=False), -13.062715368711233]]\n",
+      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.1s\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-76-2cc77083a6a3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m    265\u001b[0m         \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    266\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    268\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    720\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1513\u001b[0m         evaluate_candidates(ParameterSampler(\n\u001b[1;32m   1514\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m             random_state=self.random_state))\n\u001b[0m",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m    709\u001b[0m                                \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    710\u001b[0m                                in product(candidate_params,\n\u001b[0;32m--> 711\u001b[0;31m                                           cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m    712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    713\u001b[0m                 \u001b[0mall_candidate_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcandidate_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    929\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    931\u001b[0m             \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    932\u001b[0m             \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    831\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    832\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    834\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    835\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m    519\u001b[0m         AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m    520\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    522\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    523\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    425\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    429\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    294\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m    \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m                 \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    297\u001b[0m                 \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    298\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
     }
    ],
    "source": [
-    "AdaTree_optimizer.best_estimator_"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## KNN\n",
-    "* -21.349 - with PCA at 0.65 & 2 previous weeks\n",
-    "* -20.36  - without PCA"
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "#pipeline = create_pipeline(attr, estimator_optimizer=RandomizedSearchCV(None, None), n_weeks=0, n_weeks_infected=0)\n",
+    "optimizers=[Tree_optimizer]#, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
+    "weeks = [1,2]\n",
+    "weeks_infected = [3,4]\n",
+    "pca = [PCA(0.95), None]\n",
+    "\n",
+    "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n",
+    "\n",
+    "results=[]\n",
+    "best_attempt = None\n",
+    "best_score = np.inf\n",
+    "idx=0\n",
+    "for opt in optimizers:\n",
+    "    for w in weeks:\n",
+    "        for wi in weeks_infected:\n",
+    "            for p in pca:\n",
+    "                pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=p)\n",
+    "                pipeline.fit(X_train_1, y_train)\n",
+    "                score = pipeline.named_steps['est_opt'].best_score_\n",
+    "                best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
+    "                attempt = [best_estimator, w, wi, p, score]\n",
+    "                if abs(score) < best_score:\n",
+    "                    best_score = abs(score)\n",
+    "                    best_estimator = attempt\n",
+    "                    print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n",
+    "                idx+=1\n",
+    "                print(str(idx) + '/' + str(n_total), end='\\t')\n",
+    "                results.append(attempt)\n",
+    "                print(results)\n",
+    "                    "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=10\n",
-    "n_iter_search = 100\n",
-    "params = {'n_neighbors': sp_randint(3,150), 'weights': ['uniform', 'distance']}"
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=2, n_weeks_infected=2, estimator_optimizer=Tree_optimizer, pca=None)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 69,
    "metadata": {},
    "outputs": [
     {
@@ -519,29 +468,75 @@
      "output_type": "stream",
      "text": [
       "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.5s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   19.7s\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   45.0s\n",
-      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.3min\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.6min finished\n"
+      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    5.9s\n",
+      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   23.9s\n",
+      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   56.8s\n",
+      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.8min\n",
+      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.2min finished\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "-20.359505759574677"
+       "Pipeline(memory=None,\n",
+       "     steps=[('imputer', ContinuityImputer(attributes=['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2', 'reanalys...t=True,\n",
+       "          return_train_score=True, scoring='neg_mean_absolute_error',\n",
+       "          verbose=1))])"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pipeline.fit(X_train_1, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-9.066505858028945"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 70,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "KNN_optimizer = RandomizedSearchCV(estimator=KNeighborsRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)\n",
-    "KNN_optimizer.fit(X_train, y_train)\n",
-    "KNN_optimizer.best_score_"
+    "pipeline.named_steps['est_opt'].best_score_"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "invalid syntax (<ipython-input-72-0452fe5c3e92>, line 1)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-72-0452fe5c3e92>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    2.copy()\u001b[0m\n\u001b[0m         ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+     ]
+    }
+   ],
+   "source": [
+    "2.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index 6c1c60c..1830dce 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -24,8 +24,8 @@ def transform(self, X, model=None):
         for idx, n_infected in enumerate(self.y):
             city = X.loc[idx, 'city']
             r[idx] = self.last[city]
-            self.last[city].pop()
             self.last[city].appendleft(n_infected)
+            self.last[city].pop()
 
         r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)])
         
diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py
index abe0379..0de33dd 100644
--- a/utils/LastWeeks.py
+++ b/utils/LastWeeks.py
@@ -27,8 +27,8 @@ def transform(self, X):
         for idx, week in X.iterrows():
             city = week['city']
             r[idx] = self.last[city]
-            self.last[city].pop()
             self.last[city].appendleft(week[self.attributes])
+            self.last[city].pop()
 
         r = pd.DataFrame(r.reshape([X.shape[0], self.weeks * len(self.attributes)]),
                      columns=[self.new_attributes_prefix + str(week) + '_' + str(attr)

From 2c2127f58ae21f1ebb1b76e7b13b2d3a5cf3f475 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Thu, 18 Apr 2019 08:59:55 +0100
Subject: [PATCH 11/24] optimizing after error

---
 models.ipynb | 470 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 296 insertions(+), 174 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index cde0b58..d8de1c1 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 77,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 78,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,13 +216,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
     "n_iter_search = 20\n",
-    "params = {'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7), 'base_estimator__n_estimators': sp_randint(40, 100)}\n",
+    "params = {'n_estimators': sp_randint(40, 100), 'base_estimator__criterion':['mae'], 'base_estimator__max_depth': sp_randint(2,7)}\n",
     "AdaTree_optimizer = RandomizedSearchCV(estimator=AdaBoostRegressor(base_estimator=DecisionTreeRegressor()), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
    ]
   },
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -251,145 +251,94 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Optimization"
+    "# Optimization\n"
+   ]
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {},
+   "source": [
+    "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+    "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+    "           min_samples_split=0.08977730688967958,\n",
+    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+    "           splitter='best')\n",
+    "1/40\t\n",
+    "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+    "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+    "           min_samples_split=0.107526262482814,\n",
+    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+    "           splitter='best')\n",
+    "2/40\t3/40\t\n",
+    "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+    "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
+    "           min_samples_split=0.107526262482814,\n",
+    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+    "           splitter='best')\n",
+    "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n",
+    "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
+    "           max_features='auto', max_leaf_nodes=None,\n",
+    "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+    "           min_samples_leaf=1, min_samples_split=2,\n",
+    "           min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
+    "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
+    "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 76,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 239 tasks      | elapsed:   11.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 739 tasks      | elapsed:   36.9s\n",
-      "[Parallel(n_jobs=-1)]: Done 993 out of 1000 | elapsed:   50.0s remaining:    0.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   50.1s finished\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "\n",
-      "Best score of 12.7829083390765 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]\n",
-      "1/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
       "           min_samples_split=0.08977730688967958,\n",
       "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765]]\n",
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.1s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   16.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   41.2s\n",
-      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.3min\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  1.6min finished\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Best score of 8.982081323225362 with the estimator [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+      "           splitter='best')\n",
+      "1/40\t\n",
+      "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
       "           min_samples_split=0.107526262482814,\n",
       "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, None, -8.982081323225362]\n",
-      "2/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
+      "           splitter='best')\n",
+      "2/40\t3/40\t\n",
+      "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
       "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
       "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
       "           min_samples_split=0.107526262482814,\n",
       "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, None, -8.982081323225362]]\n",
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
+      "           splitter='best')\n",
+      "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n",
+      "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
+      "           max_features='auto', max_leaf_nodes=None,\n",
+      "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "           min_samples_leaf=1, min_samples_split=2,\n",
+      "           min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
+      "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
+      "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40\t"
      ]
     },
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    6.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 420 tasks      | elapsed:   18.9s\n",
-      "[Parallel(n_jobs=-1)]: Done 920 tasks      | elapsed:   45.4s\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:   49.9s finished\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "3/8\t[[DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-      "  svd_solver='auto', tol=0.0, whiten=False), -12.7829083390765], [DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-      "           min_samples_split=0.107526262482814,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 3, None, -8.982081323225362], [DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best'), 1, 4, PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-      "  svd_solver='auto', tol=0.0, whiten=False), -13.062715368711233]]\n",
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.1s\n"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
+     "ename": "ValueError",
+     "evalue": "Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`.",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-76-2cc77083a6a3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31m_RemoteTraceback\u001b[0m                          Traceback (most recent call last)",
+      "\u001b[0;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 418, in _process_worker\n    r = call_item()\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 272, in __call__\n    return self.fn(*self.args, **self.kwargs)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\", line 567, in __call__\n    return self.func(*args, **kwargs)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in __call__\n    for func, args, kwargs in self.items]\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in <listcomp>\n    for func, args, kwargs in self.items]\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_validation.py\", line 514, in _fit_and_score\n    estimator.set_params(**parameters)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 224, in set_params\n    valid_params[key].set_params(**sub_params)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 215, in set_params\n    (key, self))\nValueError: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). 
Check the list of available parameters with `estimator.get_params().keys()`.\n\"\"\"",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-10-8369b74c1e80>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m    265\u001b[0m         \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    266\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    268\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    720\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1513\u001b[0m         evaluate_candidates(ParameterSampler(\n\u001b[1;32m   1514\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m             random_state=self.random_state))\n\u001b[0m",
@@ -397,18 +346,18 @@
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    929\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    931\u001b[0m             \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    932\u001b[0m             \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    831\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    832\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    834\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    835\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m    519\u001b[0m         AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m    520\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    522\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    523\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    425\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    429\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    294\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m    \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m                 \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    297\u001b[0m                 \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    298\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    430\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    431\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mFINISHED\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 432\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    433\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    434\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    382\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    383\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 384\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    385\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    386\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mValueError\u001b[0m: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`."
      ]
     }
    ],
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
+    "from sklearn.decomposition import PCA\n",
     "\n",
-    "#pipeline = create_pipeline(attr, estimator_optimizer=RandomizedSearchCV(None, None), n_weeks=0, n_weeks_infected=0)\n",
-    "optimizers=[Tree_optimizer]#, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
+    "optimizers=[AdaTree_optimizer, KNN_optimizer, SVR_optimizer]#[Tree_optimizer, Forest_optimizer \n",
     "weeks = [1,2]\n",
     "weeks_infected = [3,4]\n",
     "pca = [PCA(0.95), None]\n",
@@ -430,105 +379,278 @@
     "                attempt = [best_estimator, w, wi, p, score]\n",
     "                if abs(score) < best_score:\n",
     "                    best_score = abs(score)\n",
-    "                    best_estimator = attempt\n",
+    "                    best_attempt = attempt\n",
     "                    print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n",
     "                idx+=1\n",
     "                print(str(idx) + '/' + str(n_total), end='\\t')\n",
-    "                results.append(attempt)\n",
-    "                print(results)\n",
-    "                    "
+    "                results.append(attempt)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload\n",
-    "from OurPipeline import create_pipeline\n",
-    "\n",
-    "pipeline = create_pipeline(attr, n_weeks=2, n_weeks_infected=2, estimator_optimizer=Tree_optimizer, pca=None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Fitting 10 folds for each of 100 candidates, totalling 1000 fits\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.\n",
-      "[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    5.9s\n",
-      "[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   23.9s\n",
-      "[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   56.8s\n",
-      "[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.8min\n",
-      "[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:  2.2min finished\n"
-     ]
-    },
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>estimator</th>\n",
+       "      <th>weeks</th>\n",
+       "      <th>weeks_infected</th>\n",
+       "      <th>PCA</th>\n",
+       "      <th>score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-12.752929</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-8.983460</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-13.050310</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-8.971744</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-13.235700</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-9.039628</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-12.913163</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-9.025844</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-9.806561</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-7.174190</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-10.178593</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-7.197634</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-10.119454</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-7.177250</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
+       "      <td>-10.353996</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>None</td>\n",
+       "      <td>-7.212562</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "Pipeline(memory=None,\n",
-       "     steps=[('imputer', ContinuityImputer(attributes=['ndvi_ne', 'ndvi_nw', 'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k', 'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k', 'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k', 'reanalysis_precip_amt_kg_per_m2', 'reanalys...t=True,\n",
-       "          return_train_score=True, scoring='neg_mean_absolute_error',\n",
-       "          verbose=1))])"
+       "                                            estimator  weeks  weeks_infected  \\\n",
+       "0   DecisionTreeRegressor(criterion='mae', max_dep...      1               3   \n",
+       "1   DecisionTreeRegressor(criterion='mae', max_dep...      1               3   \n",
+       "2   DecisionTreeRegressor(criterion='mae', max_dep...      1               4   \n",
+       "3   DecisionTreeRegressor(criterion='mae', max_dep...      1               4   \n",
+       "4   DecisionTreeRegressor(criterion='mae', max_dep...      2               3   \n",
+       "5   DecisionTreeRegressor(criterion='mae', max_dep...      2               3   \n",
+       "6   DecisionTreeRegressor(criterion='mae', max_dep...      2               4   \n",
+       "7   DecisionTreeRegressor(criterion='mae', max_dep...      2               4   \n",
+       "8   (DecisionTreeRegressor(criterion='mae', max_de...      1               3   \n",
+       "9   (DecisionTreeRegressor(criterion='mae', max_de...      1               3   \n",
+       "10  (DecisionTreeRegressor(criterion='mae', max_de...      1               4   \n",
+       "11  (DecisionTreeRegressor(criterion='mae', max_de...      1               4   \n",
+       "12  (DecisionTreeRegressor(criterion='mae', max_de...      2               3   \n",
+       "13  (DecisionTreeRegressor(criterion='mae', max_de...      2               3   \n",
+       "14  (DecisionTreeRegressor(criterion='mae', max_de...      2               4   \n",
+       "15  (DecisionTreeRegressor(criterion='mae', max_de...      2               4   \n",
+       "\n",
+       "                                                  PCA      score  \n",
+       "0   PCA(copy=True, iterated_power='auto', n_compon... -12.752929  \n",
+       "1                                                None  -8.983460  \n",
+       "2   PCA(copy=True, iterated_power='auto', n_compon... -13.050310  \n",
+       "3                                                None  -8.971744  \n",
+       "4   PCA(copy=True, iterated_power='auto', n_compon... -13.235700  \n",
+       "5                                                None  -9.039628  \n",
+       "6   PCA(copy=True, iterated_power='auto', n_compon... -12.913163  \n",
+       "7                                                None  -9.025844  \n",
+       "8   PCA(copy=True, iterated_power='auto', n_compon...  -9.806561  \n",
+       "9                                                None  -7.174190  \n",
+       "10  PCA(copy=True, iterated_power='auto', n_compon... -10.178593  \n",
+       "11                                               None  -7.197634  \n",
+       "12  PCA(copy=True, iterated_power='auto', n_compon... -10.119454  \n",
+       "13                                               None  -7.177250  \n",
+       "14  PCA(copy=True, iterated_power='auto', n_compon... -10.353996  \n",
+       "15                                               None  -7.212562  "
       ]
      },
-     "execution_count": 69,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "pipeline.fit(X_train_1, y_train)"
+    "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "-9.066505858028945"
+       "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
+       "            max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "            min_samples_leaf=1, min_samples_split=2,\n",
+       "            min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
+       "            oob_score=False, random_state=None, verbose=0, warm_start=False),\n",
+       " 1,\n",
+       " 3,\n",
+       " None,\n",
+       " -7.174190213645762]"
       ]
      },
-     "execution_count": 70,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "pipeline.named_steps['est_opt'].best_score_"
+    "best_attempt"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
-     "ename": "SyntaxError",
-     "evalue": "invalid syntax (<ipython-input-72-0452fe5c3e92>, line 1)",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-72-0452fe5c3e92>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    2.copy()\u001b[0m\n\u001b[0m         ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
-     ]
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=1, min_samples_split=2,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=41, n_jobs=-1,\n",
+       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "2.copy()"
+    "best_estimator"
    ]
   },
   {

From 38184b47a96340910370706ab85325049ee27100 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Thu, 18 Apr 2019 21:23:33 +0100
Subject: [PATCH 12/24] ALL THE MODELS DATA

---
 models.ipynb | 177 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 129 insertions(+), 48 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index d8de1c1..8640b57 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -140,12 +140,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
     "k_folds=10\n",
-    "n_iter_search = 20\n",
+    "n_iter_search = 5\n",
     "C = sp_randint(0, 10000)\n",
     "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n",
     "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -195,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -251,7 +251,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Optimization\n"
+    "# Optimization\n",
+    "* Interestingly, PCA makes all the models worse in this case."
    ]
   },
   {
@@ -290,7 +291,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
@@ -298,47 +299,22 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best')\n",
-      "1/40\t\n",
-      "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-      "           min_samples_split=0.107526262482814,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best')\n",
-      "2/40\t3/40\t\n",
-      "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-      "           min_samples_split=0.107526262482814,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best')\n",
-      "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n",
-      "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
-      "           max_features='auto', max_leaf_nodes=None,\n",
-      "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-      "           min_samples_leaf=1, min_samples_split=2,\n",
-      "           min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
-      "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
-      "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40\t"
+      "Best score of 7.332691708334687 with the estimator SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+      "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n",
+      "1/8\t\n",
+      "Best score of 6.522347109745663 with the estimator SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+      "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n",
+      "2/8\t3/8\t4/8\t"
      ]
     },
     {
-     "ename": "ValueError",
-     "evalue": "Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`.",
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31m_RemoteTraceback\u001b[0m                          Traceback (most recent call last)",
-      "\u001b[0;31m_RemoteTraceback\u001b[0m: \n\"\"\"\nTraceback (most recent call last):\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 418, in _process_worker\n    r = call_item()\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/externals/loky/process_executor.py\", line 272, in __call__\n    return self.fn(*self.args, **self.kwargs)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\", line 567, in __call__\n    return self.func(*args, **kwargs)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in __call__\n    for func, args, kwargs in self.items]\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\", line 225, in <listcomp>\n    for func, args, kwargs in self.items]\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_validation.py\", line 514, in _fit_and_score\n    estimator.set_params(**parameters)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 224, in set_params\n    valid_params[key].set_params(**sub_params)\n  File \"/Users/miguellobo/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/base.py\", line 215, in set_params\n    (key, self))\nValueError: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). 
Check the list of available parameters with `estimator.get_params().keys()`.\n\"\"\"",
-      "\nThe above exception was the direct cause of the following exception:\n",
-      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-10-8369b74c1e80>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-36-cee1190a7fa8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m    265\u001b[0m         \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    266\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    268\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    720\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1513\u001b[0m         evaluate_candidates(ParameterSampler(\n\u001b[1;32m   1514\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m             random_state=self.random_state))\n\u001b[0m",
@@ -346,9 +322,9 @@
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    929\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    931\u001b[0m             \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    932\u001b[0m             \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    831\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    832\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    834\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    835\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
       "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m    519\u001b[0m         AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m    520\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    522\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    523\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    430\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mCancelledError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    431\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mFINISHED\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 432\u001b[0;31m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    433\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    434\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36m__get_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    382\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    383\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 384\u001b[0;31m             \u001b[0;32mraise\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_exception\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    385\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    386\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_result\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mValueError\u001b[0m: Invalid parameter n_estimators for estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n           max_leaf_nodes=None, min_impurity_decrease=0.0,\n           min_impurity_split=None, min_samples_leaf=1,\n           min_samples_split=2, min_weight_fraction_leaf=0.0,\n           presort=False, random_state=None, splitter='best'). Check the list of available parameters with `estimator.get_params().keys()`."
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    425\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    429\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    294\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m    \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m                 \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    297\u001b[0m                 \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    298\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
      ]
     }
    ],
@@ -357,7 +333,7 @@
     "from OurPipeline import create_pipeline\n",
     "from sklearn.decomposition import PCA\n",
     "\n",
-    "optimizers=[AdaTree_optimizer, KNN_optimizer, SVR_optimizer]#[Tree_optimizer, Forest_optimizer \n",
+    "optimizers=[SVR_optimizer]#[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, \n",
     "weeks = [1,2]\n",
     "weeks_infected = [3,4]\n",
     "pca = [PCA(0.95), None]\n",
@@ -653,12 +629,117 @@
     "best_estimator"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=6, max_features=None,\n",
+       "            max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+       "            min_impurity_split=None, min_samples_leaf=1,\n",
+       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
+       "            presort=False, random_state=None, splitter='best'),\n",
+       "          learning_rate=1.0, loss='linear', n_estimators=41,\n",
+       "          random_state=None), 2, 4, None, -7.412474155754652]"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "best_attempt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "   kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
+       " 1,\n",
+       " 3,\n",
+       " None,\n",
+       " -6.522347109745663]"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "best_attempt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[[SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
+       "  1,\n",
+       "  3,\n",
+       "  PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+       "    svd_solver='auto', tol=0.0, whiten=False),\n",
+       "  -7.332691708334687],\n",
+       " [SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
+       "  1,\n",
+       "  3,\n",
+       "  None,\n",
+       "  -6.522347109745663],\n",
+       " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
+       "  1,\n",
+       "  4,\n",
+       "  PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
+       "    svd_solver='auto', tol=0.0, whiten=False),\n",
+       "  -7.8651335759783985],\n",
+       " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
+       "  1,\n",
+       "  4,\n",
+       "  None,\n",
+       "  -6.53493877991638]]"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Predict"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "model = SVR()"
+   ]
   }
  ],
  "metadata": {

From 1deaa13eb8a1ab7619ffb728831e757a8a4ac6c4 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 00:47:18 +0100
Subject: [PATCH 13/24] sequential predictions and with noise

---
 OurPipeline.py        |   4 +-
 models.ipynb          | 541 +++++++++++++++---------------------------
 utils/LastInfected.py |  33 ++-
 utils/LastWeeks.py    |   2 +
 4 files changed, 219 insertions(+), 361 deletions(-)

diff --git a/OurPipeline.py b/OurPipeline.py
index 35531c0..40654cd 100644
--- a/OurPipeline.py
+++ b/OurPipeline.py
@@ -6,12 +6,12 @@
 from utils.LastWeeks import LastWeeks
 from utils.LastInfected import LastInfected
 
-def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, n_non_train=4):
+def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4):
 
     return Pipeline([
         ('imputer', ContinuityImputer(attributes=attr[n_non_train:])),
         ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
-        ('l_infected', LastInfected(weeks=n_weeks_infected)),
+        ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)),
         ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])),
         ('scaler', StandardScaler()),
         ('pca', pca),
diff --git a/models.ipynb b/models.ipynb
index 8640b57..0f971f8 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -195,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -252,88 +252,21 @@
    "metadata": {},
    "source": [
     "# Optimization\n",
-    "* Interestingly, PCA mas all the models worst in this case."
-   ]
-  },
-  {
-   "cell_type": "raw",
-   "metadata": {},
-   "source": [
-    "Best score of 12.752929014472777 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-    "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-    "           min_samples_split=0.08977730688967958,\n",
-    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-    "           splitter='best')\n",
-    "1/40\t\n",
-    "Best score of 8.983459682977257 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-    "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-    "           min_samples_split=0.107526262482814,\n",
-    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-    "           splitter='best')\n",
-    "2/40\t3/40\t\n",
-    "Best score of 8.971743625086148 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=5, max_features=None,\n",
-    "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-    "           min_impurity_split=None, min_samples_leaf=0.02801756293749627,\n",
-    "           min_samples_split=0.107526262482814,\n",
-    "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-    "           splitter='best')\n",
-    "4/40\t5/40\t6/40\t7/40\t8/40\t9/40\t\n",
-    "Best score of 7.174190213645762 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
-    "           max_features='auto', max_leaf_nodes=None,\n",
-    "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-    "           min_samples_leaf=1, min_samples_split=2,\n",
-    "           min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
-    "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
-    "10/40\t11/40\t12/40\t13/40\t14/40\t15/40\t16/40"
+    "* Interestingly, PCA makes all the models worse in this case.\n",
+    "* After the exhaustive search, the best model was the SVR which obtained an MAE of 6.52."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Best score of 7.332691708334687 with the estimator SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-      "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n",
-      "1/8\t\n",
-      "Best score of 6.522347109745663 with the estimator SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-      "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)\n",
-      "2/8\t3/8\t4/8\t"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-36-cee1190a7fa8>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     19\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mp\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m                 \u001b[0mpipeline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcreate_pipeline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mattr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_weeks_infected\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mwi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mestimator_optimizer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mopt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpca\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m                 \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train_1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     22\u001b[0m                 \u001b[0mscore\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     23\u001b[0m                 \u001b[0mbest_estimator\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnamed_steps\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'est_opt'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_estimator_\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/pipeline.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m    265\u001b[0m         \u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfit_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    266\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_final_estimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    268\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, groups, **fit_params)\u001b[0m\n\u001b[1;32m    720\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    721\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 722\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_run_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mevaluate_candidates\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    724\u001b[0m         \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresults_container\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36m_run_search\u001b[0;34m(self, evaluate_candidates)\u001b[0m\n\u001b[1;32m   1513\u001b[0m         evaluate_candidates(ParameterSampler(\n\u001b[1;32m   1514\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparam_distributions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iter\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1515\u001b[0;31m             random_state=self.random_state))\n\u001b[0m",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001b[0m in \u001b[0;36mevaluate_candidates\u001b[0;34m(candidate_params)\u001b[0m\n\u001b[1;32m    709\u001b[0m                                \u001b[0;32mfor\u001b[0m \u001b[0mparameters\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    710\u001b[0m                                in product(candidate_params,\n\u001b[0;32m--> 711\u001b[0;31m                                           cv.split(X, y, groups)))\n\u001b[0m\u001b[1;32m    712\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    713\u001b[0m                 \u001b[0mall_candidate_params\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcandidate_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m    928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    929\u001b[0m             \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieval_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 930\u001b[0;31m                 \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mretrieve\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    931\u001b[0m             \u001b[0;31m# Make sure that we get a last message telling us we are done\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    932\u001b[0m             \u001b[0melapsed_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_start_time\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py\u001b[0m in \u001b[0;36mretrieve\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    831\u001b[0m             \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    832\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_backend\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'supports_timeout'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 833\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    834\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    835\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_output\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m~/Development/UMinho-AA2/.env/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py\u001b[0m in \u001b[0;36mwrap_future_result\u001b[0;34m(future, timeout)\u001b[0m\n\u001b[1;32m    519\u001b[0m         AsyncResults.get from multiprocessing.\"\"\"\n\u001b[1;32m    520\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfuture\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    522\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mLokyTimeoutError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    523\u001b[0m             \u001b[0;32mraise\u001b[0m \u001b[0mTimeoutError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/concurrent/futures/_base.py\u001b[0m in \u001b[0;36mresult\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    425\u001b[0m                 \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__get_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    426\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 427\u001b[0;31m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_condition\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    428\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    429\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_state\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mCANCELLED\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mCANCELLED_AND_NOTIFIED\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;32m/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    294\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m    \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    295\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 296\u001b[0;31m                 \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    297\u001b[0m                 \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    298\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
     "from sklearn.decomposition import PCA\n",
     "\n",
-    "optimizers=[SVR_optimizer]#[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, \n",
+    "optimizers=[SVR_optimizer, Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]\n",
     "weeks = [1,2]\n",
     "weeks_infected = [3,4]\n",
     "pca = [PCA(0.95), None]\n",
@@ -364,238 +297,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>estimator</th>\n",
-       "      <th>weeks</th>\n",
-       "      <th>weeks_infected</th>\n",
-       "      <th>PCA</th>\n",
-       "      <th>score</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-12.752929</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-8.983460</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-13.050310</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-8.971744</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-13.235700</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-9.039628</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>4</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-12.913163</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>4</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-9.025844</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-9.806561</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-7.174190</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-10.178593</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-7.197634</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-10.119454</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-7.177250</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>4</td>\n",
-       "      <td>PCA(copy=True, iterated_power='auto', n_compon...</td>\n",
-       "      <td>-10.353996</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>4</td>\n",
-       "      <td>None</td>\n",
-       "      <td>-7.212562</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                            estimator  weeks  weeks_infected  \\\n",
-       "0   DecisionTreeRegressor(criterion='mae', max_dep...      1               3   \n",
-       "1   DecisionTreeRegressor(criterion='mae', max_dep...      1               3   \n",
-       "2   DecisionTreeRegressor(criterion='mae', max_dep...      1               4   \n",
-       "3   DecisionTreeRegressor(criterion='mae', max_dep...      1               4   \n",
-       "4   DecisionTreeRegressor(criterion='mae', max_dep...      2               3   \n",
-       "5   DecisionTreeRegressor(criterion='mae', max_dep...      2               3   \n",
-       "6   DecisionTreeRegressor(criterion='mae', max_dep...      2               4   \n",
-       "7   DecisionTreeRegressor(criterion='mae', max_dep...      2               4   \n",
-       "8   (DecisionTreeRegressor(criterion='mae', max_de...      1               3   \n",
-       "9   (DecisionTreeRegressor(criterion='mae', max_de...      1               3   \n",
-       "10  (DecisionTreeRegressor(criterion='mae', max_de...      1               4   \n",
-       "11  (DecisionTreeRegressor(criterion='mae', max_de...      1               4   \n",
-       "12  (DecisionTreeRegressor(criterion='mae', max_de...      2               3   \n",
-       "13  (DecisionTreeRegressor(criterion='mae', max_de...      2               3   \n",
-       "14  (DecisionTreeRegressor(criterion='mae', max_de...      2               4   \n",
-       "15  (DecisionTreeRegressor(criterion='mae', max_de...      2               4   \n",
-       "\n",
-       "                                                  PCA      score  \n",
-       "0   PCA(copy=True, iterated_power='auto', n_compon... -12.752929  \n",
-       "1                                                None  -8.983460  \n",
-       "2   PCA(copy=True, iterated_power='auto', n_compon... -13.050310  \n",
-       "3                                                None  -8.971744  \n",
-       "4   PCA(copy=True, iterated_power='auto', n_compon... -13.235700  \n",
-       "5                                                None  -9.039628  \n",
-       "6   PCA(copy=True, iterated_power='auto', n_compon... -12.913163  \n",
-       "7                                                None  -9.025844  \n",
-       "8   PCA(copy=True, iterated_power='auto', n_compon...  -9.806561  \n",
-       "9                                                None  -7.174190  \n",
-       "10  PCA(copy=True, iterated_power='auto', n_compon... -10.178593  \n",
-       "11                                               None  -7.197634  \n",
-       "12  PCA(copy=True, iterated_power='auto', n_compon... -10.119454  \n",
-       "13                                               None  -7.177250  \n",
-       "14  PCA(copy=True, iterated_power='auto', n_compon... -10.353996  \n",
-       "15                                               None  -7.212562  "
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=6,\n",
-       "            max_features='auto', max_leaf_nodes=None,\n",
-       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "            min_samples_leaf=1, min_samples_split=2,\n",
-       "            min_weight_fraction_leaf=0.0, n_estimators=8, n_jobs=-1,\n",
-       "            oob_score=False, random_state=None, verbose=0, warm_start=False),\n",
+       "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "   kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
        " 1,\n",
        " 3,\n",
        " None,\n",
-       " -7.174190213645762]"
+       " -6.522347109745663]"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -604,132 +329,243 @@
     "best_attempt"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Predict"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 171,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
-       "           max_features='auto', max_leaf_nodes=None,\n",
-       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=1, min_samples_split=2,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=41, n_jobs=-1,\n",
-       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 56,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "best_estimator"
+    "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Loading test data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(416, 24)\n"
+     ]
+    }
+   ],
+   "source": [
+    "X_test_1 = pd.read_csv('data/dengue_features_test.csv')\n",
+    "print(X_test_1.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## One by one prediction\n",
+    "* Given that we are making sequential predictions, i.e., the prediction for a given week relies on the predictions for the previous weeks, we must perform the transformations and predictions one by one.\n",
+    "* This kind of prediction is very prone to a snowball effect on errors: our first solution had an error of 26. To solve this, we came up with the idea of adding noise to the training data. However, for this solution we need to know both the mean of the error and its standard deviation (*std*). We already know the mean (the MAE); we just need to know the *std*."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 172,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[AdaBoostRegressor(base_estimator=DecisionTreeRegressor(criterion='mae', max_depth=6, max_features=None,\n",
-       "            max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-       "            min_impurity_split=None, min_samples_leaf=1,\n",
-       "            min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
-       "            presort=False, random_state=None, splitter='best'),\n",
-       "          learning_rate=1.0, loss='linear', n_estimators=41,\n",
-       "          random_state=None), 2, 4, None, -7.412474155754652]"
+       "416"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 172,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "best_attempt"
+    "predictions=[]\n",
+    "for idx in range(X_test_1.shape[0]):\n",
+    "    x = pipeline.transform(X_test_1.loc[idx:idx,:])\n",
+    "    pred = model.predict(x)\n",
+    "    pred = int(np.round(pred))\n",
+    "    pipeline.named_steps['l_infected'].append_y(pred)\n",
+    "    predictions.append(pred)\n",
+    "len(predictions)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Calculating an approximation of the *std*\n",
+    "* The *std* of the error is approximately 10.9. We can also see that the MAE is close to the one calculated in the cross-validation."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 216,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "from sklearn.model_selection import ShuffleSplit\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 217,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sp = ShuffleSplit(n_splits=1, train_size=1000, test_size=None, random_state=random_n)\n",
+    "for train, test in sp.split(X_train, y_train):\n",
+    "    X_train_std = X_train[train]\n",
+    "    y_train_std = y_train[train]\n",
+    "    X_test_std = X_train[test]\n",
+    "    y_test_std = y_train[test]\n",
+    "X_train_std.shape, y_train_std.shape\n",
+    "X_test_std.shape, y_test_std.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 222,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "   kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
-       " 1,\n",
-       " 3,\n",
-       " None,\n",
-       " -6.522347109745663]"
+       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 222,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "best_attempt"
+    "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n",
+    "model.fit(X_train_std, y_train_std)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 234,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[[SVR(C=5734, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
-       "  1,\n",
-       "  3,\n",
-       "  PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-       "    svd_solver='auto', tol=0.0, whiten=False),\n",
-       "  -7.332691708334687],\n",
-       " [SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
-       "  1,\n",
-       "  3,\n",
-       "  None,\n",
-       "  -6.522347109745663],\n",
-       " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
-       "  1,\n",
-       "  4,\n",
-       "  PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n",
-       "    svd_solver='auto', tol=0.0, whiten=False),\n",
-       "  -7.8651335759783985],\n",
-       " [SVR(C=860, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "    kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False),\n",
-       "  1,\n",
-       "  4,\n",
-       "  None,\n",
-       "  -6.53493877991638]]"
+       "(6.7785087719298245, 10.959317651673116)"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 234,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "results"
+    "predictions = model.predict(X_test_std)\n",
+    "predictions = list(map(lambda x: int(np.round(x)), predictions))\n",
+    "errors = list(map(abs, predictions - y_test_std))\n",
+    "np.mean(errors), np.std(errors)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Predict"
+    "# One by one prediction with noise"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predictions=[]\n",
+    "for idx in range(X_test_1.shape[0]):\n",
+    "    x = pipeline.transform(X_test_1.loc[idx:idx,:])\n",
+    "    pred = model.predict(x)\n",
+    "    pred = int(np.round(pred))\n",
+    "    pipeline.named_steps['l_infected'].append_y(pred)\n",
+    "    predictions.append(pred)\n",
+    "len(predictions)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Submission"
    ]
   },
   {
@@ -738,7 +574,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = SVR()"
+    "submit = pd.DataFrame(predictions, columns=['total_cases'])\n",
+    "x_3 = X_test_1.iloc[:,:3].copy()\n",
+    "submit = pd.concat([x_3, submit], axis=1)\n",
+    "submit.to_csv('data/submit.csv', index=False)"
    ]
   }
  ],
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index 1830dce..9854129 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -2,33 +2,50 @@
 from collections import deque
 import numpy as np
 import pandas as pd
+from random import gauss, choice
 
 class LastInfected(BaseEstimator, TransformerMixin):
-    def __init__(self, weeks=1, new_attributes_prefix='last_infected_', copy=True):
+    def __init__(self, weeks=1, new_attributes_prefix='last_infected_', add_noise=False, noise_mean=None, noise_std=None, copy=True):
         self.weeks=weeks
         self.new_attributes_prefix = new_attributes_prefix
         self.copy=copy
         dq = deque([0 for _ in range(weeks)])
         self.last = {'sj': dq.copy(), 'iq': dq.copy()}
+        self.add_noise = add_noise
+        self.noise_mean = noise_mean
+        self.noise_std = noise_std
+
+        self.first = True
     
     def fit(self, X, y):
-        self.y = y
+        self.y = y.to_list()
         return self
     
     def transform(self, X, model=None):
         if self.copy:
             X = X.copy()
         
+        X.reset_index(drop=True, inplace=True)
+
         r = np.ndarray(shape=[X.shape[0], self.weeks])
 
-        for idx, n_infected in enumerate(self.y):
-            city = X.loc[idx, 'city']
-            r[idx] = self.last[city]
-            self.last[city].appendleft(n_infected)
-            self.last[city].pop()
+        for idx, x in X.iterrows():
+            self.city = x['city']
+            r[idx] = self.last[self.city]
+            if self.first:
+                self.append_y(self.y[idx])
 
         r = pd.DataFrame(r, columns=[self.new_attributes_prefix + str(week) for week in range(self.weeks)])
         
         X = pd.concat([X, r], axis=1)
 
-        return X
\ No newline at end of file
+        self.first=False
+
+        return X
+
+    def append_y(self, new_y):
+        if self.add_noise:
+            noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std)))
+            new_y += noise
+        self.last[self.city].appendleft(new_y)
+        self.last[self.city].pop()
\ No newline at end of file
diff --git a/utils/LastWeeks.py b/utils/LastWeeks.py
index 0de33dd..8591f95 100644
--- a/utils/LastWeeks.py
+++ b/utils/LastWeeks.py
@@ -22,6 +22,8 @@ def transform(self, X):
         if self.copy:
             X = X.copy()
 
+        X.reset_index(drop=True, inplace=True)
+
         r = np.ndarray(shape=[X.shape[0], self.weeks, len(self.attributes)])
 
         for idx, week in X.iterrows():

From 04d191308ce84c9580792442988cdf4dfa16b1ae Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 00:56:46 +0100
Subject: [PATCH 14/24] finding optimal SVR

---
 models.ipynb | 70 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 0f971f8..cafc207 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -145,7 +145,7 @@
    "outputs": [],
    "source": [
     "k_folds=10\n",
-    "n_iter_search = 5\n",
+    "n_iter_search = 20\n",
     "C = sp_randint(0, 10000)\n",
     "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n",
     "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
@@ -380,7 +380,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -524,7 +524,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -537,19 +537,64 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "SVR_optimizer.fit(X_train, y_train)\n",
+    "model=SVR_optimizer.best_estimator_\n",
+    "SVR_optimizer.best_score_"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "model = SVR(kernel= 'linear', C=5191, gamma='scale')\n",
     "model.fit(X_train, y_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "416"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "predictions=[]\n",
     "for idx in range(X_test_1.shape[0]):\n",
@@ -570,7 +615,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -579,6 +624,13 @@
     "submit = pd.concat([x_3, submit], axis=1)\n",
     "submit.to_csv('data/submit.csv', index=False)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 31306cd5c4cc16bf7e853d36d718c4b84c1c8c95 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 00:59:54 +0100
Subject: [PATCH 15/24] tiny fix so that noise doesn't include negatives

---
 models.ipynb          | 55 ++++++++-----------------------------------
 utils/LastInfected.py |  2 ++
 2 files changed, 12 insertions(+), 45 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index cafc207..7644fbc 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -380,7 +380,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -524,7 +524,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -537,21 +537,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "SVR_optimizer.fit(X_train, y_train)\n",
     "model=SVR_optimizer.best_estimator_\n",
@@ -560,41 +548,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SVR(C=5191, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "model.fit(X_train, y_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "416"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "predictions=[]\n",
     "for idx in range(X_test_1.shape[0]):\n",
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index 9854129..332c59c 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -47,5 +47,7 @@ def append_y(self, new_y):
         if self.add_noise:
             noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std)))
             new_y += noise
+            if new_y < 0:
+                new_y = 0
         self.last[self.city].appendleft(new_y)
         self.last[self.city].pop()
\ No newline at end of file

From 3d7fb4344fa9059408cdaa3da67a598090cda6a3 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 10:11:11 +0100
Subject: [PATCH 16/24] after optimizing, again

---
 models.ipynb | 94 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 19 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 7644fbc..5e774b7 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -195,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -258,18 +258,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Best score of 10.757898351648352 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
+      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
+      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
+      "           min_samples_split=0.08977730688967958,\n",
+      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
+      "           splitter='best')\n",
+      "1/4\t\n",
+      "Best score of 8.57545699492815 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
+      "           max_features='auto', max_leaf_nodes=None,\n",
+      "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+      "           min_samples_leaf=1, min_samples_split=2,\n",
+      "           min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n",
+      "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
+      "2/4\t3/4\t4/4\t"
+     ]
+    }
+   ],
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
     "from sklearn.decomposition import PCA\n",
     "\n",
-    "optimizers=[SVR_optimizer, Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]\n",
-    "weeks = [1,2]\n",
-    "weeks_infected = [3,4]\n",
-    "pca = [PCA(0.95), None]\n",
+    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]#, SVR_optimizer]\n",
+    "weeks = [1]\n",
+    "weeks_infected = [3]\n",
+    "pca = [None]\n",
     "\n",
     "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n",
     "\n",
@@ -281,7 +303,7 @@
     "    for w in weeks:\n",
     "        for wi in weeks_infected:\n",
     "            for p in pca:\n",
-    "                pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=p)\n",
+    "                pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n",
     "                pipeline.fit(X_train_1, y_train)\n",
     "                score = pipeline.named_steps['est_opt'].best_score_\n",
     "                best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
@@ -380,7 +402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -524,7 +546,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -537,9 +559,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "-8.436222184081323"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "SVR_optimizer.fit(X_train, y_train)\n",
     "model=SVR_optimizer.best_estimator_\n",
@@ -548,18 +581,41 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "model.fit(X_train, y_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "416"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "predictions=[]\n",
     "for idx in range(X_test_1.shape[0]):\n",

From 9e33ca23526a897651aa95fd8065ae03725ce17b Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 10:44:26 +0100
Subject: [PATCH 17/24] tried disabling the double layer of noise

---
 models.ipynb          | 1150 ++++++++++++++++++++++++++++++++++++++++-
 utils/LastInfected.py |    1 +
 2 files changed, 1146 insertions(+), 5 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 5e774b7..4326211 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -541,12 +541,13 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# One by one prediction with noise"
+    "# One by one prediction with noise\n",
+    "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -602,7 +603,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -611,7 +625,7 @@
        "416"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -636,7 +650,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -646,6 +660,1132 @@
     "submit.to_csv('data/submit.csv', index=False)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>city</th>\n",
+       "      <th>year</th>\n",
+       "      <th>weekofyear</th>\n",
+       "      <th>total_cases</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>18</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>19</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>20</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>21</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>22</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>23</td>\n",
+       "      <td>8</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>24</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>25</td>\n",
+       "      <td>18</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>26</td>\n",
+       "      <td>20</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>27</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>28</td>\n",
+       "      <td>26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>29</td>\n",
+       "      <td>31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>30</td>\n",
+       "      <td>36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>31</td>\n",
+       "      <td>38</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>32</td>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>33</td>\n",
+       "      <td>35</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>34</td>\n",
+       "      <td>41</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>35</td>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>36</td>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>37</td>\n",
+       "      <td>47</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>38</td>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>39</td>\n",
+       "      <td>42</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>40</td>\n",
+       "      <td>52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>41</td>\n",
+       "      <td>52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>42</td>\n",
+       "      <td>48</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>43</td>\n",
+       "      <td>49</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>44</td>\n",
+       "      <td>45</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>45</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>46</td>\n",
+       "      <td>44</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>47</td>\n",
+       "      <td>46</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>386</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>48</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>387</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>49</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>388</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>50</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>389</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>51</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>390</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>391</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>2</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>392</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>3</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>393</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4</td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>394</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>395</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>6</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>396</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>7</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>397</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>398</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>9</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>399</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>10</td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>400</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>11</td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>401</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>12</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>402</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>13</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>403</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>14</td>\n",
+       "      <td>7</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>404</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>15</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>405</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>16</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>406</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>17</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>407</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>18</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>408</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>19</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>409</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>20</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>410</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>21</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>411</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>22</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>412</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>23</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>413</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>24</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>414</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>25</td>\n",
+       "      <td>-1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>415</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>26</td>\n",
+       "      <td>-2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>416 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    city  year  weekofyear  total_cases\n",
+       "0     sj  2008          18            3\n",
+       "1     sj  2008          19            2\n",
+       "2     sj  2008          20            2\n",
+       "3     sj  2008          21            3\n",
+       "4     sj  2008          22            8\n",
+       "5     sj  2008          23            8\n",
+       "6     sj  2008          24           10\n",
+       "7     sj  2008          25           18\n",
+       "8     sj  2008          26           20\n",
+       "9     sj  2008          27           25\n",
+       "10    sj  2008          28           26\n",
+       "11    sj  2008          29           31\n",
+       "12    sj  2008          30           36\n",
+       "13    sj  2008          31           38\n",
+       "14    sj  2008          32           39\n",
+       "15    sj  2008          33           35\n",
+       "16    sj  2008          34           41\n",
+       "17    sj  2008          35           46\n",
+       "18    sj  2008          36           46\n",
+       "19    sj  2008          37           47\n",
+       "20    sj  2008          38           46\n",
+       "21    sj  2008          39           42\n",
+       "22    sj  2008          40           52\n",
+       "23    sj  2008          41           52\n",
+       "24    sj  2008          42           48\n",
+       "25    sj  2008          43           49\n",
+       "26    sj  2008          44           45\n",
+       "27    sj  2008          45           44\n",
+       "28    sj  2008          46           44\n",
+       "29    sj  2008          47           46\n",
+       "..   ...   ...         ...          ...\n",
+       "386   iq  2012          48            6\n",
+       "387   iq  2012          49            0\n",
+       "388   iq  2012          50            0\n",
+       "389   iq  2012          51           -2\n",
+       "390   iq  2013           1            1\n",
+       "391   iq  2013           2           -2\n",
+       "392   iq  2013           3           -2\n",
+       "393   iq  2013           4           -1\n",
+       "394   iq  2013           5            1\n",
+       "395   iq  2013           6            4\n",
+       "396   iq  2013           7            0\n",
+       "397   iq  2013           8            4\n",
+       "398   iq  2013           9            1\n",
+       "399   iq  2013          10           -1\n",
+       "400   iq  2013          11           -1\n",
+       "401   iq  2013          12           -2\n",
+       "402   iq  2013          13            2\n",
+       "403   iq  2013          14            7\n",
+       "404   iq  2013          15            2\n",
+       "405   iq  2013          16            3\n",
+       "406   iq  2013          17            6\n",
+       "407   iq  2013          18            2\n",
+       "408   iq  2013          19            0\n",
+       "409   iq  2013          20            3\n",
+       "410   iq  2013          21            1\n",
+       "411   iq  2013          22            3\n",
+       "412   iq  2013          23            0\n",
+       "413   iq  2013          24           -2\n",
+       "414   iq  2013          25           -1\n",
+       "415   iq  2013          26           -2\n",
+       "\n",
+       "[416 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "submit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ndvi_ne</th>\n",
+       "      <th>ndvi_nw</th>\n",
+       "      <th>ndvi_se</th>\n",
+       "      <th>ndvi_sw</th>\n",
+       "      <th>precipitation_amt_mm</th>\n",
+       "      <th>reanalysis_air_temp_k</th>\n",
+       "      <th>reanalysis_avg_temp_k</th>\n",
+       "      <th>reanalysis_dew_point_temp_k</th>\n",
+       "      <th>reanalysis_max_air_temp_k</th>\n",
+       "      <th>reanalysis_min_air_temp_k</th>\n",
+       "      <th>...</th>\n",
+       "      <th>last_weeks_0_reanalysis_specific_humidity_g_per_kg</th>\n",
+       "      <th>last_weeks_0_reanalysis_tdtr_k</th>\n",
+       "      <th>last_weeks_0_station_avg_temp_c</th>\n",
+       "      <th>last_weeks_0_station_diur_temp_rng_c</th>\n",
+       "      <th>last_weeks_0_station_max_temp_c</th>\n",
+       "      <th>last_weeks_0_station_min_temp_c</th>\n",
+       "      <th>last_weeks_0_station_precip_mm</th>\n",
+       "      <th>last_infected_0</th>\n",
+       "      <th>last_infected_1</th>\n",
+       "      <th>last_infected_2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.122600</td>\n",
+       "      <td>0.103725</td>\n",
+       "      <td>0.198483</td>\n",
+       "      <td>0.177617</td>\n",
+       "      <td>12.42</td>\n",
+       "      <td>297.572857</td>\n",
+       "      <td>297.742857</td>\n",
+       "      <td>292.414286</td>\n",
+       "      <td>299.8</td>\n",
+       "      <td>295.9</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.087143</td>\n",
+       "      <td>2.857143</td>\n",
+       "      <td>27.400000</td>\n",
+       "      <td>7.364286</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>23.8</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.169900</td>\n",
+       "      <td>0.142175</td>\n",
+       "      <td>0.162357</td>\n",
+       "      <td>0.155486</td>\n",
+       "      <td>22.82</td>\n",
+       "      <td>298.211429</td>\n",
+       "      <td>298.442857</td>\n",
+       "      <td>293.951429</td>\n",
+       "      <td>300.9</td>\n",
+       "      <td>296.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>14.012857</td>\n",
+       "      <td>2.628571</td>\n",
+       "      <td>25.442857</td>\n",
+       "      <td>6.900000</td>\n",
+       "      <td>29.4</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.032250</td>\n",
+       "      <td>0.172967</td>\n",
+       "      <td>0.157200</td>\n",
+       "      <td>0.170843</td>\n",
+       "      <td>34.54</td>\n",
+       "      <td>298.781429</td>\n",
+       "      <td>298.878571</td>\n",
+       "      <td>295.434286</td>\n",
+       "      <td>300.5</td>\n",
+       "      <td>297.3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>15.372857</td>\n",
+       "      <td>2.371429</td>\n",
+       "      <td>26.714286</td>\n",
+       "      <td>6.371429</td>\n",
+       "      <td>31.7</td>\n",
+       "      <td>22.2</td>\n",
+       "      <td>8.6</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.128633</td>\n",
+       "      <td>0.245067</td>\n",
+       "      <td>0.227557</td>\n",
+       "      <td>0.235886</td>\n",
+       "      <td>15.36</td>\n",
+       "      <td>298.987143</td>\n",
+       "      <td>299.228571</td>\n",
+       "      <td>295.310000</td>\n",
+       "      <td>301.4</td>\n",
+       "      <td>297.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.848571</td>\n",
+       "      <td>2.300000</td>\n",
+       "      <td>26.714286</td>\n",
+       "      <td>6.485714</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>41.4</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>9.0</td>\n",
+       "      <td>6.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.196200</td>\n",
+       "      <td>0.262200</td>\n",
+       "      <td>0.251200</td>\n",
+       "      <td>0.247340</td>\n",
+       "      <td>7.52</td>\n",
+       "      <td>299.518571</td>\n",
+       "      <td>299.664286</td>\n",
+       "      <td>295.821429</td>\n",
+       "      <td>301.9</td>\n",
+       "      <td>297.5</td>\n",
+       "      <td>...</td>\n",
+       "      <td>16.672857</td>\n",
+       "      <td>2.428571</td>\n",
+       "      <td>27.471429</td>\n",
+       "      <td>6.771429</td>\n",
+       "      <td>33.3</td>\n",
+       "      <td>23.3</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>9.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>0.196200</td>\n",
+       "      <td>0.174850</td>\n",
+       "      <td>0.254314</td>\n",
+       "      <td>0.181743</td>\n",
+       "      <td>9.58</td>\n",
+       "      <td>299.630000</td>\n",
+       "      <td>299.764286</td>\n",
+       "      <td>295.851429</td>\n",
+       "      <td>302.4</td>\n",
+       "      <td>298.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.210000</td>\n",
+       "      <td>3.014286</td>\n",
+       "      <td>28.942857</td>\n",
+       "      <td>9.371429</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>23.9</td>\n",
+       "      <td>5.8</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>0.112900</td>\n",
+       "      <td>0.092800</td>\n",
+       "      <td>0.205071</td>\n",
+       "      <td>0.210271</td>\n",
+       "      <td>3.48</td>\n",
+       "      <td>299.207143</td>\n",
+       "      <td>299.221429</td>\n",
+       "      <td>295.865714</td>\n",
+       "      <td>301.3</td>\n",
+       "      <td>297.7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.212857</td>\n",
+       "      <td>2.100000</td>\n",
+       "      <td>28.114286</td>\n",
+       "      <td>6.942857</td>\n",
+       "      <td>34.4</td>\n",
+       "      <td>23.9</td>\n",
+       "      <td>39.1</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>0.072500</td>\n",
+       "      <td>0.072500</td>\n",
+       "      <td>0.151471</td>\n",
+       "      <td>0.133029</td>\n",
+       "      <td>151.12</td>\n",
+       "      <td>299.591429</td>\n",
+       "      <td>299.528571</td>\n",
+       "      <td>296.531429</td>\n",
+       "      <td>300.6</td>\n",
+       "      <td>298.4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.234286</td>\n",
+       "      <td>2.042857</td>\n",
+       "      <td>27.414286</td>\n",
+       "      <td>6.771429</td>\n",
+       "      <td>32.2</td>\n",
+       "      <td>23.3</td>\n",
+       "      <td>29.7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>7.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>0.102450</td>\n",
+       "      <td>0.146175</td>\n",
+       "      <td>0.125571</td>\n",
+       "      <td>0.123600</td>\n",
+       "      <td>19.32</td>\n",
+       "      <td>299.578571</td>\n",
+       "      <td>299.557143</td>\n",
+       "      <td>296.378571</td>\n",
+       "      <td>302.1</td>\n",
+       "      <td>297.7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.977143</td>\n",
+       "      <td>1.571429</td>\n",
+       "      <td>28.371429</td>\n",
+       "      <td>7.685714</td>\n",
+       "      <td>33.9</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>0.102450</td>\n",
+       "      <td>0.121550</td>\n",
+       "      <td>0.160683</td>\n",
+       "      <td>0.202567</td>\n",
+       "      <td>14.41</td>\n",
+       "      <td>300.154286</td>\n",
+       "      <td>300.278571</td>\n",
+       "      <td>296.651429</td>\n",
+       "      <td>302.3</td>\n",
+       "      <td>298.7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.790000</td>\n",
+       "      <td>1.885714</td>\n",
+       "      <td>28.328571</td>\n",
+       "      <td>7.385714</td>\n",
+       "      <td>33.9</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>0.192875</td>\n",
+       "      <td>0.082350</td>\n",
+       "      <td>0.191943</td>\n",
+       "      <td>0.152929</td>\n",
+       "      <td>22.27</td>\n",
+       "      <td>299.512857</td>\n",
+       "      <td>299.592857</td>\n",
+       "      <td>296.041429</td>\n",
+       "      <td>301.8</td>\n",
+       "      <td>298.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>18.071429</td>\n",
+       "      <td>2.014286</td>\n",
+       "      <td>28.328571</td>\n",
+       "      <td>6.514286</td>\n",
+       "      <td>33.9</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>1.1</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>0.291600</td>\n",
+       "      <td>0.211800</td>\n",
+       "      <td>0.301200</td>\n",
+       "      <td>0.280667</td>\n",
+       "      <td>59.17</td>\n",
+       "      <td>299.667143</td>\n",
+       "      <td>299.750000</td>\n",
+       "      <td>296.334286</td>\n",
+       "      <td>302.0</td>\n",
+       "      <td>297.3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.418571</td>\n",
+       "      <td>2.157143</td>\n",
+       "      <td>27.557143</td>\n",
+       "      <td>7.157143</td>\n",
+       "      <td>31.7</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>63.7</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>0.150567</td>\n",
+       "      <td>0.171700</td>\n",
+       "      <td>0.226900</td>\n",
+       "      <td>0.214557</td>\n",
+       "      <td>16.48</td>\n",
+       "      <td>299.558571</td>\n",
+       "      <td>299.635714</td>\n",
+       "      <td>295.960000</td>\n",
+       "      <td>301.8</td>\n",
+       "      <td>297.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.737143</td>\n",
+       "      <td>2.414286</td>\n",
+       "      <td>28.128571</td>\n",
+       "      <td>6.900000</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>23.9</td>\n",
+       "      <td>12.2</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>7.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>0.150567</td>\n",
+       "      <td>0.247150</td>\n",
+       "      <td>0.379700</td>\n",
+       "      <td>0.381357</td>\n",
+       "      <td>32.66</td>\n",
+       "      <td>299.862857</td>\n",
+       "      <td>299.950000</td>\n",
+       "      <td>296.172857</td>\n",
+       "      <td>303.0</td>\n",
+       "      <td>298.3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.341429</td>\n",
+       "      <td>2.071429</td>\n",
+       "      <td>28.114286</td>\n",
+       "      <td>6.357143</td>\n",
+       "      <td>31.7</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>32.6</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>0.150567</td>\n",
+       "      <td>0.064333</td>\n",
+       "      <td>0.164443</td>\n",
+       "      <td>0.138857</td>\n",
+       "      <td>28.80</td>\n",
+       "      <td>300.391429</td>\n",
+       "      <td>300.478571</td>\n",
+       "      <td>296.532857</td>\n",
+       "      <td>302.5</td>\n",
+       "      <td>298.8</td>\n",
+       "      <td>...</td>\n",
+       "      <td>17.594286</td>\n",
+       "      <td>2.585714</td>\n",
+       "      <td>28.242857</td>\n",
+       "      <td>8.085714</td>\n",
+       "      <td>34.4</td>\n",
+       "      <td>22.8</td>\n",
+       "      <td>37.6</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>7.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>15 rows × 43 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     ndvi_ne   ndvi_nw   ndvi_se   ndvi_sw  precipitation_amt_mm  \\\n",
+       "0   0.122600  0.103725  0.198483  0.177617                 12.42   \n",
+       "1   0.169900  0.142175  0.162357  0.155486                 22.82   \n",
+       "2   0.032250  0.172967  0.157200  0.170843                 34.54   \n",
+       "3   0.128633  0.245067  0.227557  0.235886                 15.36   \n",
+       "4   0.196200  0.262200  0.251200  0.247340                  7.52   \n",
+       "5   0.196200  0.174850  0.254314  0.181743                  9.58   \n",
+       "6   0.112900  0.092800  0.205071  0.210271                  3.48   \n",
+       "7   0.072500  0.072500  0.151471  0.133029                151.12   \n",
+       "8   0.102450  0.146175  0.125571  0.123600                 19.32   \n",
+       "9   0.102450  0.121550  0.160683  0.202567                 14.41   \n",
+       "10  0.192875  0.082350  0.191943  0.152929                 22.27   \n",
+       "11  0.291600  0.211800  0.301200  0.280667                 59.17   \n",
+       "12  0.150567  0.171700  0.226900  0.214557                 16.48   \n",
+       "13  0.150567  0.247150  0.379700  0.381357                 32.66   \n",
+       "14  0.150567  0.064333  0.164443  0.138857                 28.80   \n",
+       "\n",
+       "    reanalysis_air_temp_k  reanalysis_avg_temp_k  reanalysis_dew_point_temp_k  \\\n",
+       "0              297.572857             297.742857                   292.414286   \n",
+       "1              298.211429             298.442857                   293.951429   \n",
+       "2              298.781429             298.878571                   295.434286   \n",
+       "3              298.987143             299.228571                   295.310000   \n",
+       "4              299.518571             299.664286                   295.821429   \n",
+       "5              299.630000             299.764286                   295.851429   \n",
+       "6              299.207143             299.221429                   295.865714   \n",
+       "7              299.591429             299.528571                   296.531429   \n",
+       "8              299.578571             299.557143                   296.378571   \n",
+       "9              300.154286             300.278571                   296.651429   \n",
+       "10             299.512857             299.592857                   296.041429   \n",
+       "11             299.667143             299.750000                   296.334286   \n",
+       "12             299.558571             299.635714                   295.960000   \n",
+       "13             299.862857             299.950000                   296.172857   \n",
+       "14             300.391429             300.478571                   296.532857   \n",
+       "\n",
+       "    reanalysis_max_air_temp_k  reanalysis_min_air_temp_k  ...  \\\n",
+       "0                       299.8                      295.9  ...   \n",
+       "1                       300.9                      296.4  ...   \n",
+       "2                       300.5                      297.3  ...   \n",
+       "3                       301.4                      297.0  ...   \n",
+       "4                       301.9                      297.5  ...   \n",
+       "5                       302.4                      298.1  ...   \n",
+       "6                       301.3                      297.7  ...   \n",
+       "7                       300.6                      298.4  ...   \n",
+       "8                       302.1                      297.7  ...   \n",
+       "9                       302.3                      298.7  ...   \n",
+       "10                      301.8                      298.0  ...   \n",
+       "11                      302.0                      297.3  ...   \n",
+       "12                      301.8                      297.1  ...   \n",
+       "13                      303.0                      298.3  ...   \n",
+       "14                      302.5                      298.8  ...   \n",
+       "\n",
+       "    last_weeks_0_reanalysis_specific_humidity_g_per_kg  \\\n",
+       "0                                           17.087143    \n",
+       "1                                           14.012857    \n",
+       "2                                           15.372857    \n",
+       "3                                           16.848571    \n",
+       "4                                           16.672857    \n",
+       "5                                           17.210000    \n",
+       "6                                           17.212857    \n",
+       "7                                           17.234286    \n",
+       "8                                           17.977143    \n",
+       "9                                           17.790000    \n",
+       "10                                          18.071429    \n",
+       "11                                          17.418571    \n",
+       "12                                          17.737143    \n",
+       "13                                          17.341429    \n",
+       "14                                          17.594286    \n",
+       "\n",
+       "    last_weeks_0_reanalysis_tdtr_k  last_weeks_0_station_avg_temp_c  \\\n",
+       "0                         2.857143                        27.400000   \n",
+       "1                         2.628571                        25.442857   \n",
+       "2                         2.371429                        26.714286   \n",
+       "3                         2.300000                        26.714286   \n",
+       "4                         2.428571                        27.471429   \n",
+       "5                         3.014286                        28.942857   \n",
+       "6                         2.100000                        28.114286   \n",
+       "7                         2.042857                        27.414286   \n",
+       "8                         1.571429                        28.371429   \n",
+       "9                         1.885714                        28.328571   \n",
+       "10                        2.014286                        28.328571   \n",
+       "11                        2.157143                        27.557143   \n",
+       "12                        2.414286                        28.128571   \n",
+       "13                        2.071429                        28.114286   \n",
+       "14                        2.585714                        28.242857   \n",
+       "\n",
+       "    last_weeks_0_station_diur_temp_rng_c  last_weeks_0_station_max_temp_c  \\\n",
+       "0                               7.364286                             32.8   \n",
+       "1                               6.900000                             29.4   \n",
+       "2                               6.371429                             31.7   \n",
+       "3                               6.485714                             32.2   \n",
+       "4                               6.771429                             33.3   \n",
+       "5                               9.371429                             35.0   \n",
+       "6                               6.942857                             34.4   \n",
+       "7                               6.771429                             32.2   \n",
+       "8                               7.685714                             33.9   \n",
+       "9                               7.385714                             33.9   \n",
+       "10                              6.514286                             33.9   \n",
+       "11                              7.157143                             31.7   \n",
+       "12                              6.900000                             32.8   \n",
+       "13                              6.357143                             31.7   \n",
+       "14                              8.085714                             34.4   \n",
+       "\n",
+       "    last_weeks_0_station_min_temp_c  last_weeks_0_station_precip_mm  \\\n",
+       "0                              22.2                            23.8   \n",
+       "1                              20.0                            16.0   \n",
+       "2                              22.2                             8.6   \n",
+       "3                              22.8                            41.4   \n",
+       "4                              23.3                             4.0   \n",
+       "5                              23.9                             5.8   \n",
+       "6                              23.9                            39.1   \n",
+       "7                              23.3                            29.7   \n",
+       "8                              22.8                            21.1   \n",
+       "9                              22.8                            21.1   \n",
+       "10                             24.4                             1.1   \n",
+       "11                             21.7                            63.7   \n",
+       "12                             23.9                            12.2   \n",
+       "13                             22.8                            32.6   \n",
+       "14                             22.8                            37.6   \n",
+       "\n",
+       "    last_infected_0  last_infected_1  last_infected_2  \n",
+       "0               0.0              0.0              0.0  \n",
+       "1               6.0              0.0              0.0  \n",
+       "2               9.0              6.0              0.0  \n",
+       "3               5.0              9.0              6.0  \n",
+       "4               0.0              5.0              9.0  \n",
+       "5               7.0              0.0              5.0  \n",
+       "6               3.0              7.0              0.0  \n",
+       "7               0.0              3.0              7.0  \n",
+       "8               1.0              0.0              3.0  \n",
+       "9               2.0              1.0              0.0  \n",
+       "10              7.0              2.0              1.0  \n",
+       "11              0.0              7.0              2.0  \n",
+       "12              7.0              0.0              7.0  \n",
+       "13              0.0              7.0              0.0  \n",
+       "14             11.0              0.0              7.0  \n",
+       "\n",
+       "[15 rows x 43 columns]"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train.head(15)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
+       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index 332c59c..41ab05f 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -47,6 +47,7 @@ def append_y(self, new_y):
         if self.add_noise:
             noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std)))
             new_y += noise
+            print('With noise {}!!'.format(new_y))
             if new_y < 0:
                 new_y = 0
         self.last[self.city].appendleft(new_y)

From 75624bfa59f6275881593316027189e522d2c366 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 13:03:03 +0100
Subject: [PATCH 18/24] made the noise less aggressive

---
 OurPipeline.py        |    2 +-
 models.ipynb          | 1355 ++++++++++-------------------------------
 utils/LastInfected.py |    5 +-
 3 files changed, 326 insertions(+), 1036 deletions(-)

diff --git a/OurPipeline.py b/OurPipeline.py
index 40654cd..8a1d1dd 100644
--- a/OurPipeline.py
+++ b/OurPipeline.py
@@ -13,7 +13,7 @@ def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, p
         ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
         ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)),
         ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])),
-        ('scaler', StandardScaler()),
+        #('scaler', StandardScaler()),
         ('pca', pca),
         ('est_opt', estimator_optimizer),
     ]
diff --git a/models.ipynb b/models.ipynb
index 4326211..011b2e3 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -537,62 +537,92 @@
     "np.mean(errors), np.std(errors)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(6.49747, 4.943615438027113)"
+      ]
+     },
+     "execution_count": 124,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from random import choice, gauss\n",
+    "r=[]\n",
+    "for _ in range(100000):\n",
+    "    r.append(int(np.round(choice([-1,1]) * gauss(mu=0, sigma=8.2))))\n",
+    "r=np.abs(r)\n",
+    "np.mean(r), np.std(r)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# One by one prediction with noise\n",
-    "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model."
+    "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n",
+    "* A very likely guess is that the errors when y is low are much smaller than when y is high."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [],
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
     "\n",
-    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=0, noise_std=8.2, pca=None)\n",
     "X_train = pipeline.fit_transform(X_train_1, y_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 121,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "-8.436222184081323"
+       "-8.57545699492815"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 121,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "SVR_optimizer.fit(X_train, y_train)\n",
-    "model=SVR_optimizer.best_estimator_\n",
-    "SVR_optimizer.best_score_"
+    "#Forest_optimizer.fit(X_train, y_train)\n",
+    "model=Forest_optimizer.best_estimator_\n",
+    "Forest_optimizer.best_score_"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=1, min_samples_split=2,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n",
+       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 127,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -603,20 +633,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload\n",
-    "from OurPipeline import create_pipeline\n",
-    "\n",
-    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
-    "X_train = pipeline.fit_transform(X_train_1, y_train)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 141,
    "metadata": {},
    "outputs": [
     {
@@ -625,15 +642,23 @@
        "416"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 141,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "pipeline.fit_transform(X_train_1, y_train)\n",
+    "\n",
+    "X_test_f = pd.DataFrame([], columns=attr[4:])\n",
     "predictions=[]\n",
     "for idx in range(X_test_1.shape[0]):\n",
     "    x = pipeline.transform(X_test_1.loc[idx:idx,:])\n",
+    "    X_test_f = X_test_f.append(x, sort=False, ignore_index=True)\n",
     "    pred = model.predict(x)\n",
     "    pred = int(np.round(pred))\n",
     "    pipeline.named_steps['l_infected'].append_y(pred)\n",
@@ -650,7 +675,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 148,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -662,548 +687,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>city</th>\n",
-       "      <th>year</th>\n",
-       "      <th>weekofyear</th>\n",
-       "      <th>total_cases</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>18</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>19</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>20</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>21</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>22</td>\n",
-       "      <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>23</td>\n",
-       "      <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>24</td>\n",
-       "      <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>25</td>\n",
-       "      <td>18</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>26</td>\n",
-       "      <td>20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>27</td>\n",
-       "      <td>25</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>28</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>29</td>\n",
-       "      <td>31</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>30</td>\n",
-       "      <td>36</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>31</td>\n",
-       "      <td>38</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>32</td>\n",
-       "      <td>39</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>33</td>\n",
-       "      <td>35</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>34</td>\n",
-       "      <td>41</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>35</td>\n",
-       "      <td>46</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>36</td>\n",
-       "      <td>46</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>37</td>\n",
-       "      <td>47</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>38</td>\n",
-       "      <td>46</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>39</td>\n",
-       "      <td>42</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>40</td>\n",
-       "      <td>52</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>41</td>\n",
-       "      <td>52</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>42</td>\n",
-       "      <td>48</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>43</td>\n",
-       "      <td>49</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>44</td>\n",
-       "      <td>45</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>45</td>\n",
-       "      <td>44</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>46</td>\n",
-       "      <td>44</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>29</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>47</td>\n",
-       "      <td>46</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>386</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>48</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>387</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>49</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>388</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>50</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>389</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>51</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>390</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>391</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>392</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>393</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>4</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>394</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>5</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>395</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>6</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>396</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>7</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>397</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>8</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>398</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>9</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>399</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>10</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>400</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>11</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>401</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>12</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>402</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>13</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>403</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>14</td>\n",
-       "      <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>404</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>15</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>405</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>16</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>406</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>17</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>407</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>18</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>408</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>19</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>409</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>20</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>410</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>21</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>411</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>22</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>412</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>23</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>413</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>24</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>414</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>25</td>\n",
-       "      <td>-1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>415</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>26</td>\n",
-       "      <td>-2</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>416 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    city  year  weekofyear  total_cases\n",
-       "0     sj  2008          18            3\n",
-       "1     sj  2008          19            2\n",
-       "2     sj  2008          20            2\n",
-       "3     sj  2008          21            3\n",
-       "4     sj  2008          22            8\n",
-       "5     sj  2008          23            8\n",
-       "6     sj  2008          24           10\n",
-       "7     sj  2008          25           18\n",
-       "8     sj  2008          26           20\n",
-       "9     sj  2008          27           25\n",
-       "10    sj  2008          28           26\n",
-       "11    sj  2008          29           31\n",
-       "12    sj  2008          30           36\n",
-       "13    sj  2008          31           38\n",
-       "14    sj  2008          32           39\n",
-       "15    sj  2008          33           35\n",
-       "16    sj  2008          34           41\n",
-       "17    sj  2008          35           46\n",
-       "18    sj  2008          36           46\n",
-       "19    sj  2008          37           47\n",
-       "20    sj  2008          38           46\n",
-       "21    sj  2008          39           42\n",
-       "22    sj  2008          40           52\n",
-       "23    sj  2008          41           52\n",
-       "24    sj  2008          42           48\n",
-       "25    sj  2008          43           49\n",
-       "26    sj  2008          44           45\n",
-       "27    sj  2008          45           44\n",
-       "28    sj  2008          46           44\n",
-       "29    sj  2008          47           46\n",
-       "..   ...   ...         ...          ...\n",
-       "386   iq  2012          48            6\n",
-       "387   iq  2012          49            0\n",
-       "388   iq  2012          50            0\n",
-       "389   iq  2012          51           -2\n",
-       "390   iq  2013           1            1\n",
-       "391   iq  2013           2           -2\n",
-       "392   iq  2013           3           -2\n",
-       "393   iq  2013           4           -1\n",
-       "394   iq  2013           5            1\n",
-       "395   iq  2013           6            4\n",
-       "396   iq  2013           7            0\n",
-       "397   iq  2013           8            4\n",
-       "398   iq  2013           9            1\n",
-       "399   iq  2013          10           -1\n",
-       "400   iq  2013          11           -1\n",
-       "401   iq  2013          12           -2\n",
-       "402   iq  2013          13            2\n",
-       "403   iq  2013          14            7\n",
-       "404   iq  2013          15            2\n",
-       "405   iq  2013          16            3\n",
-       "406   iq  2013          17            6\n",
-       "407   iq  2013          18            2\n",
-       "408   iq  2013          19            0\n",
-       "409   iq  2013          20            3\n",
-       "410   iq  2013          21            1\n",
-       "411   iq  2013          22            3\n",
-       "412   iq  2013          23            0\n",
-       "413   iq  2013          24           -2\n",
-       "414   iq  2013          25           -1\n",
-       "415   iq  2013          26           -2\n",
-       "\n",
-       "[416 rows x 4 columns]"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "submit"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 149,
    "metadata": {},
    "outputs": [
     {
@@ -1238,7 +722,6 @@
        "      <th>reanalysis_max_air_temp_k</th>\n",
        "      <th>reanalysis_min_air_temp_k</th>\n",
        "      <th>...</th>\n",
-       "      <th>last_weeks_0_reanalysis_specific_humidity_g_per_kg</th>\n",
        "      <th>last_weeks_0_reanalysis_tdtr_k</th>\n",
        "      <th>last_weeks_0_station_avg_temp_c</th>\n",
        "      <th>last_weeks_0_station_diur_temp_rng_c</th>\n",
@@ -1248,542 +731,350 @@
        "      <th>last_infected_0</th>\n",
        "      <th>last_infected_1</th>\n",
        "      <th>last_infected_2</th>\n",
+       "      <th>pred</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>0.122600</td>\n",
-       "      <td>0.103725</td>\n",
-       "      <td>0.198483</td>\n",
-       "      <td>0.177617</td>\n",
-       "      <td>12.42</td>\n",
-       "      <td>297.572857</td>\n",
-       "      <td>297.742857</td>\n",
-       "      <td>292.414286</td>\n",
-       "      <td>299.8</td>\n",
-       "      <td>295.9</td>\n",
+       "      <td>-0.018900</td>\n",
+       "      <td>-0.018900</td>\n",
+       "      <td>0.102729</td>\n",
+       "      <td>0.091200</td>\n",
+       "      <td>78.60</td>\n",
+       "      <td>298.492857</td>\n",
+       "      <td>298.550000</td>\n",
+       "      <td>294.527143</td>\n",
+       "      <td>301.1</td>\n",
+       "      <td>296.4</td>\n",
        "      <td>...</td>\n",
-       "      <td>17.087143</td>\n",
-       "      <td>2.857143</td>\n",
-       "      <td>27.400000</td>\n",
-       "      <td>7.364286</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>23.8</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>3.957143</td>\n",
+       "      <td>27.042857</td>\n",
+       "      <td>7.514286</td>\n",
+       "      <td>31.7</td>\n",
+       "      <td>23.3</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
-       "      <td>0.169900</td>\n",
-       "      <td>0.142175</td>\n",
-       "      <td>0.162357</td>\n",
-       "      <td>0.155486</td>\n",
-       "      <td>22.82</td>\n",
-       "      <td>298.211429</td>\n",
-       "      <td>298.442857</td>\n",
-       "      <td>293.951429</td>\n",
-       "      <td>300.9</td>\n",
-       "      <td>296.4</td>\n",
+       "      <td>-0.018000</td>\n",
+       "      <td>-0.012400</td>\n",
+       "      <td>0.082043</td>\n",
+       "      <td>0.072314</td>\n",
+       "      <td>12.56</td>\n",
+       "      <td>298.475714</td>\n",
+       "      <td>298.557143</td>\n",
+       "      <td>294.395714</td>\n",
+       "      <td>300.8</td>\n",
+       "      <td>296.7</td>\n",
        "      <td>...</td>\n",
-       "      <td>14.012857</td>\n",
-       "      <td>2.628571</td>\n",
-       "      <td>25.442857</td>\n",
-       "      <td>6.900000</td>\n",
-       "      <td>29.4</td>\n",
-       "      <td>20.0</td>\n",
-       "      <td>16.0</td>\n",
+       "      <td>3.128571</td>\n",
+       "      <td>26.528571</td>\n",
+       "      <td>7.057143</td>\n",
+       "      <td>33.3</td>\n",
+       "      <td>21.7</td>\n",
+       "      <td>75.2</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>0.032250</td>\n",
-       "      <td>0.172967</td>\n",
-       "      <td>0.157200</td>\n",
-       "      <td>0.170843</td>\n",
-       "      <td>34.54</td>\n",
-       "      <td>298.781429</td>\n",
-       "      <td>298.878571</td>\n",
-       "      <td>295.434286</td>\n",
-       "      <td>300.5</td>\n",
-       "      <td>297.3</td>\n",
+       "      <td>-0.001500</td>\n",
+       "      <td>-0.012400</td>\n",
+       "      <td>0.151083</td>\n",
+       "      <td>0.091529</td>\n",
+       "      <td>3.66</td>\n",
+       "      <td>299.455714</td>\n",
+       "      <td>299.357143</td>\n",
+       "      <td>295.308571</td>\n",
+       "      <td>302.2</td>\n",
+       "      <td>296.4</td>\n",
        "      <td>...</td>\n",
-       "      <td>15.372857</td>\n",
-       "      <td>2.371429</td>\n",
-       "      <td>26.714286</td>\n",
-       "      <td>6.371429</td>\n",
-       "      <td>31.7</td>\n",
+       "      <td>2.571429</td>\n",
+       "      <td>26.071429</td>\n",
+       "      <td>5.557143</td>\n",
+       "      <td>30.0</td>\n",
        "      <td>22.2</td>\n",
-       "      <td>8.6</td>\n",
-       "      <td>9.0</td>\n",
+       "      <td>34.3</td>\n",
+       "      <td>7.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>5.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
-       "      <td>0.128633</td>\n",
-       "      <td>0.245067</td>\n",
-       "      <td>0.227557</td>\n",
-       "      <td>0.235886</td>\n",
-       "      <td>15.36</td>\n",
-       "      <td>298.987143</td>\n",
-       "      <td>299.228571</td>\n",
-       "      <td>295.310000</td>\n",
-       "      <td>301.4</td>\n",
-       "      <td>297.0</td>\n",
+       "      <td>-0.001500</td>\n",
+       "      <td>-0.019867</td>\n",
+       "      <td>0.124329</td>\n",
+       "      <td>0.125686</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>299.690000</td>\n",
+       "      <td>299.728571</td>\n",
+       "      <td>294.402857</td>\n",
+       "      <td>303.0</td>\n",
+       "      <td>296.9</td>\n",
        "      <td>...</td>\n",
-       "      <td>16.848571</td>\n",
-       "      <td>2.300000</td>\n",
-       "      <td>26.714286</td>\n",
-       "      <td>6.485714</td>\n",
-       "      <td>32.2</td>\n",
+       "      <td>4.428571</td>\n",
+       "      <td>27.928571</td>\n",
+       "      <td>7.785714</td>\n",
+       "      <td>32.8</td>\n",
        "      <td>22.8</td>\n",
-       "      <td>41.4</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>9.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>7.0</td>\n",
        "      <td>6.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>0.196200</td>\n",
-       "      <td>0.262200</td>\n",
-       "      <td>0.251200</td>\n",
-       "      <td>0.247340</td>\n",
-       "      <td>7.52</td>\n",
-       "      <td>299.518571</td>\n",
-       "      <td>299.664286</td>\n",
-       "      <td>295.821429</td>\n",
-       "      <td>301.9</td>\n",
-       "      <td>297.5</td>\n",
+       "      <td>0.056800</td>\n",
+       "      <td>0.039833</td>\n",
+       "      <td>0.062267</td>\n",
+       "      <td>0.075914</td>\n",
+       "      <td>0.76</td>\n",
+       "      <td>299.780000</td>\n",
+       "      <td>299.671429</td>\n",
+       "      <td>294.760000</td>\n",
+       "      <td>302.3</td>\n",
+       "      <td>297.3</td>\n",
        "      <td>...</td>\n",
-       "      <td>16.672857</td>\n",
-       "      <td>2.428571</td>\n",
-       "      <td>27.471429</td>\n",
-       "      <td>6.771429</td>\n",
+       "      <td>4.342857</td>\n",
+       "      <td>28.057143</td>\n",
+       "      <td>6.271429</td>\n",
        "      <td>33.3</td>\n",
-       "      <td>23.3</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>9.0</td>\n",
+       "      <td>24.4</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>0.196200</td>\n",
-       "      <td>0.174850</td>\n",
-       "      <td>0.254314</td>\n",
-       "      <td>0.181743</td>\n",
-       "      <td>9.58</td>\n",
-       "      <td>299.630000</td>\n",
-       "      <td>299.764286</td>\n",
-       "      <td>295.851429</td>\n",
-       "      <td>302.4</td>\n",
-       "      <td>298.1</td>\n",
+       "      <td>-0.044000</td>\n",
+       "      <td>-0.030467</td>\n",
+       "      <td>0.132000</td>\n",
+       "      <td>0.083529</td>\n",
+       "      <td>71.17</td>\n",
+       "      <td>299.768571</td>\n",
+       "      <td>299.728571</td>\n",
+       "      <td>295.314286</td>\n",
+       "      <td>301.9</td>\n",
+       "      <td>297.6</td>\n",
        "      <td>...</td>\n",
-       "      <td>17.210000</td>\n",
-       "      <td>3.014286</td>\n",
-       "      <td>28.942857</td>\n",
-       "      <td>9.371429</td>\n",
-       "      <td>35.0</td>\n",
-       "      <td>23.9</td>\n",
-       "      <td>5.8</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>5.0</td>\n",
+       "      <td>3.542857</td>\n",
+       "      <td>27.614286</td>\n",
+       "      <td>7.085714</td>\n",
+       "      <td>33.3</td>\n",
+       "      <td>23.3</td>\n",
+       "      <td>84.1</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>0.112900</td>\n",
-       "      <td>0.092800</td>\n",
-       "      <td>0.205071</td>\n",
-       "      <td>0.210271</td>\n",
-       "      <td>3.48</td>\n",
-       "      <td>299.207143</td>\n",
-       "      <td>299.221429</td>\n",
-       "      <td>295.865714</td>\n",
-       "      <td>301.3</td>\n",
-       "      <td>297.7</td>\n",
+       "      <td>-0.044300</td>\n",
+       "      <td>-0.024925</td>\n",
+       "      <td>0.132271</td>\n",
+       "      <td>0.159157</td>\n",
+       "      <td>48.99</td>\n",
+       "      <td>300.062857</td>\n",
+       "      <td>300.007143</td>\n",
+       "      <td>295.650000</td>\n",
+       "      <td>302.4</td>\n",
+       "      <td>297.5</td>\n",
        "      <td>...</td>\n",
-       "      <td>17.212857</td>\n",
-       "      <td>2.100000</td>\n",
-       "      <td>28.114286</td>\n",
-       "      <td>6.942857</td>\n",
-       "      <td>34.4</td>\n",
-       "      <td>23.9</td>\n",
-       "      <td>39.1</td>\n",
-       "      <td>3.0</td>\n",
+       "      <td>2.857143</td>\n",
+       "      <td>28.000000</td>\n",
+       "      <td>5.171429</td>\n",
+       "      <td>32.8</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>27.7</td>\n",
        "      <td>7.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>0.072500</td>\n",
-       "      <td>0.072500</td>\n",
-       "      <td>0.151471</td>\n",
-       "      <td>0.133029</td>\n",
-       "      <td>151.12</td>\n",
-       "      <td>299.591429</td>\n",
-       "      <td>299.528571</td>\n",
-       "      <td>296.531429</td>\n",
-       "      <td>300.6</td>\n",
-       "      <td>298.4</td>\n",
+       "      <td>-0.044300</td>\n",
+       "      <td>0.082150</td>\n",
+       "      <td>0.144371</td>\n",
+       "      <td>0.116729</td>\n",
+       "      <td>30.81</td>\n",
+       "      <td>300.484286</td>\n",
+       "      <td>300.578571</td>\n",
+       "      <td>295.997143</td>\n",
+       "      <td>303.5</td>\n",
+       "      <td>297.5</td>\n",
        "      <td>...</td>\n",
-       "      <td>17.234286</td>\n",
-       "      <td>2.042857</td>\n",
-       "      <td>27.414286</td>\n",
-       "      <td>6.771429</td>\n",
-       "      <td>32.2</td>\n",
+       "      <td>3.157143</td>\n",
+       "      <td>27.400000</td>\n",
+       "      <td>6.042857</td>\n",
+       "      <td>31.1</td>\n",
        "      <td>23.3</td>\n",
-       "      <td>29.7</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
+       "      <td>91.7</td>\n",
        "      <td>7.0</td>\n",
+       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>0.102450</td>\n",
-       "      <td>0.146175</td>\n",
-       "      <td>0.125571</td>\n",
-       "      <td>0.123600</td>\n",
-       "      <td>19.32</td>\n",
-       "      <td>299.578571</td>\n",
-       "      <td>299.557143</td>\n",
-       "      <td>296.378571</td>\n",
-       "      <td>302.1</td>\n",
-       "      <td>297.7</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.977143</td>\n",
-       "      <td>1.571429</td>\n",
-       "      <td>28.371429</td>\n",
-       "      <td>7.685714</td>\n",
-       "      <td>33.9</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>3.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>0.102450</td>\n",
-       "      <td>0.121550</td>\n",
-       "      <td>0.160683</td>\n",
-       "      <td>0.202567</td>\n",
-       "      <td>14.41</td>\n",
-       "      <td>300.154286</td>\n",
-       "      <td>300.278571</td>\n",
-       "      <td>296.651429</td>\n",
-       "      <td>302.3</td>\n",
-       "      <td>298.7</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.790000</td>\n",
-       "      <td>1.885714</td>\n",
-       "      <td>28.328571</td>\n",
-       "      <td>7.385714</td>\n",
-       "      <td>33.9</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>21.1</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>0.192875</td>\n",
-       "      <td>0.082350</td>\n",
-       "      <td>0.191943</td>\n",
-       "      <td>0.152929</td>\n",
-       "      <td>22.27</td>\n",
-       "      <td>299.512857</td>\n",
-       "      <td>299.592857</td>\n",
-       "      <td>296.041429</td>\n",
-       "      <td>301.8</td>\n",
-       "      <td>298.0</td>\n",
+       "      <td>0.010800</td>\n",
+       "      <td>0.049900</td>\n",
+       "      <td>0.100571</td>\n",
+       "      <td>0.117329</td>\n",
+       "      <td>8.02</td>\n",
+       "      <td>300.601429</td>\n",
+       "      <td>300.621429</td>\n",
+       "      <td>296.268571</td>\n",
+       "      <td>302.5</td>\n",
+       "      <td>298.5</td>\n",
        "      <td>...</td>\n",
-       "      <td>18.071429</td>\n",
-       "      <td>2.014286</td>\n",
-       "      <td>28.328571</td>\n",
-       "      <td>6.514286</td>\n",
-       "      <td>33.9</td>\n",
+       "      <td>3.900000</td>\n",
+       "      <td>28.757143</td>\n",
+       "      <td>6.985714</td>\n",
+       "      <td>34.4</td>\n",
        "      <td>24.4</td>\n",
-       "      <td>1.1</td>\n",
+       "      <td>0.3</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>7.0</td>\n",
-       "      <td>2.0</td>\n",
-       "      <td>1.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>0.291600</td>\n",
-       "      <td>0.211800</td>\n",
-       "      <td>0.301200</td>\n",
-       "      <td>0.280667</td>\n",
-       "      <td>59.17</td>\n",
-       "      <td>299.667143</td>\n",
-       "      <td>299.750000</td>\n",
-       "      <td>296.334286</td>\n",
-       "      <td>302.0</td>\n",
-       "      <td>297.3</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.418571</td>\n",
-       "      <td>2.157143</td>\n",
-       "      <td>27.557143</td>\n",
-       "      <td>7.157143</td>\n",
-       "      <td>31.7</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>63.7</td>\n",
-       "      <td>0.0</td>\n",
        "      <td>7.0</td>\n",
-       "      <td>2.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>0.150567</td>\n",
-       "      <td>0.171700</td>\n",
-       "      <td>0.226900</td>\n",
-       "      <td>0.214557</td>\n",
-       "      <td>16.48</td>\n",
-       "      <td>299.558571</td>\n",
-       "      <td>299.635714</td>\n",
-       "      <td>295.960000</td>\n",
-       "      <td>301.8</td>\n",
-       "      <td>297.1</td>\n",
+       "      <th>9</th>\n",
+       "      <td>0.072667</td>\n",
+       "      <td>0.106660</td>\n",
+       "      <td>0.155429</td>\n",
+       "      <td>0.164900</td>\n",
+       "      <td>17.52</td>\n",
+       "      <td>300.497143</td>\n",
+       "      <td>300.528571</td>\n",
+       "      <td>296.411429</td>\n",
+       "      <td>302.3</td>\n",
+       "      <td>298.7</td>\n",
        "      <td>...</td>\n",
-       "      <td>17.737143</td>\n",
-       "      <td>2.414286</td>\n",
-       "      <td>28.128571</td>\n",
-       "      <td>6.900000</td>\n",
+       "      <td>2.785714</td>\n",
+       "      <td>28.657143</td>\n",
+       "      <td>6.242857</td>\n",
        "      <td>32.8</td>\n",
        "      <td>23.9</td>\n",
-       "      <td>12.2</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>7.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>0.150567</td>\n",
-       "      <td>0.247150</td>\n",
-       "      <td>0.379700</td>\n",
-       "      <td>0.381357</td>\n",
-       "      <td>32.66</td>\n",
-       "      <td>299.862857</td>\n",
-       "      <td>299.950000</td>\n",
-       "      <td>296.172857</td>\n",
-       "      <td>303.0</td>\n",
-       "      <td>298.3</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.341429</td>\n",
-       "      <td>2.071429</td>\n",
-       "      <td>28.114286</td>\n",
-       "      <td>6.357143</td>\n",
-       "      <td>31.7</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>32.6</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>0.150567</td>\n",
-       "      <td>0.064333</td>\n",
-       "      <td>0.164443</td>\n",
-       "      <td>0.138857</td>\n",
-       "      <td>28.80</td>\n",
-       "      <td>300.391429</td>\n",
-       "      <td>300.478571</td>\n",
-       "      <td>296.532857</td>\n",
-       "      <td>302.5</td>\n",
-       "      <td>298.8</td>\n",
-       "      <td>...</td>\n",
-       "      <td>17.594286</td>\n",
-       "      <td>2.585714</td>\n",
-       "      <td>28.242857</td>\n",
-       "      <td>8.085714</td>\n",
-       "      <td>34.4</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>37.6</td>\n",
-       "      <td>11.0</td>\n",
-       "      <td>0.0</td>\n",
+       "      <td>28.7</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>7.0</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
-       "<p>15 rows × 43 columns</p>\n",
+       "<p>10 rows × 44 columns</p>\n",
        "</div>"
       ],
       "text/plain": [
-       "     ndvi_ne   ndvi_nw   ndvi_se   ndvi_sw  precipitation_amt_mm  \\\n",
-       "0   0.122600  0.103725  0.198483  0.177617                 12.42   \n",
-       "1   0.169900  0.142175  0.162357  0.155486                 22.82   \n",
-       "2   0.032250  0.172967  0.157200  0.170843                 34.54   \n",
-       "3   0.128633  0.245067  0.227557  0.235886                 15.36   \n",
-       "4   0.196200  0.262200  0.251200  0.247340                  7.52   \n",
-       "5   0.196200  0.174850  0.254314  0.181743                  9.58   \n",
-       "6   0.112900  0.092800  0.205071  0.210271                  3.48   \n",
-       "7   0.072500  0.072500  0.151471  0.133029                151.12   \n",
-       "8   0.102450  0.146175  0.125571  0.123600                 19.32   \n",
-       "9   0.102450  0.121550  0.160683  0.202567                 14.41   \n",
-       "10  0.192875  0.082350  0.191943  0.152929                 22.27   \n",
-       "11  0.291600  0.211800  0.301200  0.280667                 59.17   \n",
-       "12  0.150567  0.171700  0.226900  0.214557                 16.48   \n",
-       "13  0.150567  0.247150  0.379700  0.381357                 32.66   \n",
-       "14  0.150567  0.064333  0.164443  0.138857                 28.80   \n",
+       "    ndvi_ne   ndvi_nw   ndvi_se   ndvi_sw  precipitation_amt_mm  \\\n",
+       "0 -0.018900 -0.018900  0.102729  0.091200                 78.60   \n",
+       "1 -0.018000 -0.012400  0.082043  0.072314                 12.56   \n",
+       "2 -0.001500 -0.012400  0.151083  0.091529                  3.66   \n",
+       "3 -0.001500 -0.019867  0.124329  0.125686                  0.00   \n",
+       "4  0.056800  0.039833  0.062267  0.075914                  0.76   \n",
+       "5 -0.044000 -0.030467  0.132000  0.083529                 71.17   \n",
+       "6 -0.044300 -0.024925  0.132271  0.159157                 48.99   \n",
+       "7 -0.044300  0.082150  0.144371  0.116729                 30.81   \n",
+       "8  0.010800  0.049900  0.100571  0.117329                  8.02   \n",
+       "9  0.072667  0.106660  0.155429  0.164900                 17.52   \n",
        "\n",
-       "    reanalysis_air_temp_k  reanalysis_avg_temp_k  reanalysis_dew_point_temp_k  \\\n",
-       "0              297.572857             297.742857                   292.414286   \n",
-       "1              298.211429             298.442857                   293.951429   \n",
-       "2              298.781429             298.878571                   295.434286   \n",
-       "3              298.987143             299.228571                   295.310000   \n",
-       "4              299.518571             299.664286                   295.821429   \n",
-       "5              299.630000             299.764286                   295.851429   \n",
-       "6              299.207143             299.221429                   295.865714   \n",
-       "7              299.591429             299.528571                   296.531429   \n",
-       "8              299.578571             299.557143                   296.378571   \n",
-       "9              300.154286             300.278571                   296.651429   \n",
-       "10             299.512857             299.592857                   296.041429   \n",
-       "11             299.667143             299.750000                   296.334286   \n",
-       "12             299.558571             299.635714                   295.960000   \n",
-       "13             299.862857             299.950000                   296.172857   \n",
-       "14             300.391429             300.478571                   296.532857   \n",
+       "   reanalysis_air_temp_k  reanalysis_avg_temp_k  reanalysis_dew_point_temp_k  \\\n",
+       "0             298.492857             298.550000                   294.527143   \n",
+       "1             298.475714             298.557143                   294.395714   \n",
+       "2             299.455714             299.357143                   295.308571   \n",
+       "3             299.690000             299.728571                   294.402857   \n",
+       "4             299.780000             299.671429                   294.760000   \n",
+       "5             299.768571             299.728571                   295.314286   \n",
+       "6             300.062857             300.007143                   295.650000   \n",
+       "7             300.484286             300.578571                   295.997143   \n",
+       "8             300.601429             300.621429                   296.268571   \n",
+       "9             300.497143             300.528571                   296.411429   \n",
        "\n",
-       "    reanalysis_max_air_temp_k  reanalysis_min_air_temp_k  ...  \\\n",
-       "0                       299.8                      295.9  ...   \n",
-       "1                       300.9                      296.4  ...   \n",
-       "2                       300.5                      297.3  ...   \n",
-       "3                       301.4                      297.0  ...   \n",
-       "4                       301.9                      297.5  ...   \n",
-       "5                       302.4                      298.1  ...   \n",
-       "6                       301.3                      297.7  ...   \n",
-       "7                       300.6                      298.4  ...   \n",
-       "8                       302.1                      297.7  ...   \n",
-       "9                       302.3                      298.7  ...   \n",
-       "10                      301.8                      298.0  ...   \n",
-       "11                      302.0                      297.3  ...   \n",
-       "12                      301.8                      297.1  ...   \n",
-       "13                      303.0                      298.3  ...   \n",
-       "14                      302.5                      298.8  ...   \n",
+       "   reanalysis_max_air_temp_k  reanalysis_min_air_temp_k  ...  \\\n",
+       "0                      301.1                      296.4  ...   \n",
+       "1                      300.8                      296.7  ...   \n",
+       "2                      302.2                      296.4  ...   \n",
+       "3                      303.0                      296.9  ...   \n",
+       "4                      302.3                      297.3  ...   \n",
+       "5                      301.9                      297.6  ...   \n",
+       "6                      302.4                      297.5  ...   \n",
+       "7                      303.5                      297.5  ...   \n",
+       "8                      302.5                      298.5  ...   \n",
+       "9                      302.3                      298.7  ...   \n",
        "\n",
-       "    last_weeks_0_reanalysis_specific_humidity_g_per_kg  \\\n",
-       "0                                           17.087143    \n",
-       "1                                           14.012857    \n",
-       "2                                           15.372857    \n",
-       "3                                           16.848571    \n",
-       "4                                           16.672857    \n",
-       "5                                           17.210000    \n",
-       "6                                           17.212857    \n",
-       "7                                           17.234286    \n",
-       "8                                           17.977143    \n",
-       "9                                           17.790000    \n",
-       "10                                          18.071429    \n",
-       "11                                          17.418571    \n",
-       "12                                          17.737143    \n",
-       "13                                          17.341429    \n",
-       "14                                          17.594286    \n",
+       "   last_weeks_0_reanalysis_tdtr_k  last_weeks_0_station_avg_temp_c  \\\n",
+       "0                        3.957143                        27.042857   \n",
+       "1                        3.128571                        26.528571   \n",
+       "2                        2.571429                        26.071429   \n",
+       "3                        4.428571                        27.928571   \n",
+       "4                        4.342857                        28.057143   \n",
+       "5                        3.542857                        27.614286   \n",
+       "6                        2.857143                        28.000000   \n",
+       "7                        3.157143                        27.400000   \n",
+       "8                        3.900000                        28.757143   \n",
+       "9                        2.785714                        28.657143   \n",
        "\n",
-       "    last_weeks_0_reanalysis_tdtr_k  last_weeks_0_station_avg_temp_c  \\\n",
-       "0                         2.857143                        27.400000   \n",
-       "1                         2.628571                        25.442857   \n",
-       "2                         2.371429                        26.714286   \n",
-       "3                         2.300000                        26.714286   \n",
-       "4                         2.428571                        27.471429   \n",
-       "5                         3.014286                        28.942857   \n",
-       "6                         2.100000                        28.114286   \n",
-       "7                         2.042857                        27.414286   \n",
-       "8                         1.571429                        28.371429   \n",
-       "9                         1.885714                        28.328571   \n",
-       "10                        2.014286                        28.328571   \n",
-       "11                        2.157143                        27.557143   \n",
-       "12                        2.414286                        28.128571   \n",
-       "13                        2.071429                        28.114286   \n",
-       "14                        2.585714                        28.242857   \n",
+       "   last_weeks_0_station_diur_temp_rng_c  last_weeks_0_station_max_temp_c  \\\n",
+       "0                              7.514286                             31.7   \n",
+       "1                              7.057143                             33.3   \n",
+       "2                              5.557143                             30.0   \n",
+       "3                              7.785714                             32.8   \n",
+       "4                              6.271429                             33.3   \n",
+       "5                              7.085714                             33.3   \n",
+       "6                              5.171429                             32.8   \n",
+       "7                              6.042857                             31.1   \n",
+       "8                              6.985714                             34.4   \n",
+       "9                              6.242857                             32.8   \n",
        "\n",
-       "    last_weeks_0_station_diur_temp_rng_c  last_weeks_0_station_max_temp_c  \\\n",
-       "0                               7.364286                             32.8   \n",
-       "1                               6.900000                             29.4   \n",
-       "2                               6.371429                             31.7   \n",
-       "3                               6.485714                             32.2   \n",
-       "4                               6.771429                             33.3   \n",
-       "5                               9.371429                             35.0   \n",
-       "6                               6.942857                             34.4   \n",
-       "7                               6.771429                             32.2   \n",
-       "8                               7.685714                             33.9   \n",
-       "9                               7.385714                             33.9   \n",
-       "10                              6.514286                             33.9   \n",
-       "11                              7.157143                             31.7   \n",
-       "12                              6.900000                             32.8   \n",
-       "13                              6.357143                             31.7   \n",
-       "14                              8.085714                             34.4   \n",
+       "   last_weeks_0_station_min_temp_c  last_weeks_0_station_precip_mm  \\\n",
+       "0                             23.3                             0.3   \n",
+       "1                             21.7                            75.2   \n",
+       "2                             22.2                            34.3   \n",
+       "3                             22.8                             3.0   \n",
+       "4                             24.4                             0.3   \n",
+       "5                             23.3                            84.1   \n",
+       "6                             25.0                            27.7   \n",
+       "7                             23.3                            91.7   \n",
+       "8                             24.4                             0.3   \n",
+       "9                             23.9                            28.7   \n",
        "\n",
-       "    last_weeks_0_station_min_temp_c  last_weeks_0_station_precip_mm  \\\n",
-       "0                              22.2                            23.8   \n",
-       "1                              20.0                            16.0   \n",
-       "2                              22.2                             8.6   \n",
-       "3                              22.8                            41.4   \n",
-       "4                              23.3                             4.0   \n",
-       "5                              23.9                             5.8   \n",
-       "6                              23.9                            39.1   \n",
-       "7                              23.3                            29.7   \n",
-       "8                              22.8                            21.1   \n",
-       "9                              22.8                            21.1   \n",
-       "10                             24.4                             1.1   \n",
-       "11                             21.7                            63.7   \n",
-       "12                             23.9                            12.2   \n",
-       "13                             22.8                            32.6   \n",
-       "14                             22.8                            37.6   \n",
+       "   last_infected_0  last_infected_1  last_infected_2  pred  \n",
+       "0              5.0              3.0              1.0     6  \n",
+       "1              6.0              5.0              3.0     7  \n",
+       "2              7.0              6.0              5.0     6  \n",
+       "3              6.0              7.0              6.0     6  \n",
+       "4              6.0              6.0              7.0     6  \n",
+       "5              6.0              6.0              6.0     7  \n",
+       "6              7.0              6.0              6.0     7  \n",
+       "7              7.0              7.0              6.0     6  \n",
+       "8              6.0              7.0              7.0     6  \n",
+       "9              6.0              6.0              7.0     7  \n",
        "\n",
-       "    last_infected_0  last_infected_1  last_infected_2  \n",
-       "0               0.0              0.0              0.0  \n",
-       "1               6.0              0.0              0.0  \n",
-       "2               9.0              6.0              0.0  \n",
-       "3               5.0              9.0              6.0  \n",
-       "4               0.0              5.0              9.0  \n",
-       "5               7.0              0.0              5.0  \n",
-       "6               3.0              7.0              0.0  \n",
-       "7               0.0              3.0              7.0  \n",
-       "8               1.0              0.0              3.0  \n",
-       "9               2.0              1.0              0.0  \n",
-       "10              7.0              2.0              1.0  \n",
-       "11              0.0              7.0              2.0  \n",
-       "12              7.0              0.0              7.0  \n",
-       "13              0.0              7.0              0.0  \n",
-       "14             11.0              0.0              7.0  \n",
-       "\n",
-       "[15 rows x 43 columns]"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "X_train.head(15)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "SVR(C=1685, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',\n",
-       "  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)"
+       "[10 rows x 44 columns]"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 149,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "model"
+    "pd.concat([X_test_f, pd.DataFrame(predictions, columns=['pred'])], axis=1).head(10)"
    ]
   },
   {
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index 41ab05f..f181aa9 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -47,8 +47,7 @@ def append_y(self, new_y):
         if self.add_noise:
             noise = int(np.round(choice([-1,1]) * gauss(mu=self.noise_mean, sigma=self.noise_std)))
             new_y += noise
-            print('With noise {}!!'.format(new_y))
-            if new_y < 0:
-                new_y = 0
+            #if new_y < 0:
+                #new_y = 0
         self.last[self.city].appendleft(new_y)
         self.last[self.city].pop()
\ No newline at end of file

From eda28aba74c6a42dfbded2810af5b6f40cbb5451 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 13:36:04 +0100
Subject: [PATCH 19/24] :(

---
 models.ipynb | 149 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 54 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 011b2e3..489409f 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -199,7 +199,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=10\n",
+    "k_folds=\n",
     "n_iter_search = 40\n",
     "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n",
     "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
@@ -539,16 +539,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 124,
+   "execution_count": 152,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "(6.49747, 4.943615438027113)"
+       "(6.53353, 4.950353092366241)"
       ]
      },
-     "execution_count": 124,
+     "execution_count": 152,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -573,7 +573,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 125,
+   "execution_count": 157,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -586,43 +586,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 121,
+   "execution_count": 155,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "-8.57545699492815"
-      ]
-     },
-     "execution_count": 121,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "#Forest_optimizer.fit(X_train, y_train)\n",
-    "model=Forest_optimizer.best_estimator_\n",
-    "Forest_optimizer.best_score_"
+    "model = RandomForestRegressor(criterion='mae', n_estimators=100, max_depth=3)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 127,
+   "execution_count": 156,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n",
        "           max_features='auto', max_leaf_nodes=None,\n",
        "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
        "           min_samples_leaf=1, min_samples_split=2,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,\n",
        "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
       ]
      },
-     "execution_count": 127,
+     "execution_count": 156,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -633,7 +620,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 141,
+   "execution_count": 161,
    "metadata": {},
    "outputs": [
     {
@@ -642,7 +629,7 @@
        "416"
       ]
      },
-     "execution_count": 141,
+     "execution_count": 161,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -687,7 +674,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 149,
+   "execution_count": 162,
    "metadata": {},
    "outputs": [
     {
@@ -781,7 +768,7 @@
        "      <td>6.0</td>\n",
        "      <td>5.0</td>\n",
        "      <td>3.0</td>\n",
-       "      <td>7</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -802,7 +789,7 @@
        "      <td>30.0</td>\n",
        "      <td>22.2</td>\n",
        "      <td>34.3</td>\n",
-       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
        "      <td>5.0</td>\n",
        "      <td>6</td>\n",
@@ -827,7 +814,7 @@
        "      <td>22.8</td>\n",
        "      <td>3.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6</td>\n",
        "    </tr>\n",
@@ -852,7 +839,7 @@
        "      <td>0.3</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -877,7 +864,7 @@
        "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -898,10 +885,10 @@
        "      <td>32.8</td>\n",
        "      <td>25.0</td>\n",
        "      <td>27.7</td>\n",
-       "      <td>7.0</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -922,8 +909,8 @@
        "      <td>31.1</td>\n",
        "      <td>23.3</td>\n",
        "      <td>91.7</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6</td>\n",
        "    </tr>\n",
@@ -947,8 +934,8 @@
        "      <td>24.4</td>\n",
        "      <td>0.3</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>7.0</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6.0</td>\n",
        "      <td>6</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -972,8 +959,8 @@
        "      <td>28.7</td>\n",
        "      <td>6.0</td>\n",
        "      <td>6.0</td>\n",
-       "      <td>7.0</td>\n",
-       "      <td>7</td>\n",
+       "      <td>6.0</td>\n",
+       "      <td>6</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1055,20 +1042,20 @@
        "\n",
        "   last_infected_0  last_infected_1  last_infected_2  pred  \n",
        "0              5.0              3.0              1.0     6  \n",
-       "1              6.0              5.0              3.0     7  \n",
-       "2              7.0              6.0              5.0     6  \n",
-       "3              6.0              7.0              6.0     6  \n",
-       "4              6.0              6.0              7.0     6  \n",
-       "5              6.0              6.0              6.0     7  \n",
-       "6              7.0              6.0              6.0     7  \n",
-       "7              7.0              7.0              6.0     6  \n",
-       "8              6.0              7.0              7.0     6  \n",
-       "9              6.0              6.0              7.0     7  \n",
+       "1              6.0              5.0              3.0     6  \n",
+       "2              6.0              6.0              5.0     6  \n",
+       "3              6.0              6.0              6.0     6  \n",
+       "4              6.0              6.0              6.0     6  \n",
+       "5              6.0              6.0              6.0     6  \n",
+       "6              6.0              6.0              6.0     6  \n",
+       "7              6.0              6.0              6.0     6  \n",
+       "8              6.0              6.0              6.0     6  \n",
+       "9              6.0              6.0              6.0     6  \n",
        "\n",
        "[10 rows x 44 columns]"
       ]
      },
-     "execution_count": 149,
+     "execution_count": 162,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1079,10 +1066,64 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 158,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7.860576923076923"
+      ]
+     },
+     "execution_count": 158,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import mean_absolute_error\n",
+    "mean_absolute_error(model.predict(X_train), y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 160,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "371.265"
+      ]
+     },
+     "execution_count": 160,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "max(model.predict(X_train))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 163,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "6"
+      ]
+     },
+     "execution_count": 163,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "max(predictions)"
+   ]
   }
  ],
  "metadata": {

From 54f8548ad07fbf537ec25cf64988b4b34fde187f Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 16:53:55 +0100
Subject: [PATCH 20/24] made new train_test split

---
 models.ipynb | 118 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 110 insertions(+), 8 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 489409f..5968e0c 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -195,11 +195,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=\n",
+    "k_folds=10\n",
     "n_iter_search = 40\n",
     "params = {'n_estimators': sp_randint(2,50), 'criterion':['mae'], 'max_depth': sp_randint(2, 10)}\n",
     "Forest_optimizer = RandomizedSearchCV(estimator=RandomForestRegressor(n_jobs=-1), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
@@ -216,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -402,7 +402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -1124,6 +1124,108 @@
    "source": [
     "max(predictions)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test split of tail\n",
+    "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((933, 24), (933,), (518, 24), (518,))"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "idx_sj = X_train_1['city'] == 'sj'\n",
+    "X_sj = X_train_1[idx_sj]\n",
+    "y_sj = y_train[idx_sj]\n",
+    "\n",
+    "idx_iq = X_train_1['city'] == 'iq'\n",
+    "X_iq = X_train_1[idx_iq]\n",
+    "y_iq = y_train[idx_iq]\n",
+    "\n",
+    "X_sj.shape, y_sj.shape, X_iq.shape, y_iq.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((653, 24), (280, 24), (653,), (280,), (362, 24), (156, 24), (362,), (156,))"
+      ]
+     },
+     "execution_count": 41,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "l = train_test_split(X_sj, y_sj, train_size=0.7, test_size=None, shuffle=False)\n",
+    "X_train_sj = l[0]\n",
+    "X_test_sj = l[1]\n",
+    "y_train_sj = l[2]\n",
+    "y_test_sj = l[3]\n",
+    "\n",
+    "l = train_test_split(X_iq, y_iq, train_size=0.7, test_size=None, shuffle=False)\n",
+    "X_train_iq = l[0]\n",
+    "X_test_iq = l[1]\n",
+    "y_train_iq = l[2]\n",
+    "y_test_iq = l[3]\n",
+    "\n",
+    "X_train_sj.shape, X_test_sj.shape, y_train_sj.shape, y_test_sj.shape, X_train_iq.shape, X_test_iq.shape, y_train_iq.shape, y_test_iq.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((1015, 24), (1015,), (436, 24), (436,))"
+      ]
+     },
+     "execution_count": 49,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train_2 = pd.concat([X_train_sj, X_train_iq])\n",
+    "y_train_2 = pd.concat([y_train_sj, y_train_iq])\n",
+    "X_test_2 = pd.concat([X_test_sj, X_test_iq])\n",
+    "y_test_2 = pd.concat([y_test_sj, y_test_iq])\n",
+    "\n",
+    "X_train_2.shape, y_train_2.shape, X_test_2.shape, y_test_2.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From 3a053a57f31e36624d657b17fc88de3c585904d0 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 18:48:31 +0100
Subject: [PATCH 21/24] refactored predict_in_order and made the split

---
 models.ipynb                           | 168 +++++++++++++++++++++----
 utils/LastInfected.py                  |   2 +-
 OurPipeline.py => utils/OurPipeline.py |   0
 utils/predict_in_order.py              |  14 +++
 4 files changed, 157 insertions(+), 27 deletions(-)
 rename OurPipeline.py => utils/OurPipeline.py (100%)
 create mode 100644 utils/predict_in_order.py

diff --git a/models.ipynb b/models.ipynb
index 5968e0c..199dc28 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 156,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
        " 'station_precip_mm']"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 156,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -402,7 +402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 157,
    "metadata": {},
    "outputs": [
     {
@@ -636,21 +636,13 @@
    ],
    "source": [
     "%autoreload\n",
-    "from OurPipeline import create_pipeline\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from utils.predict_in_order import predict_in_order\n",
     "\n",
     "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
     "pipeline.fit_transform(X_train_1, y_train)\n",
     "\n",
-    "X_test_f = pd.DataFrame([], columns=attr[4:])\n",
-    "predictions=[]\n",
-    "for idx in range(X_test_1.shape[0]):\n",
-    "    x = pipeline.transform(X_test_1.loc[idx:idx,:])\n",
-    "    X_test_f = X_test_f.append(x, sort=False, ignore_index=True)\n",
-    "    pred = model.predict(x)\n",
-    "    pred = int(np.round(pred))\n",
-    "    pipeline.named_steps['l_infected'].append_y(pred)\n",
-    "    predictions.append(pred)\n",
-    "len(predictions)"
+    "predict_in_order(X_test_1, model, pipeline)"
    ]
   },
   {
@@ -1135,7 +1127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 101,
    "metadata": {},
    "outputs": [
     {
@@ -1144,7 +1136,7 @@
        "((933, 24), (933,), (518, 24), (518,))"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 101,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1163,16 +1155,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 102,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((653, 24), (280, 24), (653,), (280,), (362, 24), (156, 24), (362,), (156,))"
+       "((466, 24), (467, 24), (466,), (467,), (259, 24), (259, 24), (259,), (259,))"
       ]
      },
-     "execution_count": 41,
+     "execution_count": 102,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1180,13 +1172,13 @@
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "\n",
-    "l = train_test_split(X_sj, y_sj, train_size=0.7, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_sj, y_sj, train_size=0.5, test_size=None, shuffle=False)\n",
     "X_train_sj = l[0]\n",
     "X_test_sj = l[1]\n",
     "y_train_sj = l[2]\n",
     "y_test_sj = l[3]\n",
     "\n",
-    "l = train_test_split(X_iq, y_iq, train_size=0.7, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_iq, y_iq, train_size=0.5, test_size=None, shuffle=False)\n",
     "X_train_iq = l[0]\n",
     "X_test_iq = l[1]\n",
     "y_train_iq = l[2]\n",
@@ -1197,16 +1189,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": 103,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((1015, 24), (1015,), (436, 24), (436,))"
+       "((725, 24), (725,), (726, 24), (726,))"
       ]
      },
-     "execution_count": 49,
+     "execution_count": 103,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1217,15 +1209,139 @@
     "X_test_2 = pd.concat([X_test_sj, X_test_iq])\n",
     "y_test_2 = pd.concat([y_test_sj, y_test_iq])\n",
     "\n",
+    "X_train_2.reset_index(drop=True, inplace=True)\n",
+    "X_test_2.reset_index(drop=True, inplace=True)\n",
+    "y_train_2.reset_index(drop=True, inplace=True)\n",
+    "y_test_2.reset_index(drop=True, inplace=True)\n",
     "X_train_2.shape, y_train_2.shape, X_test_2.shape, y_test_2.shape"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Pipeline"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 151,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from OurPipeline import create_pipeline\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_2, y_train_2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 152,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=1, min_samples_split=2,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=150, n_jobs=None,\n",
+       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 152,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = RandomForestRegressor(criterion='mae', n_estimators=150, max_depth=3)\n",
+    "model.fit(X_train, y_train_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 153,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from utils.predict_in_order import predict_in_order\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "pipeline.fit_transform(X_train_2, y_train_2)\n",
+    "\n",
+    "pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 154,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "11.414600550964188"
+      ]
+     },
+     "execution_count": 154,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import mean_absolute_error\n",
+    "mean_absolute_error(pred, y_test_2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Submit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 158,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from utils.predict_in_order import predict_in_order\n",
+    "\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1, y_train)\n",
+    "\n",
+    "model.fit(X_train, y_train)\n",
+    "\n",
+    "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 161,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "submit = pd.DataFrame(pred, columns=['total_cases'])\n",
+    "x_3 = X_test_1.iloc[:,:3].copy()\n",
+    "submit = pd.concat([x_3, submit], axis=1)\n",
+    "submit.to_csv('data/submit.csv', index=False)"
+   ]
   }
  ],
  "metadata": {
diff --git a/utils/LastInfected.py b/utils/LastInfected.py
index f181aa9..de8dc36 100644
--- a/utils/LastInfected.py
+++ b/utils/LastInfected.py
@@ -15,9 +15,9 @@ def __init__(self, weeks=1, new_attributes_prefix='last_infected_', add_noise=Fa
         self.noise_mean = noise_mean
         self.noise_std = noise_std
 
-        self.first = True
     
     def fit(self, X, y):
+        self.first = True
         self.y = y.to_list()
         return self
     
diff --git a/OurPipeline.py b/utils/OurPipeline.py
similarity index 100%
rename from OurPipeline.py
rename to utils/OurPipeline.py
diff --git a/utils/predict_in_order.py b/utils/predict_in_order.py
new file mode 100644
index 0000000..e5dc45a
--- /dev/null
+++ b/utils/predict_in_order.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+def predict_in_order(X, model, pipeline):
+    #X_test_f = pd.DataFrame([], columns=attr[4:])
+    predictions=[]
+    for idx in range(X.shape[0]):
+        x = pipeline.transform(X.loc[idx:idx,:])
+        #X_test_f = X_test_f.append(x, sort=False, ignore_index=True)
+        pred = model.predict(x)
+        pred = int(np.round(pred))
+        pipeline.named_steps['l_infected'].append_y(pred)
+        predictions.append(pred)
+    
+    return predictions
\ No newline at end of file

From 9cd1ec1de416e081f37eb40ef9d537e62c242f3a Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Fri, 19 Apr 2019 20:03:38 +0100
Subject: [PATCH 22/24] :(

---
 models.ipynb | 598 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 573 insertions(+), 25 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index 199dc28..c45d042 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -20,7 +20,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 156,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -52,7 +52,7 @@
        " 'station_precip_mm']"
       ]
      },
-     "execution_count": 156,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -402,7 +402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 157,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -568,7 +568,7 @@
    "source": [
     "# One by one prediction with noise\n",
     "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n",
-    "* A very likely guess is that the errors when y is low is much smaller than when y is high."
+    "* A very likely guess is that the error when y is low is much smaller than when y is high."
    ]
   },
   {
@@ -1127,7 +1127,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 101,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -1136,7 +1136,7 @@
        "((933, 24), (933,), (518, 24), (518,))"
       ]
      },
-     "execution_count": 101,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1155,16 +1155,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 102,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((466, 24), (467, 24), (466,), (467,), (259, 24), (259, 24), (259,), (259,))"
+       "((186, 24), (747, 24), (186,), (747,), (103, 24), (415, 24), (103,), (415,))"
       ]
      },
-     "execution_count": 102,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1172,13 +1172,13 @@
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "\n",
-    "l = train_test_split(X_sj, y_sj, train_size=0.5, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_sj, y_sj, train_size=0.2, test_size=None, shuffle=False)\n",
     "X_train_sj = l[0]\n",
     "X_test_sj = l[1]\n",
     "y_train_sj = l[2]\n",
     "y_test_sj = l[3]\n",
     "\n",
-    "l = train_test_split(X_iq, y_iq, train_size=0.5, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_iq, y_iq, train_size=0.2, test_size=None, shuffle=False)\n",
     "X_train_iq = l[0]\n",
     "X_test_iq = l[1]\n",
     "y_train_iq = l[2]\n",
@@ -1189,16 +1189,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 103,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((725, 24), (725,), (726, 24), (726,))"
+       "((289, 24), (289,), (1162, 24), (1162,))"
       ]
      },
-     "execution_count": 103,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1225,12 +1225,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 151,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
     "%autoreload\n",
-    "from OurPipeline import create_pipeline\n",
+    "from utils.OurPipeline import create_pipeline\n",
     "\n",
     "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
     "X_train = pipeline.fit_transform(X_train_2, y_train_2)"
@@ -1245,7 +1245,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 152,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -1259,7 +1259,7 @@
        "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
       ]
      },
-     "execution_count": 152,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1271,7 +1271,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 153,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1287,16 +1287,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 154,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "11.414600550964188"
+       "22.689328743545612"
       ]
      },
-     "execution_count": 154,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1315,7 +1315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 158,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1333,7 +1333,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 161,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1342,6 +1342,554 @@
     "submit = pd.concat([x_3, submit], axis=1)\n",
     "submit.to_csv('data/submit.csv', index=False)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>city</th>\n",
+       "      <th>year</th>\n",
+       "      <th>weekofyear</th>\n",
+       "      <th>total_cases</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>18</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>19</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>20</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>21</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>23</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>24</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>25</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>26</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>27</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>28</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>29</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>30</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>31</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>32</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>33</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>34</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>35</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>36</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>37</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>38</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>39</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>40</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>41</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>42</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>43</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>44</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>45</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>46</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>sj</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>47</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>386</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>48</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>387</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>49</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>388</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>50</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>389</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2012</td>\n",
+       "      <td>51</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>390</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>391</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>392</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>3</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>393</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>394</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>395</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>396</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>397</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>8</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>398</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>399</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>10</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>400</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>11</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>401</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>12</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>402</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>13</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>403</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>14</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>404</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>15</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>405</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>16</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>406</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>17</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>407</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>18</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>408</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>19</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>409</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>20</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>410</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>21</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>411</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>22</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>412</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>23</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>413</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>24</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>414</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>25</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>415</th>\n",
+       "      <td>iq</td>\n",
+       "      <td>2013</td>\n",
+       "      <td>26</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>416 rows × 4 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    city  year  weekofyear  total_cases\n",
+       "0     sj  2008          18            6\n",
+       "1     sj  2008          19            6\n",
+       "2     sj  2008          20            6\n",
+       "3     sj  2008          21            6\n",
+       "4     sj  2008          22            6\n",
+       "5     sj  2008          23            6\n",
+       "6     sj  2008          24            6\n",
+       "7     sj  2008          25            6\n",
+       "8     sj  2008          26            6\n",
+       "9     sj  2008          27            6\n",
+       "10    sj  2008          28            6\n",
+       "11    sj  2008          29            6\n",
+       "12    sj  2008          30            6\n",
+       "13    sj  2008          31            6\n",
+       "14    sj  2008          32            6\n",
+       "15    sj  2008          33            6\n",
+       "16    sj  2008          34            6\n",
+       "17    sj  2008          35            6\n",
+       "18    sj  2008          36            6\n",
+       "19    sj  2008          37            6\n",
+       "20    sj  2008          38            6\n",
+       "21    sj  2008          39            6\n",
+       "22    sj  2008          40            6\n",
+       "23    sj  2008          41            6\n",
+       "24    sj  2008          42            6\n",
+       "25    sj  2008          43            6\n",
+       "26    sj  2008          44            6\n",
+       "27    sj  2008          45            6\n",
+       "28    sj  2008          46            6\n",
+       "29    sj  2008          47            6\n",
+       "..   ...   ...         ...          ...\n",
+       "386   iq  2012          48            6\n",
+       "387   iq  2012          49            6\n",
+       "388   iq  2012          50            6\n",
+       "389   iq  2012          51            6\n",
+       "390   iq  2013           1            6\n",
+       "391   iq  2013           2            6\n",
+       "392   iq  2013           3            6\n",
+       "393   iq  2013           4            6\n",
+       "394   iq  2013           5            6\n",
+       "395   iq  2013           6            6\n",
+       "396   iq  2013           7            6\n",
+       "397   iq  2013           8            6\n",
+       "398   iq  2013           9            6\n",
+       "399   iq  2013          10            6\n",
+       "400   iq  2013          11            6\n",
+       "401   iq  2013          12            6\n",
+       "402   iq  2013          13            6\n",
+       "403   iq  2013          14            6\n",
+       "404   iq  2013          15            6\n",
+       "405   iq  2013          16            6\n",
+       "406   iq  2013          17            6\n",
+       "407   iq  2013          18            6\n",
+       "408   iq  2013          19            6\n",
+       "409   iq  2013          20            6\n",
+       "410   iq  2013          21            6\n",
+       "411   iq  2013          22            6\n",
+       "412   iq  2013          23            6\n",
+       "413   iq  2013          24            6\n",
+       "414   iq  2013          25            6\n",
+       "415   iq  2013          26            6\n",
+       "\n",
+       "[416 rows x 4 columns]"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "submit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From ed24cc387af782fc3cf9c17d64e23414aff83653 Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Mon, 22 Apr 2019 18:56:25 +0100
Subject: [PATCH 23/24] yo

---
 models.ipynb         | 1018 ++++++++++++++++++------------------------
 utils/OurPipeline.py |   10 +-
 2 files changed, 435 insertions(+), 593 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index c45d042..c9ab18c 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -112,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,12 +140,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [],
    "source": [
-    "k_folds=10\n",
-    "n_iter_search = 20\n",
+    "k_folds=5\n",
+    "n_iter_search = 10\n",
     "C = sp_randint(0, 10000)\n",
     "params = {'kernel':['linear'], 'gamma':['scale'], 'C': C}\n",
     "SVR_optimizer = RandomizedSearchCV(estimator=SVR(), param_distributions=params, n_iter=n_iter_search, scoring=score_metric, n_jobs=jobs, cv=k_folds, verbose=verbose_level, random_state=random_n, return_train_score=True, iid=True)"
@@ -170,7 +170,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 52,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -195,7 +195,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 53,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -216,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 54,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -237,7 +237,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -258,37 +258,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Best score of 10.757898351648352 with the estimator DecisionTreeRegressor(criterion='mae', max_depth=7, max_features=None,\n",
-      "           max_leaf_nodes=None, min_impurity_decrease=0.0,\n",
-      "           min_impurity_split=None, min_samples_leaf=0.048518173584686866,\n",
-      "           min_samples_split=0.08977730688967958,\n",
-      "           min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n",
-      "           splitter='best')\n",
-      "1/4\t\n",
-      "Best score of 8.57545699492815 with the estimator RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=5,\n",
-      "           max_features='auto', max_leaf_nodes=None,\n",
-      "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-      "           min_samples_leaf=1, min_samples_split=2,\n",
-      "           min_weight_fraction_leaf=0.0, n_estimators=26, n_jobs=-1,\n",
-      "           oob_score=False, random_state=None, verbose=0, warm_start=False)\n",
-      "2/4\t3/4\t4/4\t"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
     "from sklearn.decomposition import PCA\n",
     "\n",
-    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer]#, SVR_optimizer]\n",
+    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
     "weeks = [1]\n",
     "weeks_infected = [3]\n",
     "pca = [None]\n",
@@ -402,7 +380,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -568,7 +546,7 @@
    "source": [
     "# One by one prediction with noise\n",
     "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n",
-    "* A very likely guess is that the error when y is low is much smaller than when y is high."
+    "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high."
    ]
   },
   {
@@ -664,398 +642,6 @@
     "submit.to_csv('data/submit.csv', index=False)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 162,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>ndvi_ne</th>\n",
-       "      <th>ndvi_nw</th>\n",
-       "      <th>ndvi_se</th>\n",
-       "      <th>ndvi_sw</th>\n",
-       "      <th>precipitation_amt_mm</th>\n",
-       "      <th>reanalysis_air_temp_k</th>\n",
-       "      <th>reanalysis_avg_temp_k</th>\n",
-       "      <th>reanalysis_dew_point_temp_k</th>\n",
-       "      <th>reanalysis_max_air_temp_k</th>\n",
-       "      <th>reanalysis_min_air_temp_k</th>\n",
-       "      <th>...</th>\n",
-       "      <th>last_weeks_0_reanalysis_tdtr_k</th>\n",
-       "      <th>last_weeks_0_station_avg_temp_c</th>\n",
-       "      <th>last_weeks_0_station_diur_temp_rng_c</th>\n",
-       "      <th>last_weeks_0_station_max_temp_c</th>\n",
-       "      <th>last_weeks_0_station_min_temp_c</th>\n",
-       "      <th>last_weeks_0_station_precip_mm</th>\n",
-       "      <th>last_infected_0</th>\n",
-       "      <th>last_infected_1</th>\n",
-       "      <th>last_infected_2</th>\n",
-       "      <th>pred</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>-0.018900</td>\n",
-       "      <td>-0.018900</td>\n",
-       "      <td>0.102729</td>\n",
-       "      <td>0.091200</td>\n",
-       "      <td>78.60</td>\n",
-       "      <td>298.492857</td>\n",
-       "      <td>298.550000</td>\n",
-       "      <td>294.527143</td>\n",
-       "      <td>301.1</td>\n",
-       "      <td>296.4</td>\n",
-       "      <td>...</td>\n",
-       "      <td>3.957143</td>\n",
-       "      <td>27.042857</td>\n",
-       "      <td>7.514286</td>\n",
-       "      <td>31.7</td>\n",
-       "      <td>23.3</td>\n",
-       "      <td>0.3</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>-0.018000</td>\n",
-       "      <td>-0.012400</td>\n",
-       "      <td>0.082043</td>\n",
-       "      <td>0.072314</td>\n",
-       "      <td>12.56</td>\n",
-       "      <td>298.475714</td>\n",
-       "      <td>298.557143</td>\n",
-       "      <td>294.395714</td>\n",
-       "      <td>300.8</td>\n",
-       "      <td>296.7</td>\n",
-       "      <td>...</td>\n",
-       "      <td>3.128571</td>\n",
-       "      <td>26.528571</td>\n",
-       "      <td>7.057143</td>\n",
-       "      <td>33.3</td>\n",
-       "      <td>21.7</td>\n",
-       "      <td>75.2</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>-0.001500</td>\n",
-       "      <td>-0.012400</td>\n",
-       "      <td>0.151083</td>\n",
-       "      <td>0.091529</td>\n",
-       "      <td>3.66</td>\n",
-       "      <td>299.455714</td>\n",
-       "      <td>299.357143</td>\n",
-       "      <td>295.308571</td>\n",
-       "      <td>302.2</td>\n",
-       "      <td>296.4</td>\n",
-       "      <td>...</td>\n",
-       "      <td>2.571429</td>\n",
-       "      <td>26.071429</td>\n",
-       "      <td>5.557143</td>\n",
-       "      <td>30.0</td>\n",
-       "      <td>22.2</td>\n",
-       "      <td>34.3</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>5.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>-0.001500</td>\n",
-       "      <td>-0.019867</td>\n",
-       "      <td>0.124329</td>\n",
-       "      <td>0.125686</td>\n",
-       "      <td>0.00</td>\n",
-       "      <td>299.690000</td>\n",
-       "      <td>299.728571</td>\n",
-       "      <td>294.402857</td>\n",
-       "      <td>303.0</td>\n",
-       "      <td>296.9</td>\n",
-       "      <td>...</td>\n",
-       "      <td>4.428571</td>\n",
-       "      <td>27.928571</td>\n",
-       "      <td>7.785714</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>22.8</td>\n",
-       "      <td>3.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>0.056800</td>\n",
-       "      <td>0.039833</td>\n",
-       "      <td>0.062267</td>\n",
-       "      <td>0.075914</td>\n",
-       "      <td>0.76</td>\n",
-       "      <td>299.780000</td>\n",
-       "      <td>299.671429</td>\n",
-       "      <td>294.760000</td>\n",
-       "      <td>302.3</td>\n",
-       "      <td>297.3</td>\n",
-       "      <td>...</td>\n",
-       "      <td>4.342857</td>\n",
-       "      <td>28.057143</td>\n",
-       "      <td>6.271429</td>\n",
-       "      <td>33.3</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>0.3</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>-0.044000</td>\n",
-       "      <td>-0.030467</td>\n",
-       "      <td>0.132000</td>\n",
-       "      <td>0.083529</td>\n",
-       "      <td>71.17</td>\n",
-       "      <td>299.768571</td>\n",
-       "      <td>299.728571</td>\n",
-       "      <td>295.314286</td>\n",
-       "      <td>301.9</td>\n",
-       "      <td>297.6</td>\n",
-       "      <td>...</td>\n",
-       "      <td>3.542857</td>\n",
-       "      <td>27.614286</td>\n",
-       "      <td>7.085714</td>\n",
-       "      <td>33.3</td>\n",
-       "      <td>23.3</td>\n",
-       "      <td>84.1</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>-0.044300</td>\n",
-       "      <td>-0.024925</td>\n",
-       "      <td>0.132271</td>\n",
-       "      <td>0.159157</td>\n",
-       "      <td>48.99</td>\n",
-       "      <td>300.062857</td>\n",
-       "      <td>300.007143</td>\n",
-       "      <td>295.650000</td>\n",
-       "      <td>302.4</td>\n",
-       "      <td>297.5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>2.857143</td>\n",
-       "      <td>28.000000</td>\n",
-       "      <td>5.171429</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>25.0</td>\n",
-       "      <td>27.7</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>-0.044300</td>\n",
-       "      <td>0.082150</td>\n",
-       "      <td>0.144371</td>\n",
-       "      <td>0.116729</td>\n",
-       "      <td>30.81</td>\n",
-       "      <td>300.484286</td>\n",
-       "      <td>300.578571</td>\n",
-       "      <td>295.997143</td>\n",
-       "      <td>303.5</td>\n",
-       "      <td>297.5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>3.157143</td>\n",
-       "      <td>27.400000</td>\n",
-       "      <td>6.042857</td>\n",
-       "      <td>31.1</td>\n",
-       "      <td>23.3</td>\n",
-       "      <td>91.7</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>0.010800</td>\n",
-       "      <td>0.049900</td>\n",
-       "      <td>0.100571</td>\n",
-       "      <td>0.117329</td>\n",
-       "      <td>8.02</td>\n",
-       "      <td>300.601429</td>\n",
-       "      <td>300.621429</td>\n",
-       "      <td>296.268571</td>\n",
-       "      <td>302.5</td>\n",
-       "      <td>298.5</td>\n",
-       "      <td>...</td>\n",
-       "      <td>3.900000</td>\n",
-       "      <td>28.757143</td>\n",
-       "      <td>6.985714</td>\n",
-       "      <td>34.4</td>\n",
-       "      <td>24.4</td>\n",
-       "      <td>0.3</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>0.072667</td>\n",
-       "      <td>0.106660</td>\n",
-       "      <td>0.155429</td>\n",
-       "      <td>0.164900</td>\n",
-       "      <td>17.52</td>\n",
-       "      <td>300.497143</td>\n",
-       "      <td>300.528571</td>\n",
-       "      <td>296.411429</td>\n",
-       "      <td>302.3</td>\n",
-       "      <td>298.7</td>\n",
-       "      <td>...</td>\n",
-       "      <td>2.785714</td>\n",
-       "      <td>28.657143</td>\n",
-       "      <td>6.242857</td>\n",
-       "      <td>32.8</td>\n",
-       "      <td>23.9</td>\n",
-       "      <td>28.7</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>10 rows × 44 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    ndvi_ne   ndvi_nw   ndvi_se   ndvi_sw  precipitation_amt_mm  \\\n",
-       "0 -0.018900 -0.018900  0.102729  0.091200                 78.60   \n",
-       "1 -0.018000 -0.012400  0.082043  0.072314                 12.56   \n",
-       "2 -0.001500 -0.012400  0.151083  0.091529                  3.66   \n",
-       "3 -0.001500 -0.019867  0.124329  0.125686                  0.00   \n",
-       "4  0.056800  0.039833  0.062267  0.075914                  0.76   \n",
-       "5 -0.044000 -0.030467  0.132000  0.083529                 71.17   \n",
-       "6 -0.044300 -0.024925  0.132271  0.159157                 48.99   \n",
-       "7 -0.044300  0.082150  0.144371  0.116729                 30.81   \n",
-       "8  0.010800  0.049900  0.100571  0.117329                  8.02   \n",
-       "9  0.072667  0.106660  0.155429  0.164900                 17.52   \n",
-       "\n",
-       "   reanalysis_air_temp_k  reanalysis_avg_temp_k  reanalysis_dew_point_temp_k  \\\n",
-       "0             298.492857             298.550000                   294.527143   \n",
-       "1             298.475714             298.557143                   294.395714   \n",
-       "2             299.455714             299.357143                   295.308571   \n",
-       "3             299.690000             299.728571                   294.402857   \n",
-       "4             299.780000             299.671429                   294.760000   \n",
-       "5             299.768571             299.728571                   295.314286   \n",
-       "6             300.062857             300.007143                   295.650000   \n",
-       "7             300.484286             300.578571                   295.997143   \n",
-       "8             300.601429             300.621429                   296.268571   \n",
-       "9             300.497143             300.528571                   296.411429   \n",
-       "\n",
-       "   reanalysis_max_air_temp_k  reanalysis_min_air_temp_k  ...  \\\n",
-       "0                      301.1                      296.4  ...   \n",
-       "1                      300.8                      296.7  ...   \n",
-       "2                      302.2                      296.4  ...   \n",
-       "3                      303.0                      296.9  ...   \n",
-       "4                      302.3                      297.3  ...   \n",
-       "5                      301.9                      297.6  ...   \n",
-       "6                      302.4                      297.5  ...   \n",
-       "7                      303.5                      297.5  ...   \n",
-       "8                      302.5                      298.5  ...   \n",
-       "9                      302.3                      298.7  ...   \n",
-       "\n",
-       "   last_weeks_0_reanalysis_tdtr_k  last_weeks_0_station_avg_temp_c  \\\n",
-       "0                        3.957143                        27.042857   \n",
-       "1                        3.128571                        26.528571   \n",
-       "2                        2.571429                        26.071429   \n",
-       "3                        4.428571                        27.928571   \n",
-       "4                        4.342857                        28.057143   \n",
-       "5                        3.542857                        27.614286   \n",
-       "6                        2.857143                        28.000000   \n",
-       "7                        3.157143                        27.400000   \n",
-       "8                        3.900000                        28.757143   \n",
-       "9                        2.785714                        28.657143   \n",
-       "\n",
-       "   last_weeks_0_station_diur_temp_rng_c  last_weeks_0_station_max_temp_c  \\\n",
-       "0                              7.514286                             31.7   \n",
-       "1                              7.057143                             33.3   \n",
-       "2                              5.557143                             30.0   \n",
-       "3                              7.785714                             32.8   \n",
-       "4                              6.271429                             33.3   \n",
-       "5                              7.085714                             33.3   \n",
-       "6                              5.171429                             32.8   \n",
-       "7                              6.042857                             31.1   \n",
-       "8                              6.985714                             34.4   \n",
-       "9                              6.242857                             32.8   \n",
-       "\n",
-       "   last_weeks_0_station_min_temp_c  last_weeks_0_station_precip_mm  \\\n",
-       "0                             23.3                             0.3   \n",
-       "1                             21.7                            75.2   \n",
-       "2                             22.2                            34.3   \n",
-       "3                             22.8                             3.0   \n",
-       "4                             24.4                             0.3   \n",
-       "5                             23.3                            84.1   \n",
-       "6                             25.0                            27.7   \n",
-       "7                             23.3                            91.7   \n",
-       "8                             24.4                             0.3   \n",
-       "9                             23.9                            28.7   \n",
-       "\n",
-       "   last_infected_0  last_infected_1  last_infected_2  pred  \n",
-       "0              5.0              3.0              1.0     6  \n",
-       "1              6.0              5.0              3.0     6  \n",
-       "2              6.0              6.0              5.0     6  \n",
-       "3              6.0              6.0              6.0     6  \n",
-       "4              6.0              6.0              6.0     6  \n",
-       "5              6.0              6.0              6.0     6  \n",
-       "6              6.0              6.0              6.0     6  \n",
-       "7              6.0              6.0              6.0     6  \n",
-       "8              6.0              6.0              6.0     6  \n",
-       "9              6.0              6.0              6.0     6  \n",
-       "\n",
-       "[10 rows x 44 columns]"
-      ]
-     },
-     "execution_count": 162,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pd.concat([X_test_f, pd.DataFrame(predictions, columns=['pred'])], axis=1).head(10)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 158,
@@ -1127,7 +713,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -1136,7 +722,7 @@
        "((933, 24), (933,), (518, 24), (518,))"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1155,16 +741,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((186, 24), (747, 24), (186,), (747,), (103, 24), (415, 24), (103,), (415,))"
+       "((373, 24), (560, 24), (373,), (560,), (207, 24), (311, 24), (207,), (311,))"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1172,13 +758,13 @@
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "\n",
-    "l = train_test_split(X_sj, y_sj, train_size=0.2, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_sj, y_sj, train_size=0.4, test_size=None, shuffle=False)\n",
     "X_train_sj = l[0]\n",
     "X_test_sj = l[1]\n",
     "y_train_sj = l[2]\n",
     "y_test_sj = l[3]\n",
     "\n",
-    "l = train_test_split(X_iq, y_iq, train_size=0.2, test_size=None, shuffle=False)\n",
+    "l = train_test_split(X_iq, y_iq, train_size=0.4, test_size=None, shuffle=False)\n",
     "X_train_iq = l[0]\n",
     "X_test_iq = l[1]\n",
     "y_train_iq = l[2]\n",
@@ -1189,16 +775,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "((289, 24), (289,), (1162, 24), (1162,))"
+       "((580, 24), (580,), (871, 24), (871,))"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1225,7 +811,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 44,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1240,38 +826,68 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Train"
+    "### Train\n",
+    "* Since we can't use `RandomizedSearchCV` with this prediction mode, we opted to implement our own exhaustive search tool.\n",
+    "* For `RandomForestRegressor`, the best combination was 50 estimators and a maximum depth of 5."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n",
-       "           max_features='auto', max_leaf_nodes=None,\n",
-       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=1, min_samples_split=2,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=150, n_jobs=None,\n",
-       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
+       "(17.044776119402986, (50, 5))"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "model = RandomForestRegressor(criterion='mae', n_estimators=150, max_depth=3)\n",
-    "model.fit(X_train, y_train_2)"
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from utils.predict_in_order import predict_in_order\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "\n",
+    "estimators = [25, 50, 75]\n",
+    "depth = [2,3,4,5]\n",
+    "\n",
+    "best_mae=np.inf\n",
+    "best=None\n",
+    "for est in estimators:\n",
+    "    for d in depth:\n",
+    "        model = RandomForestRegressor(criterion='mae', n_estimators=est, max_depth=d)\n",
+    "        model.fit(X_train, y_train_2)\n",
+    "\n",
+    "\n",
+    "        #pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
+    "        #pipeline.fit_transform(X_train_2, y_train_2)\n",
+    "\n",
+    "        pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)\n",
+    "\n",
+    "        mae = mean_absolute_error(pred, y_test_2)\n",
+    "        if mae < best_mae:\n",
+    "            best_mae = mae\n",
+    "            best = (est, d)\n",
+    "\n",
+    "best_mae, best"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Submit\n",
+    "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 46,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1280,60 +896,282 @@
     "from utils.predict_in_order import predict_in_order\n",
     "\n",
     "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
-    "pipeline.fit_transform(X_train_2, y_train_2)\n",
+    "X_train = pipeline.fit_transform(X_train_2, y_train_2)\n",
+    "\n",
+    "model = RandomForestRegressor(criterion='mae', n_estimators=50, max_depth=5)\n",
+    "model.fit(X_train, y_train_2)\n",
     "\n",
-    "pred = predict_in_order(X_test_2, model=model, pipeline=pipeline)"
+    "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "submit = pd.DataFrame(pred, columns=['total_cases'])\n",
+    "x_3 = X_test_1.iloc[:,:3].copy()\n",
+    "submit = pd.concat([x_3, submit], axis=1)\n",
+    "submit.to_csv('data/submit.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# A simpler prediction\n",
+    "* Given that we are not able to make a very accurate prediction, perhaps the problem is that we are trying to use the attribute holding the previous weeks' infected counts, which clearly has potential but which we have not been able to harness."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
+    "weeks = [3]\n",
+    "\n",
+    "n_total = len(optimizers) * len(weeks) \n",
+    "\n",
+    "\n",
+    "results=[]\n",
+    "best_attempt = None\n",
+    "best_score = np.inf\n",
+    "idx=0\n",
+    "for opt in optimizers:\n",
+    "    for w in weeks:\n",
+    "        pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n",
+    "        pipeline.fit(X_train_1, y_train)\n",
+    "        score = pipeline.named_steps['est_opt'].best_score_\n",
+    "        best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
+    "        attempt = [best_estimator, w, score]\n",
+    "        if abs(score) < best_score:\n",
+    "            best_score = abs(score)\n",
+    "            best_attempt = attempt\n",
+    "            print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n",
+    "        idx+=1\n",
+    "        print(str(idx) + '/' + str(n_total), end='\\t')\n",
+    "        results.append(attempt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-18.341489</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>-18.084080</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>-17.886975</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-18.338104</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>-18.133689</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>-17.874649</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-20.234666</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>-20.272226</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>-19.484149</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>KNeighborsRegressor(algorithm='auto', leaf_siz...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>-20.432433</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "22.689328743545612"
+       "                                                   0  1          2\n",
+       "0  DecisionTreeRegressor(criterion='mae', max_dep...  1 -18.341489\n",
+       "1  DecisionTreeRegressor(criterion='mae', max_dep...  2 -18.084080\n",
+       "2  DecisionTreeRegressor(criterion='mae', max_dep...  3 -17.886975\n",
+       "3  (DecisionTreeRegressor(criterion='mae', max_de...  1 -18.338104\n",
+       "4  (DecisionTreeRegressor(criterion='mae', max_de...  2 -18.133689\n",
+       "5  (DecisionTreeRegressor(criterion='mae', max_de...  3 -17.874649\n",
+       "6  (DecisionTreeRegressor(criterion='mae', max_de...  1 -20.234666\n",
+       "7  (DecisionTreeRegressor(criterion='mae', max_de...  2 -20.272226\n",
+       "8  (DecisionTreeRegressor(criterion='mae', max_de...  3 -19.484149\n",
+       "9  KNeighborsRegressor(algorithm='auto', leaf_siz...  1 -20.432433"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 63,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "from sklearn.metrics import mean_absolute_error\n",
-    "mean_absolute_error(pred, y_test_2)"
+    "pd.DataFrame(results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
+       "            max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "            min_samples_leaf=1, min_samples_split=2,\n",
+       "            min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
+       "            oob_score=False, random_state=None, verbose=0, warm_start=False),\n",
+       " 3,\n",
+       " -17.87464878333245]"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "best_attempt"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Submit"
+    "### Train"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 69,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=1, min_samples_split=2,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
+       "           oob_score=False, random_state=42, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "%autoreload\n",
     "from utils.OurPipeline import create_pipeline\n",
-    "from utils.predict_in_order import predict_in_order\n",
+    "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1)\n",
     "\n",
-    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
-    "X_train = pipeline.fit_transform(X_train_1, y_train)\n",
-    "\n",
-    "model.fit(X_train, y_train)\n",
-    "\n",
-    "pred = predict_in_order(X_test_1, model=model, pipeline=pipeline)"
+    "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = pipeline.transform(X_test_1)\n",
+    "pred = model.predict(X_test)\n",
+    "pred = list(map(lambda x: int(np.round(x)), pred))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Submit"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 78,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1345,7 +1183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 79,
    "metadata": {},
    "outputs": [
     {
@@ -1381,210 +1219,210 @@
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>18</td>\n",
-       "      <td>6</td>\n",
+       "      <td>19</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>19</td>\n",
-       "      <td>6</td>\n",
+       "      <td>15</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>20</td>\n",
-       "      <td>6</td>\n",
+       "      <td>12</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>21</td>\n",
-       "      <td>6</td>\n",
+       "      <td>21</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>22</td>\n",
-       "      <td>6</td>\n",
+       "      <td>12</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>23</td>\n",
-       "      <td>6</td>\n",
+       "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>24</td>\n",
-       "      <td>6</td>\n",
+       "      <td>10</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>25</td>\n",
-       "      <td>6</td>\n",
+       "      <td>24</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>26</td>\n",
-       "      <td>6</td>\n",
+       "      <td>24</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>27</td>\n",
-       "      <td>6</td>\n",
+       "      <td>20</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>28</td>\n",
-       "      <td>6</td>\n",
+       "      <td>23</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>29</td>\n",
-       "      <td>6</td>\n",
+       "      <td>26</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>12</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>30</td>\n",
-       "      <td>6</td>\n",
+       "      <td>38</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>13</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>31</td>\n",
-       "      <td>6</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>14</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>32</td>\n",
-       "      <td>6</td>\n",
+       "      <td>26</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>15</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>33</td>\n",
-       "      <td>6</td>\n",
+       "      <td>28</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>16</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>34</td>\n",
-       "      <td>6</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>17</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>35</td>\n",
-       "      <td>6</td>\n",
+       "      <td>29</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>18</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>36</td>\n",
-       "      <td>6</td>\n",
+       "      <td>64</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>19</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>37</td>\n",
-       "      <td>6</td>\n",
+       "      <td>29</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>20</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>38</td>\n",
-       "      <td>6</td>\n",
+       "      <td>79</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>21</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>39</td>\n",
-       "      <td>6</td>\n",
+       "      <td>32</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>22</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>40</td>\n",
-       "      <td>6</td>\n",
+       "      <td>31</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>23</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>41</td>\n",
-       "      <td>6</td>\n",
+       "      <td>32</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>24</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>42</td>\n",
-       "      <td>6</td>\n",
+       "      <td>30</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>25</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>43</td>\n",
-       "      <td>6</td>\n",
+       "      <td>24</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>26</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>44</td>\n",
-       "      <td>6</td>\n",
+       "      <td>32</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>27</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>45</td>\n",
-       "      <td>6</td>\n",
+       "      <td>29</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>28</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>46</td>\n",
-       "      <td>6</td>\n",
+       "      <td>26</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>29</th>\n",
        "      <td>sj</td>\n",
        "      <td>2008</td>\n",
        "      <td>47</td>\n",
-       "      <td>6</td>\n",
+       "      <td>26</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -1598,210 +1436,210 @@
        "      <td>iq</td>\n",
        "      <td>2012</td>\n",
        "      <td>48</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>387</th>\n",
        "      <td>iq</td>\n",
        "      <td>2012</td>\n",
        "      <td>49</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>388</th>\n",
        "      <td>iq</td>\n",
        "      <td>2012</td>\n",
        "      <td>50</td>\n",
-       "      <td>6</td>\n",
+       "      <td>7</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>389</th>\n",
        "      <td>iq</td>\n",
        "      <td>2012</td>\n",
        "      <td>51</td>\n",
-       "      <td>6</td>\n",
+       "      <td>8</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>390</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>1</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>391</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>2</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>392</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>3</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>393</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>4</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>394</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>5</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>395</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>6</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>396</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>7</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>397</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>8</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>398</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>9</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>399</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>10</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>400</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>11</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>401</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>12</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>402</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>13</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>403</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>14</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>404</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>15</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>405</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>16</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>406</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>17</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>407</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>18</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>408</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>19</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>409</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>20</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>410</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>21</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>411</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>22</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>412</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>23</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>413</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>24</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>414</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>25</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>415</th>\n",
        "      <td>iq</td>\n",
        "      <td>2013</td>\n",
        "      <td>26</td>\n",
-       "      <td>6</td>\n",
+       "      <td>5</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1810,72 +1648,72 @@
       ],
       "text/plain": [
        "    city  year  weekofyear  total_cases\n",
-       "0     sj  2008          18            6\n",
-       "1     sj  2008          19            6\n",
-       "2     sj  2008          20            6\n",
-       "3     sj  2008          21            6\n",
-       "4     sj  2008          22            6\n",
-       "5     sj  2008          23            6\n",
-       "6     sj  2008          24            6\n",
-       "7     sj  2008          25            6\n",
-       "8     sj  2008          26            6\n",
-       "9     sj  2008          27            6\n",
-       "10    sj  2008          28            6\n",
-       "11    sj  2008          29            6\n",
-       "12    sj  2008          30            6\n",
-       "13    sj  2008          31            6\n",
-       "14    sj  2008          32            6\n",
-       "15    sj  2008          33            6\n",
-       "16    sj  2008          34            6\n",
-       "17    sj  2008          35            6\n",
-       "18    sj  2008          36            6\n",
-       "19    sj  2008          37            6\n",
-       "20    sj  2008          38            6\n",
-       "21    sj  2008          39            6\n",
-       "22    sj  2008          40            6\n",
-       "23    sj  2008          41            6\n",
-       "24    sj  2008          42            6\n",
-       "25    sj  2008          43            6\n",
-       "26    sj  2008          44            6\n",
-       "27    sj  2008          45            6\n",
-       "28    sj  2008          46            6\n",
-       "29    sj  2008          47            6\n",
+       "0     sj  2008          18           19\n",
+       "1     sj  2008          19           15\n",
+       "2     sj  2008          20           12\n",
+       "3     sj  2008          21           21\n",
+       "4     sj  2008          22           12\n",
+       "5     sj  2008          23           10\n",
+       "6     sj  2008          24           10\n",
+       "7     sj  2008          25           24\n",
+       "8     sj  2008          26           24\n",
+       "9     sj  2008          27           20\n",
+       "10    sj  2008          28           23\n",
+       "11    sj  2008          29           26\n",
+       "12    sj  2008          30           38\n",
+       "13    sj  2008          31           27\n",
+       "14    sj  2008          32           26\n",
+       "15    sj  2008          33           28\n",
+       "16    sj  2008          34           27\n",
+       "17    sj  2008          35           29\n",
+       "18    sj  2008          36           64\n",
+       "19    sj  2008          37           29\n",
+       "20    sj  2008          38           79\n",
+       "21    sj  2008          39           32\n",
+       "22    sj  2008          40           31\n",
+       "23    sj  2008          41           32\n",
+       "24    sj  2008          42           30\n",
+       "25    sj  2008          43           24\n",
+       "26    sj  2008          44           32\n",
+       "27    sj  2008          45           29\n",
+       "28    sj  2008          46           26\n",
+       "29    sj  2008          47           26\n",
        "..   ...   ...         ...          ...\n",
-       "386   iq  2012          48            6\n",
-       "387   iq  2012          49            6\n",
-       "388   iq  2012          50            6\n",
-       "389   iq  2012          51            6\n",
-       "390   iq  2013           1            6\n",
-       "391   iq  2013           2            6\n",
-       "392   iq  2013           3            6\n",
-       "393   iq  2013           4            6\n",
-       "394   iq  2013           5            6\n",
-       "395   iq  2013           6            6\n",
-       "396   iq  2013           7            6\n",
-       "397   iq  2013           8            6\n",
-       "398   iq  2013           9            6\n",
-       "399   iq  2013          10            6\n",
-       "400   iq  2013          11            6\n",
-       "401   iq  2013          12            6\n",
-       "402   iq  2013          13            6\n",
-       "403   iq  2013          14            6\n",
-       "404   iq  2013          15            6\n",
-       "405   iq  2013          16            6\n",
-       "406   iq  2013          17            6\n",
-       "407   iq  2013          18            6\n",
-       "408   iq  2013          19            6\n",
-       "409   iq  2013          20            6\n",
-       "410   iq  2013          21            6\n",
-       "411   iq  2013          22            6\n",
-       "412   iq  2013          23            6\n",
-       "413   iq  2013          24            6\n",
-       "414   iq  2013          25            6\n",
-       "415   iq  2013          26            6\n",
+       "386   iq  2012          48            5\n",
+       "387   iq  2012          49            5\n",
+       "388   iq  2012          50            7\n",
+       "389   iq  2012          51            8\n",
+       "390   iq  2013           1            5\n",
+       "391   iq  2013           2            5\n",
+       "392   iq  2013           3            5\n",
+       "393   iq  2013           4            5\n",
+       "394   iq  2013           5            5\n",
+       "395   iq  2013           6            5\n",
+       "396   iq  2013           7            5\n",
+       "397   iq  2013           8            5\n",
+       "398   iq  2013           9            5\n",
+       "399   iq  2013          10            5\n",
+       "400   iq  2013          11            5\n",
+       "401   iq  2013          12            5\n",
+       "402   iq  2013          13            5\n",
+       "403   iq  2013          14            5\n",
+       "404   iq  2013          15            5\n",
+       "405   iq  2013          16            5\n",
+       "406   iq  2013          17            5\n",
+       "407   iq  2013          18            5\n",
+       "408   iq  2013          19            5\n",
+       "409   iq  2013          20            5\n",
+       "410   iq  2013          21            5\n",
+       "411   iq  2013          22            5\n",
+       "412   iq  2013          23            5\n",
+       "413   iq  2013          24            5\n",
+       "414   iq  2013          25            5\n",
+       "415   iq  2013          26            5\n",
        "\n",
        "[416 rows x 4 columns]"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 79,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/utils/OurPipeline.py b/utils/OurPipeline.py
index 8a1d1dd..d880f4b 100644
--- a/utils/OurPipeline.py
+++ b/utils/OurPipeline.py
@@ -6,14 +6,18 @@
 from utils.LastWeeks import LastWeeks
 from utils.LastInfected import LastInfected
 
-def create_pipeline(attr, n_weeks, n_weeks_infected, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4):
+def create_pipeline(attr, n_weeks, n_weeks_infected=None, estimator_optimizer=None, pca=None, add_noise=False, noise_mean=None, noise_std=None, n_non_train=4):
+
+    l_infected = None
+    if n_weeks_infected is not None and n_weeks_infected > 0:
+        l_infected = LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)
 
     return Pipeline([
         ('imputer', ContinuityImputer(attributes=attr[n_non_train:])),
         ('l_weeks', LastWeeks(attributes=attr[n_non_train:], weeks=n_weeks)),
-        ('l_infected', LastInfected(weeks=n_weeks_infected, add_noise=add_noise, noise_mean=noise_mean, noise_std=noise_std)),
+        ('l_infected', l_infected),
         ('dataframe_dropper', DataFrameDropper(attribute_names=attr[:n_non_train])),
-        #('scaler', StandardScaler()),
+        ('scaler', StandardScaler()),
         ('pca', pca),
         ('est_opt', estimator_optimizer),
     ]

From 0fdd729218382501114b4570e78ae8c5d14cc88f Mon Sep 17 00:00:00 2001
From: MLobo1997 <miguelobo1997@gmail.com>
Date: Tue, 23 Apr 2019 11:37:45 +0100
Subject: [PATCH 24/24] Ready to deliver phase 1

---
 models.ipynb | 1160 +++++++++-----------------------------------------
 1 file changed, 201 insertions(+), 959 deletions(-)

diff --git a/models.ipynb b/models.ipynb
index c9ab18c..2db22e8 100644
--- a/models.ipynb
+++ b/models.ipynb
@@ -251,9 +251,163 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# Optimization\n",
+    "# The most simple prediction\n",
+    "* Our first attempt consists of simply adding weather information from the previous weeks and finding the optimal model through exhaustive search (coded by us), together with its optimal hyper-parameters (using `RandomSearchCV`).\n",
     "* Interestingly, PCA makes all the models worst in this case.\n",
-    "* After the exaustive search, the best model was the SVR which obtained an MAE of 6.52."
+    "* It turned out to be a `RandomForestRegressor`, as you can see in the `best_attempt` variable. By using this model and adding the 3 previous weeks of weather to each entry, we obtained an MAE of approximately 17 with 10-fold cross-validation.\n",
+    "* Unfortunately, this model (when trained with all the training data) resulted in an MAE of 27 when submitted to the platform. This indicates overfitting and that there must be considerable differences between the train and test data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
+    "weeks = [1,2,3,4]\n",
+    "\n",
+    "n_total = len(optimizers) * len(weeks) \n",
+    "\n",
+    "\n",
+    "results=[]\n",
+    "best_attempt = None\n",
+    "best_score = np.inf\n",
+    "idx=0\n",
+    "for opt in optimizers:\n",
+    "    for w in weeks:\n",
+    "        pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n",
+    "        pipeline.fit(X_train_1, y_train)\n",
+    "        score = pipeline.named_steps['est_opt'].best_score_\n",
+    "        best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
+    "        attempt = [best_estimator, w, score]\n",
+    "        if abs(score) < best_score:\n",
+    "            best_score = abs(score)\n",
+    "            best_attempt = attempt\n",
+    "            print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n",
+    "        idx+=1\n",
+    "        print(str(idx) + '/' + str(n_total), end='\\t')\n",
+    "        results.append(attempt)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
+       "            max_features='auto', max_leaf_nodes=None,\n",
+       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "            min_samples_leaf=1, min_samples_split=2,\n",
+       "            min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
+       "            oob_score=False, random_state=None, verbose=0, warm_start=False),\n",
+       " 3,\n",
+       " -17.87464878333245]"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "best_attempt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
+       "           max_features='auto', max_leaf_nodes=None,\n",
+       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "           min_samples_leaf=1, min_samples_split=2,\n",
+       "           min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
+       "           oob_score=False, random_state=42, verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "%autoreload\n",
+    "from utils.OurPipeline import create_pipeline\n",
+    "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n",
+    "X_train = pipeline.fit_transform(X_train_1)\n",
+    "\n",
+    "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n",
+    "model.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = pipeline.transform(X_test_1)\n",
+    "pred = model.predict(X_test)\n",
+    "pred = list(map(lambda x: int(np.round(x)), pred))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Submit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 78,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "submit = pd.DataFrame(pred, columns=['total_cases'])\n",
+    "x_3 = X_test_1.iloc[:,:3].copy()\n",
+    "submit = pd.concat([x_3, submit], axis=1)\n",
+    "submit.to_csv('data/submit.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Prediction with the last infected\n",
+    "* As we could see on the analysis notebook, the number of infected on any week is highly linked to the number of infected at its previous weeks. Including the number of infected (or at least an approximation) on the previous weeks should be key to very accurate predictions.\n",
+    "* For this sake, we created the `LastInfected` module which is included in the pipeline.\n",
+    "* After the exhaustive search, the best model was the SVR which obtained an MAE of 6.52 on the training dataset, which is a great improvement.\n",
+    "* Given that we are making sequential predictions, i.e.: the prediction from one week relies on the prediction from the previous weeks, we must make the transformations and predictions one by one.\n",
+    "* The submission MAE was approximately 26, which is an improvement and is not bad given that the `total_cases` feature on the training set ranges from 0 to 400. However, we were expecting a much smaller result.\n",
+    "\n",
+    "### Optimization"
    ]
   },
   {
@@ -267,9 +421,9 @@
     "from sklearn.decomposition import PCA\n",
     "\n",
     "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
-    "weeks = [1]\n",
-    "weeks_infected = [3]\n",
-    "pca = [None]\n",
+    "weeks = [1, 2, 3]\n",
+    "weeks_infected = [2, 3, 4]\n",
+    "pca = [PCA(0.95), None]\n",
     "\n",
     "n_total = len(optimizers) * len(weeks) * len(weeks_infected) * len(pca)\n",
     "\n",
@@ -281,7 +435,7 @@
     "    for w in weeks:\n",
     "        for wi in weeks_infected:\n",
     "            for p in pca:\n",
-    "                pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, add_noise=True, noise_mean=6.5, noise_std=6.5, pca=None)\n",
+    "                pipeline = create_pipeline(attr, n_weeks=w, n_weeks_infected=wi, estimator_optimizer=opt, pca=None)\n",
     "                pipeline.fit(X_train_1, y_train)\n",
     "                score = pipeline.named_steps['est_opt'].best_score_\n",
     "                best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
@@ -295,15 +449,6 @@
     "                results.append(attempt)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pd.DataFrame(results, columns=['estimator', 'weeks', 'weeks_infected', 'PCA', 'score'])"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 37,
@@ -400,9 +545,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## One by one prediction\n",
-    "* Given that we are making sequential predictions, i.e.: the prediction from a week relies on the prediction from the previous weeks, we must make the transformations and predictions one by one.\n",
-    "* Given that this kind of prediction is very prone to a snowball effect on errors our first solution had an error of 26. To solve this we came up with the idea of adding noise to the train data. However for this solution we need to know both: the mean of the error and its standard deviation (*std*). We already know the mean (MAE), we just need to know the *std*"
+    "## One by one prediction"
    ]
   },
   {
@@ -422,22 +565,36 @@
     }
    ],
    "source": [
-    "predictions=[]\n",
-    "for idx in range(X_test_1.shape[0]):\n",
-    "    x = pipeline.transform(X_test_1.loc[idx:idx,:])\n",
-    "    pred = model.predict(x)\n",
-    "    pred = int(np.round(pred))\n",
-    "    pipeline.named_steps['l_infected'].append_y(pred)\n",
-    "    predictions.append(pred)\n",
+    "from utils.predict_in_order import predict_in_order\n",
+    "predictions = predict_in_order(X_test_1, model, pipeline)\n",
     "len(predictions)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "submit = pd.DataFrame(predictions, columns=['total_cases'])\n",
+    "x_3 = X_test_1.iloc[:,:3].copy()\n",
+    "submit = pd.concat([x_3, submit], axis=1)\n",
+    "submit.to_csv('data/submit.csv', index=False)"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Calculating an approximation of the *std*\n",
-    "* It is approximately 10.9. We can see that the MAE is close to the one calculated in the cross-validation."
+    "# One by one prediction with noise\n",
+    "* We believe the reason our predictions were not so great is that this kind of prediction is very prone to a snowball effect on errors.\n",
+    "* To solve this we came up with an idea: our model was being trained on data in which all `last_infected` columns hold the exact correct values. However, when we are predicting with the test set, the values we use in `last_infected` are mere predictions. By adding random noise to the `last_infected` columns of the training data we would make our model more \"prepared\" to accept entries in which the `last_infected` columns are not so accurate.\n",
+    "* However for this solution we need to know both: the mean of the error and its standard deviation (*std*), so that we can reproduce the error by a gaussian distribution. We already know the mean (MAE), we just need to know the *std*\n",
+    "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our \"synthetic\" noise and the one created by the predictive model.\n",
+    "* The submission's MAE increased again to approximately 27. \n",
+    "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high.\n",
+    "\n",
+    "### Calculating an approximation of the *std*"
    ]
   },
   {
@@ -515,38 +672,11 @@
     "np.mean(errors), np.std(errors)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 152,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(6.53353, 4.950353092366241)"
-      ]
-     },
-     "execution_count": 152,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from random import choice, gauss\n",
-    "r=[]\n",
-    "for _ in range(100000):\n",
-    "    r.append(int(np.round(choice([-1,1]) * gauss(mu=0, sigma=8.2))))\n",
-    "r=np.abs(r)\n",
-    "np.mean(r), np.std(r)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# One by one prediction with noise\n",
-    "* When dealing with the test data, the noise adding feature of the pipeline must be disabled, otherwise our predictions will be based on 2 layers of noise: our synthetic noise and the one created by the predictive model.\n",
-    "* A very likely guess for why it isn't working is that the error when y is low is much smaller than when y is high."
+    "### Adding the noise and training"
    ]
   },
   {
@@ -558,42 +688,24 @@
     "%autoreload\n",
     "from OurPipeline import create_pipeline\n",
     "\n",
-    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=0, noise_std=8.2, pca=None)\n",
+    "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=True, noise_mean=6.78, noise_std=10.96, pca=None)\n",
     "X_train = pipeline.fit_transform(X_train_1, y_train)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 155,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "model = RandomForestRegressor(criterion='mae', n_estimators=100, max_depth=3)"
+    "model.fit(X_train, y_train)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 156,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=3,\n",
-       "           max_features='auto', max_leaf_nodes=None,\n",
-       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=1, min_samples_split=2,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,\n",
-       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
-      ]
-     },
-     "execution_count": 156,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "model.fit(X_train, y_train)"
+    "### Disabling the noise and predicting"
    ]
   },
   {
@@ -620,7 +732,8 @@
     "pipeline = create_pipeline(attr, n_weeks=1, n_weeks_infected=3, add_noise=False, pca=None)\n",
     "pipeline.fit_transform(X_train_1, y_train)\n",
     "\n",
-    "predict_in_order(X_test_1, model, pipeline)"
+    "predictions = predict_in_order(X_test_1, model, pipeline)\n",
+    "len(predictions)"
    ]
   },
   {
@@ -642,73 +755,18 @@
     "submit.to_csv('data/submit.csv', index=False)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 158,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "7.860576923076923"
-      ]
-     },
-     "execution_count": 158,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from sklearn.metrics import mean_absolute_error\n",
-    "mean_absolute_error(model.predict(X_train), y_train)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 160,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "371.265"
-      ]
-     },
-     "execution_count": 160,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "max(model.predict(X_train))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 163,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "6"
-      ]
-     },
-     "execution_count": 163,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "max(predictions)"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# Test split of tail\n",
-    "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing."
+    "* To simulate what we are doing with the test data, we are going to split the train data, for each city, by sampling N entries from the tail of each city for testing.\n",
+    "* We now have 580 entries of training data and 871 entries of test data, to figure out what is wrong.\n",
+    "* Since we can't use `RandomizedSearchCV` with this prediction mode (the one-by-one explained before), we opted to implement our own exhaustive search tool.\n",
+    "* Here we only worked with the `RandomForestRegressor` because it brought results almost as good as the `SVR` model and took far less time training.\n",
+    "* The optimal model turned out to be `RandomForestRegressor` with 50 estimators and a maximum depth of 5.\n",
+    "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30.\n",
+    "* We are hoping to be able to improve this result in phase 2 of the project."
    ]
   },
   {
@@ -826,9 +884,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Train\n",
-    "* Since we can't use `RandomizedSearchCV` with this prediction mode, we opted to implement our own exhaustive search tool.\n",
-    "* `RandomForestRegressor`, the best combination was with 50 estimators and a maximum depth of 5."
+    "### Train"
    ]
   },
   {
@@ -881,8 +937,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Submit\n",
-    "* Even though we obtain a MAE of approximately 17 on our custom test set (which has twice as many entries as the one from the competition), when we submit the data with that model we obtain a MAE of approximately 30."
+    "### Submit"
    ]
   },
   {
@@ -915,819 +970,6 @@
     "submit = pd.concat([x_3, submit], axis=1)\n",
     "submit.to_csv('data/submit.csv', index=False)"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# A more simple prediction\n",
-    "* Given that we are not being able to make a very accurate prediction, perhaps the problem is the fact that we are trying to use the previous infected attribute, which clearly has potential, however we are not being able to harness it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%autoreload\n",
-    "from utils.OurPipeline import create_pipeline\n",
-    "from sklearn.decomposition import PCA\n",
-    "\n",
-    "optimizers=[Tree_optimizer, Forest_optimizer, AdaTree_optimizer, KNN_optimizer, SVR_optimizer]\n",
-    "weeks = [3]\n",
-    "\n",
-    "n_total = len(optimizers) * len(weeks) \n",
-    "\n",
-    "\n",
-    "results=[]\n",
-    "best_attempt = None\n",
-    "best_score = np.inf\n",
-    "idx=0\n",
-    "for opt in optimizers:\n",
-    "    for w in weeks:\n",
-    "        pipeline = create_pipeline(attr, n_weeks=w, estimator_optimizer=opt, pca=None)\n",
-    "        pipeline.fit(X_train_1, y_train)\n",
-    "        score = pipeline.named_steps['est_opt'].best_score_\n",
-    "        best_estimator = pipeline.named_steps['est_opt'].best_estimator_\n",
-    "        attempt = [best_estimator, w, score]\n",
-    "        if abs(score) < best_score:\n",
-    "            best_score = abs(score)\n",
-    "            best_attempt = attempt\n",
-    "            print('\\nBest score of {} with the estimator {}'.format(best_score, best_estimator))\n",
-    "        idx+=1\n",
-    "        print(str(idx) + '/' + str(n_total), end='\\t')\n",
-    "        results.append(attempt)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 63,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>0</th>\n",
-       "      <th>1</th>\n",
-       "      <th>2</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-18.341489</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-18.084080</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>DecisionTreeRegressor(criterion='mae', max_dep...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-17.886975</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-18.338104</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-18.133689</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-17.874649</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-20.234666</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>2</td>\n",
-       "      <td>-20.272226</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>(DecisionTreeRegressor(criterion='mae', max_de...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>-19.484149</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>KNeighborsRegressor(algorithm='auto', leaf_siz...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>-20.432433</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                   0  1          2\n",
-       "0  DecisionTreeRegressor(criterion='mae', max_dep...  1 -18.341489\n",
-       "1  DecisionTreeRegressor(criterion='mae', max_dep...  2 -18.084080\n",
-       "2  DecisionTreeRegressor(criterion='mae', max_dep...  3 -17.886975\n",
-       "3  (DecisionTreeRegressor(criterion='mae', max_de...  1 -18.338104\n",
-       "4  (DecisionTreeRegressor(criterion='mae', max_de...  2 -18.133689\n",
-       "5  (DecisionTreeRegressor(criterion='mae', max_de...  3 -17.874649\n",
-       "6  (DecisionTreeRegressor(criterion='mae', max_de...  1 -20.234666\n",
-       "7  (DecisionTreeRegressor(criterion='mae', max_de...  2 -20.272226\n",
-       "8  (DecisionTreeRegressor(criterion='mae', max_de...  3 -19.484149\n",
-       "9  KNeighborsRegressor(algorithm='auto', leaf_siz...  1 -20.432433"
-      ]
-     },
-     "execution_count": 63,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "pd.DataFrame(results)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 62,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
-       "            max_features='auto', max_leaf_nodes=None,\n",
-       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "            min_samples_leaf=1, min_samples_split=2,\n",
-       "            min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
-       "            oob_score=False, random_state=None, verbose=0, warm_start=False),\n",
-       " 3,\n",
-       " -17.87464878333245]"
-      ]
-     },
-     "execution_count": 62,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "best_attempt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 69,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2,\n",
-       "           max_features='auto', max_leaf_nodes=None,\n",
-       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
-       "           min_samples_leaf=1, min_samples_split=2,\n",
-       "           min_weight_fraction_leaf=0.0, n_estimators=13, n_jobs=-1,\n",
-       "           oob_score=False, random_state=42, verbose=0, warm_start=False)"
-      ]
-     },
-     "execution_count": 69,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%autoreload\n",
-    "from utils.OurPipeline import create_pipeline\n",
-    "pipeline = create_pipeline(attr, n_weeks=3, pca=None)\n",
-    "X_train = pipeline.fit_transform(X_train_1)\n",
-    "\n",
-    "model = RandomForestRegressor(bootstrap=True, criterion='mae', max_depth=2, n_estimators=13, n_jobs=-1, random_state=random_n)\n",
-    "model.fit(X_train, y_train)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Predict"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 76,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_test = pipeline.transform(X_test_1)\n",
-    "pred = model.predict(X_test)\n",
-    "pred = list(map(lambda x: int(np.round(x)), pred))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Submit"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 78,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "submit = pd.DataFrame(pred, columns=['total_cases'])\n",
-    "x_3 = X_test_1.iloc[:,:3].copy()\n",
-    "submit = pd.concat([x_3, submit], axis=1)\n",
-    "submit.to_csv('data/submit.csv', index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 79,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>city</th>\n",
-       "      <th>year</th>\n",
-       "      <th>weekofyear</th>\n",
-       "      <th>total_cases</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>18</td>\n",
-       "      <td>19</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>19</td>\n",
-       "      <td>15</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>20</td>\n",
-       "      <td>12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>21</td>\n",
-       "      <td>21</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>22</td>\n",
-       "      <td>12</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>23</td>\n",
-       "      <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>24</td>\n",
-       "      <td>10</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>25</td>\n",
-       "      <td>24</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>26</td>\n",
-       "      <td>24</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>27</td>\n",
-       "      <td>20</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>28</td>\n",
-       "      <td>23</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>29</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>30</td>\n",
-       "      <td>38</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>31</td>\n",
-       "      <td>27</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>32</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>33</td>\n",
-       "      <td>28</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>34</td>\n",
-       "      <td>27</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>35</td>\n",
-       "      <td>29</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>36</td>\n",
-       "      <td>64</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>37</td>\n",
-       "      <td>29</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>38</td>\n",
-       "      <td>79</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>39</td>\n",
-       "      <td>32</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>40</td>\n",
-       "      <td>31</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>41</td>\n",
-       "      <td>32</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>42</td>\n",
-       "      <td>30</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>43</td>\n",
-       "      <td>24</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>44</td>\n",
-       "      <td>32</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>45</td>\n",
-       "      <td>29</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>46</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>29</th>\n",
-       "      <td>sj</td>\n",
-       "      <td>2008</td>\n",
-       "      <td>47</td>\n",
-       "      <td>26</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>386</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>48</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>387</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>49</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>388</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>50</td>\n",
-       "      <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>389</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2012</td>\n",
-       "      <td>51</td>\n",
-       "      <td>8</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>390</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>1</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>391</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>2</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>392</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>3</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>393</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>4</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>394</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>5</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>395</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>6</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>396</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>7</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>397</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>8</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>398</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>9</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>399</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>10</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>400</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>11</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>401</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>12</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>402</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>13</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>403</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>14</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>404</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>15</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>405</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>16</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>406</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>17</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>407</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>18</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>408</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>19</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>409</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>20</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>410</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>21</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>411</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>22</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>412</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>23</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>413</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>24</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>414</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>25</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>415</th>\n",
-       "      <td>iq</td>\n",
-       "      <td>2013</td>\n",
-       "      <td>26</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>416 rows × 4 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    city  year  weekofyear  total_cases\n",
-       "0     sj  2008          18           19\n",
-       "1     sj  2008          19           15\n",
-       "2     sj  2008          20           12\n",
-       "3     sj  2008          21           21\n",
-       "4     sj  2008          22           12\n",
-       "5     sj  2008          23           10\n",
-       "6     sj  2008          24           10\n",
-       "7     sj  2008          25           24\n",
-       "8     sj  2008          26           24\n",
-       "9     sj  2008          27           20\n",
-       "10    sj  2008          28           23\n",
-       "11    sj  2008          29           26\n",
-       "12    sj  2008          30           38\n",
-       "13    sj  2008          31           27\n",
-       "14    sj  2008          32           26\n",
-       "15    sj  2008          33           28\n",
-       "16    sj  2008          34           27\n",
-       "17    sj  2008          35           29\n",
-       "18    sj  2008          36           64\n",
-       "19    sj  2008          37           29\n",
-       "20    sj  2008          38           79\n",
-       "21    sj  2008          39           32\n",
-       "22    sj  2008          40           31\n",
-       "23    sj  2008          41           32\n",
-       "24    sj  2008          42           30\n",
-       "25    sj  2008          43           24\n",
-       "26    sj  2008          44           32\n",
-       "27    sj  2008          45           29\n",
-       "28    sj  2008          46           26\n",
-       "29    sj  2008          47           26\n",
-       "..   ...   ...         ...          ...\n",
-       "386   iq  2012          48            5\n",
-       "387   iq  2012          49            5\n",
-       "388   iq  2012          50            7\n",
-       "389   iq  2012          51            8\n",
-       "390   iq  2013           1            5\n",
-       "391   iq  2013           2            5\n",
-       "392   iq  2013           3            5\n",
-       "393   iq  2013           4            5\n",
-       "394   iq  2013           5            5\n",
-       "395   iq  2013           6            5\n",
-       "396   iq  2013           7            5\n",
-       "397   iq  2013           8            5\n",
-       "398   iq  2013           9            5\n",
-       "399   iq  2013          10            5\n",
-       "400   iq  2013          11            5\n",
-       "401   iq  2013          12            5\n",
-       "402   iq  2013          13            5\n",
-       "403   iq  2013          14            5\n",
-       "404   iq  2013          15            5\n",
-       "405   iq  2013          16            5\n",
-       "406   iq  2013          17            5\n",
-       "407   iq  2013          18            5\n",
-       "408   iq  2013          19            5\n",
-       "409   iq  2013          20            5\n",
-       "410   iq  2013          21            5\n",
-       "411   iq  2013          22            5\n",
-       "412   iq  2013          23            5\n",
-       "413   iq  2013          24            5\n",
-       "414   iq  2013          25            5\n",
-       "415   iq  2013          26            5\n",
-       "\n",
-       "[416 rows x 4 columns]"
-      ]
-     },
-     "execution_count": 79,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "submit"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {