From 6dfc03b35ee58660602b388d6f508d2a1374562a Mon Sep 17 00:00:00 2001 From: Pedro Marim Date: Sun, 22 Feb 2026 17:29:09 +0100 Subject: [PATCH 01/14] Setup data --- tools/setup_data.py | 83 ++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/tools/setup_data.py b/tools/setup_data.py index 5bdc3a9..edbe29e 100644 --- a/tools/setup_data.py +++ b/tools/setup_data.py @@ -1,53 +1,74 @@ -# Script to download the data from a given source and create the splits -# This is a mock version that generate fake problems +# Script to load the S&P500 data and create the splits for the benchmark from pathlib import Path -import numpy as np import pandas as pd -from sklearn.datasets import make_classification -from sklearn.model_selection import train_test_split -PHASE = 'dev_phase' +PHASE = "dev_phase" -DATA_DIR = Path(PHASE) / 'input_data' -REF_DIR = Path(PHASE) / 'reference_data' +DATA_DIR = Path(PHASE) / "input_data" +REF_DIR = Path(PHASE) / "reference_data" + +RAW_DATA_PATH = Path("raw_data") / "sp500_raw.csv" +TARGET_COL = "Target" def make_csv(data, filepath): filepath.parent.mkdir(parents=True, exist_ok=True) - pd.DataFrame(data).to_csv(filepath, index=False) + data.to_csv(filepath, index=False) if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser( - description='Load or generate data for the benchmark' - ) - parser.add_argument('--seed', type=int, default=42, - help='Random seed for data generation') - args = parser.parse_args() - - # Generate and split the data - rng = np.random.RandomState(args.seed) - X, y = make_classification(n_samples=500, n_features=5, random_state=rng) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.4, random_state=rng - ) - X_test, X_private_test, y_test, y_private_test = train_test_split( - X_test, y_test, test_size=0.5, random_state=rng - ) + # Load the S&P500 data + print(f"Loading data from {RAW_DATA_PATH}") + df = 
pd.read_csv(RAW_DATA_PATH) + + # Separate features and target + y = df[TARGET_COL] + X = df.drop(columns=[TARGET_COL]) + + n = len(df) + train_end = int(n * 0.6) + test_end = int(n * 0.8) + + # Split chronologically: 60% train, 20% test, 20% private_test + X_train, y_train = X.iloc[:train_end], y.iloc[:train_end] + X_test, y_test = X.iloc[train_end:test_end], y.iloc[train_end:test_end] + X_private_test, y_private_test = X.iloc[test_end:], y.iloc[test_end:] + + print(f"Dataset shape: {df.shape}") + print(f"Features: {X.shape[1]}, Samples: {n}") + print(f"Target distribution:\n{y.value_counts()}") # Store the data in the correct folders: # - input_data contains train data (both features and labels) and only # test features so the test labels are kept secret # - reference_data contains the test labels for scoring for split, X_split, y_split in [ - ('train', X_train, y_train), - ('test', X_test, y_test), - ('private_test', X_private_test, y_private_test), + ("train", X_train, y_train), + ("test", X_test, y_test), + ("private_test", X_private_test, y_private_test), ]: split_dir = DATA_DIR / split - make_csv(X_split, split_dir / f'{split}_features.csv') + make_csv(X_split, split_dir / f"{split}_features.csv") label_dir = split_dir if split == "train" else REF_DIR - make_csv(y_split, label_dir / f'{split}_labels.csv') \ No newline at end of file + make_csv( + pd.DataFrame({TARGET_COL: y_split}), + label_dir / f"{split}_labels.csv", + ) + + print("\nData splits created successfully!") + print( + f"{'Split':<15} {'Samples':<10} {'First Date':<15} {'Last Date':<15}" + ) + print("-" * 55) + for split, X_split in [ + ("train", X_train), + ("test", X_test), + ("private_test", X_private_test), + ]: + first_date = X_split["Date"].iloc[0] + last_date = X_split["Date"].iloc[-1] + print( + f"{split:<15} {len(X_split):<10} {first_date:<15} {last_date:<15}" + ) From eee907b0e9e8bc332a6267713d9221f39756780e Mon Sep 17 00:00:00 2001 From: Pedro Marim Date: Sun, 22 Feb 2026 17:34:50 
+0100 Subject: [PATCH 02/14] Folder for raw data --- raw_data/.gitkeep | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 raw_data/.gitkeep diff --git a/raw_data/.gitkeep b/raw_data/.gitkeep new file mode 100644 index 0000000..e69de29 From f5d2dbd6c169ab18c4005de3e69f88029b550d3a Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 17:25:43 +0100 Subject: [PATCH 03/14] Ingestion first steps --- .gitignore | 1 + competition.yaml | 61 ++++++++++++++++++-------- ingestion_program/ingestion.py | 77 ++++++++++++++++++++++++++------- logo.png | Bin 3797 -> 27990 bytes solution/submission.py | 8 ++-- tools/Dockerfile | 18 ++++---- 6 files changed, 120 insertions(+), 45 deletions(-) diff --git a/.gitignore b/.gitignore index 9728749..4c2f2aa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ ingestion_res/* scoring_res/* dev_phase/* +*.pth diff --git a/competition.yaml b/competition.yaml index 3a1e4de..44a4a74 100755 --- a/competition.yaml +++ b/competition.yaml @@ -1,8 +1,26 @@ version: 2 -title: Templat competition - Dummy classification -description: Dummy classification task +title: "Autoregressive Forecasting of the S&P 500 Index" +description: > + Can you predict whether the S&P 500 will close up or down — using only what you know by mid-morning? + + Each trading day, participants receive a feature vector built from: + - Intraday morning signals: the day's open price and early price action + (e.g. open-to-first-hour return, morning high/low range, opening gap vs previous close). + - Historical context: past N days of daily OHLCV data, log-returns, and + rolling statistics (volatility, momentum) up to and including the previous close. + + The target label is binary: **1** if the day's close is strictly above the previous close, + **0** otherwise. No look-ahead is permitted — only information available before noon (ET) + may be used as features for the current day. 
+ + Participants submit a scikit-learn–compatible model via a `submission.py` file + exposing a `get_model()` function. The model is trained server-side on historical + data and evaluated on a held-out test window using **directional accuracy** + (fraction of days where the predicted direction matches the actual close direction). + + This is a DataCamp challenge organised at École Polytechnique (INF554 / MAP583). image: logo.png -registration_auto_approve: False # if True, do not require approval from admin to join the comp +registration_auto_approve: False # set to True to skip manual approval terms: pages/terms.md pages: @@ -15,8 +33,15 @@ pages: tasks: - index: 0 - name: Developement Task - description: 'Tune models with training data, test against examples contained in public test data' + name: Development Task + description: > + Same-day close direction forecasting of the S&P 500 using morning information. + Each sample consists of: (i) intraday morning features for the current trading day + (opening gap, open price, early price action) and (ii) historical daily features + from the past N sessions (log-returns, OHLCV, rolling volatility, momentum). + The label is 1 if today's close > previous close, 0 otherwise. + No information after the morning window may be used; models are scored on + directional accuracy over a public held-out test window. input_data: dev_phase/input_data/ reference_data: dev_phase/reference_data/ ingestion_program: ingestion_program/ @@ -25,13 +50,15 @@ tasks: solutions: - index: 0 tasks: - - 0 + - 0 path: solution/ - phases: - name: Development Phase - description: 'Development phase: tune your models.' + description: > + Tune and validate your autoregressive model using the provided historical + S&P 500 training data. Your predictions are scored against a public test set + so you can iterate quickly. Unlimited submissions are allowed in this phase. 
start: 10-07-2025 end: 03-31-2026 tasks: @@ -41,20 +68,20 @@ leaderboards: - title: Results key: main columns: - - title: Test Accuracy + - title: Directional Accuracy (public test) key: test index: 0 - sorting: asc - - title: Private Test Accuracy + sorting: desc # higher is better + - title: Directional Accuracy (private test) key: private_test index: 1 - sorting: asc - hidden: True - - title: Train time + sorting: desc + hidden: True # revealed only after the phase ends + - title: Train Time (s) key: train_time index: 2 - sorting: desc - - title: Test time + sorting: asc # lower is better + - title: Predict Time (s) key: test_time index: 3 - sorting: desc + sorting: asc diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py index f150b05..d8a6f93 100755 --- a/ingestion_program/ingestion.py +++ b/ingestion_program/ingestion.py @@ -4,23 +4,68 @@ from pathlib import Path import pandas as pd +import torch + + +class SP500Dataset(torch.utils.data.Dataset): + """A PyTorch Dataset class for the S&P 500 forecasting problem. It takes in a CSV file with features and target, and returns + windows of features and targets for training a model. The window size can be specified, and if the window is larger than the + specified index, it will be padded with zeros at the beggining. + """ + + def __init__(self, data_path, window_size=50): + self.data_path = data_path + self.window_size = window_size + self.data = pd.read_csv(data_path) + self.y = self.data["Target"].values + self.X = self.data.drop(columns=["Target"]).values + + def __len__(self): + return len(self.y) + + def __getitem__(self, idx): + """Return the features and target for the given index, the index will be the last day of the window, + the final tensor should be of shape (window_size, n_features) and the target should be a tensor of shape window_size. 
+ """ + window_start = max(0, idx - self.window_size + 1) + # if the window is smaller than the window size, we will pad it with zeros + window = self.X[window_start : idx + 1] + target = self.y[window_start : idx + 1] + if len(window) < self.window_size: + padding = self.window_size - len(window) + window = torch.cat( + [ + torch.zeros((padding, self.X.shape[1])), + torch.tensor(window, dtype=torch.float32), + ] + ) + target = torch.cat( + [ + torch.zeros(padding, dtype=torch.float32), + torch.tensor(target, dtype=torch.float32), + ] + ) + return window, target EVAL_SETS = ["test", "private_test"] def evaluate_model(model, X_test): - - y_pred = model.predict(X_test) - return pd.DataFrame(y_pred) + """Evaluate the model on the test set. This function returns a pandas DataFrame with the predictions for the test set.""" + y_pred = [] + test_loader = torch.utils.data.DataLoader( + X_test, batch_size=1, shuffle=False + ) + for x, _ in test_loader: + y_pred.append(model(x)[-1]) + return pd.DataFrame({"Prediction": y_pred}) -def get_train_data(data_dir): - data_dir = Path(data_dir) - training_dir = data_dir / "train" - X_train = pd.read_csv(training_dir / "train_features.csv") - y_train = pd.read_csv(training_dir / "train_labels.csv") - return X_train, y_train +def get_dataset(data_dir): + """Load the training dataset from the given data directory. 
This function returns a PyTorch Dataset object.""" + train_data_path = Path(data_dir / "train" / "train_features.csv") + return SP500Dataset(train_data_path) def main(data_dir, output_dir): @@ -28,21 +73,23 @@ def main(data_dir, output_dir): # submission from submission import get_model - X_train, y_train = get_train_data(data_dir) + X_train = get_dataset(data_dir) + data_loader = torch.utils.data.DataLoader( + X_train, batch_size=32, shuffle=True + ) print("Training the model") - model = get_model() - start = time.time() - model.fit(X_train, y_train) + model = get_model(data_loader) train_time = time.time() - start - print("-" * 10) + + print("=" * 40) print("Evaluate the model") start = time.time() res = {} for eval_set in EVAL_SETS: - X_test = pd.read_csv(data_dir / eval_set / f"{eval_set}_features.csv") + X_test = get_dataset(f"{eval_set}.csv") res[eval_set] = evaluate_model(model, X_test) test_time = time.time() - start print("-" * 10) diff --git a/logo.png b/logo.png index 9616456c1d354140c0055fb6ee32e23d76519df3..5255f0412f44a9d6adf3afac6fa85fb57f7216af 100644 GIT binary patch literal 27990 zcmV(rK<>X%Nk&FaZ2$mQMM6+kP&gn$Z2$le^8uX!Du4oL0X~sFnn@+2E2^n;T-k6E z31n^S!2^MU1|N{MifTR9a~=A#_J894c>m0M*Kwa`zSH}^`9JyJ5MR*m&#?T~Iv4cU z=7-Cl_Fwpa#e1~=tNw@okNIzauh`E%4_hC+9;H99f3*GQ_j>=c`^E1^`q%w0%O8?o z=^ycW;r$c;cKu*K`Fl!#R{y#C%k9CNf&P=zCzHRn{{#6M+&lFj;lFjgopg8GW~e_h z{IUH{@gCFv%J6sR7NH-Oe`Wt+;_tjN=K2f#8}{GXPg&o<^QZVf_ujx?upT@j8xZXmnpl2z zO4iR6`bcYCRK5$=s3gV%piOVX?yB@dC8!8yRbVNk&$svg<3Lhj2Uz-6B>l)@{(wLJ zw|>pJTFW@MYQ!kW0bT9I+_9NM+r{d8e6gW_nP6&Cm2LLZXp%boUX+7>1>@C7^z4_I z(gRbd?xA3zWx_T8oGq^;Kyqw{bfl8}%;0u8`voQF>(e`x^?58n=Ba2kOg7f|gXefu z&ry+t!}kXE%$wCbP9VW{+QD`|{L0#MrX}?H1_f9#Ogc8|^2ta{+%?FC(f0CWC5vtc z>DP;dD0sorSd%BUL%V4;kgvA(H?R5Dt7p?iVsW!iYkEy34;>8J#c;VVLV4Lg;aMFbpb)u+NmudC`C?B1_Dh3%mObBcXQ5i{>lNAG zbGj_bsIMRKjxHQQOXYDeb{6muN;KYjSunkgz-Z#mjGiAq(`wFe=i}EEn7zL@Bx+=a zJ;c(DHz)5SMnwNxHyK!K4bwff+RV0^+-a{wM1olUZOvHqcQy 
zc-!g62rjJ)nr3eh0n-eKHHB6EO+`%@N*b;dSWyalb3Ul&9!Ki3pQ3bcR&G2vXL4{; zFT}@Apscy*k5nYm@t%I0#?v1R7Vv#36ZV&)7!l$+)GwckuYb%j*`il{~r8^DI10+nIe&zB<|1Yi$<*@XS#) zS~>RQCpMn+YJeq$z(X9*|BVeT5F|VNkj`hg&hnC*zJSYG$UVUH3WopGUK!E6ezMmj^AFZ7Rwd2~e$G|vS%SN)NfSvNkYj$AKsJ9*eZ_1HoC8mi z04Kk}QX#k5h{tg7K#l^7{w@$HK`SOSt4|HyYY&nEQwB#1AdzbPz;E|BUm``o5(r4G zzsl>}X-FUd)+Q~fl;-YZjeZDva<|#PPI8|y{MRa=oVIsMx`IrKK3ScOc98U;*g~*p zx&>&OpyXF-u?j#R9k+7y7D9AnxaAOsxg@&rkRYDISq@}F{9jo)R%!2X`u@Y2PG>7x z#l1|FLv3l+o0@(7i?G$t9oISFTY;0jsp3q;1Y*)G*BSXJAOxM?r!V(N=4T<25DvSH57Uc0HsA3zv@u@%NqoEL^#u8k!d#U(a1ai?jewgkL&L8hapnIgF-dbOMJgrb}CJW}$GciI6V zL~4i90|{Cj1A}Cw23T+cI9vtx^WG7Nb)(3f1uBd|lbNKwn1U?8#YS4&OR$jMA@_2;~SRzF~(bg_Xw`fhikB+h}eeGg-wYL^Z^u+c>Y_y zqf^JESLDwzkTI3@z5O*4#9f8vbEDdKfZk9+otbgbOTEQ%=9umldwr)bkc< zMr4<Q@09r9Y9BXn6tyUR(CXv)V$PvUc1Xy(R2V3>3UtW+4Vn$nB;WVh1B zWwe}T&m`D)VV3WSiu5}}i4R3IaOHx4Uu#wu1} zL!|MdR-bl$vp9Y0a0dhNR?=d3#A)YOf9CH#70Ba$i1_9J1p z$_V?}@8l2Jmq+1!-OQ{Ojs>MxBP{2o?s)Fh{;SCNmgk$A+F;1Sk;-jJ^8rA>Rx}g&fo!B8!3P6^-Ff_DF_Or`p{Ix5jkgt55W&f{98bu1{AexL7kQmgElvoTc$9blXUw$2 zd)rBou1sBo?$)=<`H#aL`XjKQ%Z@YU*u8IdWW9;8g>Cxce>3yX>t17=7u*YRS_pf| zkj~G0b$y^gW!t@LsK+8nR1^6~mc^AM`@IJ?vnGjt;&eJzY`i5Z^aE)mhh+_bCFF(+ zUgkyC=UA&5xAyEspYIX`v)$HzoN}&;APrpVZp|L97Q_3kBf+glq$msY82rQg%)R*s zU3F8QLK^;x)fdkM6{_E^pASqn6^{88PSTFn&Vk^gv|AvFq{iT_BBw6a)XQ5j_A;fb zZR*C-y7YzoD^RzxB2CLy9saw%A@icAAg33ni<>=j)LISagXt`*>A^|!%xcZdBae%3 zw4gr?Mg89@jy5n0kQp)oSI|lVhyiKwU;>~^U_1kE)i0PklNDs2zuQRq00}R`6EyM$ z!@Usg@#Mr;ONjIFJSW83CYjNC>VsrW02GqdfXM5ltk>yB{fx4JSEx%wRjzO1W(Ccq zBP6J&WAS@v40M-3fAM@e7ihrxj*_WKU<`g@Q1UX3qdYNaNuK7Ea)lzJil(qfK>33C zrFv8{#xwd$?Kz68%vnda_6P%5{0Vsoosfqj;)zL0RtDs5{8W6S>1~NZKKKBGy+iLG z#m}j$H~ra+<&w;n%nOtEot9Azu{;Q2L_u)PAg0Yrh{!+dO~(2p=hhv7+eICawRM^P z4#fZ4!gLR=Ri`d8)oP`_GjIm0Q(<@Biuzho#zh#x8lWI*oQ_JSW9Uc78BaxuP_l5V zn+8%Jf29kxDydQ#Y&=zc%t4b*#|Dl{UTt{7lZ4$)6dO=xOZ_qLjyWoM-aoEEu43Xb zyH)EA(#LI;g)tCRB(@jTYFz5|1bbGiAhjHjEbhR_lJeZpE_xy&?7PD{H|Pi@GCB0= zGbcG)so0@0&@%2Y%J0f&D1Ez%t(yb=5sVGYp7xV`$u9?cQZ{0Yn93o(PH3^W`lVnn 
z*jI**rf5D4iTdSOQbips%9oOm8lNoa7qW|L6)_Cgu~Y( z@I^;P7UgJY=Jj!tH|34l$aF2zTZ^D(dXQI1Zb6(C0W=XwcXLIeLskF18Khsuk?Fm< zw!D=bk61gEL&0&otCqvEmB>6p3YQ5>sapxT8YK;|ppMAk84GPe-WH+U-F_yA{#_02 z{{1*TW@pJpFou?dH1tlwnXM9C!}MHNe|GDIPJZo|~mpKBwuG z2Ov;ff=vKv(tq3i08fyaZ;6EZzY-Kr^v)rA4KUhNGHyHAN0XJc66Bo^8(LB(5q@tq zfKzE~a;Um*!J&s`Y~@2)!kK-8dw!W(azCP}G~3xxMvXHEAr<~|9^|IHlr*BZ`S1gK zbw5!glsH@VrYy{TSo8aJxwRp+LC1p70RHYV)Ntb(>z|Qd&x9V@H&(va(z{^3;?l7` za8AA{{r1LS6yR90%F2m6=JtjyKSGz8RN1YJFlNHcgw*nvx5d=gG~pXkQp6!elL-gd z%-ZiH`!tLUzZMs>T~onlt^oShSsaY}X{%AGcKh4H$Tp{*6+xdb!+L7nQal|7zfh5P zK^d2mgckx$xsV@K9}Ys46Xo(QFza^f&(uD?ugd^^IqCZE2&)WVstKuG1poHj2UQsy zNJ23l!I{0*(kkmuI`dGF!JKz z?R)zKJm>KrJw%WO501CuR&a&14~_`S30o$zs7@&lSo(n)b+om{?d3qOK_~^jE!TGj z-p7x@GYPc^tUh}8v6}_wWY`tGV{gk6b9LoEp?`wFE8UE&PONzdTP-O2nSmZ&?BiU!C^uqSz{Hj95 zFu#+9fYNJ!#m~orO%{WHPIk%lykmf0hfz}WmtsT@uT2TY=_()a>T*V0d|OY7)tb8z z9;eW>yF^h{)u}@8W>%9n4+G~A5c0^yj--qg;+i{UT21Hbn{upxgR3?;jJR&A`;^YG zxqiUwiNAok-ndEBZdeam5Ak0G9E@q_tQU4DZBoz9aiFK*ByO-ct_#(A)L+e5`5~!S zmRCvbu`c81p`vmX`jN`piom|MBZ*#G*;r~w$Hlr%z9yhbd{RJW{eN_}r$}r-(M5mZ zub+}JxoeAe8ueB7&2OxbmFe%YNM^!_@;stUCyef&7OX8@-Mn1v{&iI7dr@fYJcbJ* zs|I#rl_T=ukUM}v$!w23dJfCxrsIyxnEUzc&O?1`#Rj?!DpHw^IK?3VyeR_wyvtpc zN&_a)a}Opt(j`L=rblzNJ~SimiNBK_pZ^|1l2@iU9gQq^b&@6mDFj!gF8g3_fnBmG8=P?$FcHm-j=X5}ui9=Qh9+(g|`P{e1%c zLkLQKfF&+t)$OtWjjGD_K_g!nmS2e9UP>Z`x|h*q%5MI*vf+j3dK%J{!zGAII5@nz z!HQwo~smOnTAsRJkhibiX(9Hff!P#pG`m9p+V=7 zx{he?7-^%Yx3H5&K`$MajpaSY?O;QXvs$e<-4F+CCE^ zDCIbN-;&4&@6|-?ghPIibMLy0I#FxTH(l7i#ICQzg4b619D=KiT{ferb&|)xgsL2( zq=s^aDZUcSa%9CfwU|Zmmr=@UxU)6J=vik!QVF?b+X|2DqMPSiT>7V1T#Jbt-1L?W zC@Zj0YM90yZkon;KNrFkAgVUf64RL|WlcVipuwfs7+}v`IhK`Q--f4uUpJlZ>$*sZ zOv)N8?^+_H7!l2bot~9e-LuY@jJOupaYniFDfK2XXh43IhS6@P1gm{n9oSR!#)KQJ zHTyXKVhji8ee66pelJL4l3(Y_@sS~^or1G-S~OIj`P3RzC781>p3U8@iLb?=ZA$_e z@Y49-f(4ChTK|O?)_EN#7OImyl&Ow)x1{cuE%RxW;p*?3qs%d`nRAPAOJE&jH!!8DbsfBrF=dr6$+{@vJWkiOb8wpjNyL`C{>7L$w;R( zWexKLC{FcuV0e4Jq3WOWEUzTO?4chvQ>*354KJhC`yYefZ;UTK7A7atuXZ(uQglNd 
zmbm#nQ(ym=F(I!B3@mw`lPbzaqrIV~R@i_pY$mABtK4|J6Ww z044|HXV&W53x+-Wk#7==53t=;wLZ=G$z51j0w}hBKgE$qMnmu1v4GiC-#kGNFs7aR=r85Ewvdc19(|=f65Ye;|p2Et9NL z3V3Y)G-Z?fzXrt2^laY$o>hT^WpYQbDe{I;Z(0+{Tj+xhVgm>QC$Dw;0?w(y24P+#4NLIDyAEFZez-JN8Xz~l9})cNhBOUu;1yjnW5k-iQF1tn+g&Xq%ooP?z6H+^RMkw(Uq+a0D&8OP@re~02 zE$7sbjgt61sJdAwtWB50?yRd&BgylZjO?J?(-FPu8y9&}qNAIUfp-K9CjU2yABbx_h zS^`q=Z{>w&9Fsngc3E2cvgix)8QL0s?WH# zVBzhc_(v$~#;C_mH}gc4FEZ8jVgTdQt9gO9Yb-t_K9lCD&DHLCvkfU4AM?=JbR3$= zc($S7>)lZ$lK45eG07=UcpSCMoPDp7DwS5E$*d0MpaCnJPW)jr&0m)*TI+Rux-@>1 z*dPw-aLo(p0JDR?&U7sw$yPb=HPYX9!PjBiw(w%>|4kk`&9h<+8SVAznSv6);N!hZ z)rjP1$v~(lL_Tt>DhESBlQFmQOd!?)Jmz@WCLkwJ$N;6aD+@xF43!sK&;nVvhETKX zA(wtuxjMvH5T3wqs=;IRD%|w1+y7wSLLf^pWr@e>4unHL2Pxu=d1|7aFiH{fBf2%q zoJ04!5Jk($CXb$cvOrasGgy>Om42w1XqG^{O6?<-{si1}RikgC=Jt%Y>p@r5{}coM-g{tB zz)T-{wd`wo&0ma)ME2eBISizla_mG-F-d3Ri13|qdoZzRJys>F0RkT_TmqNb)|_Kr zs-64yuogO1MwaZ7P#^>x1xf|S|RR2`c_{M&c zv!gruELFTTsK{kJb~H8mwi2jdP^X>}n=k=m&&bz2J^W!!;D9{=tO(`@b0`6fL;|dN z@|1I3{w#p}voG#E=}T*$DYkvC|LXT)5xR7AESvkYh3e!APKuMv+Dq@*CwcWFc=KXd~4s( zhx-7D$`GqFX%Ziet0#i0T!q5Qer5Q#r?AJd?+L~9H4;xd! z;}(;(qZ2br03_1@U?gBU&#s<&XVOymhW_wSE}^7{>qKY%rwF9xEcPR)flL?9h(PmX zvJb5t_3m1ls_FOlDN2aL!jFNCSFEX{_}4bv!xJX867opY4Pm%FU;r_5d4Ha3tQvzW zzmhyWEElr&m!o1Hh&-$PzK9;YkP{`RI>*c)DYREg7@OV%Ep9g-$<L=PCrJ{Y2el6pxpa6Fe-mo3l1YWV({GTV8f}=!6mYJ-g*?VcppV3I^m6t_AVK zab@@ov5j7j41&>5Vkaa_5lJq~PXFcivH=W`Q zm|SEku2bOd=`(;x1_g#jH0%j}?Mbg%PsfHt`Q z{^#V-6}%pOPOv}LGNWUXK2p9C>V&~XN5gg! 
zF^@%8<_8=sZavx^Zf^0QsAsB@hT}w2%;hFVrjQ(wBPC9j#e8oY(n1MlOdMx2$)LAu zQ;HY>{{V7v?HAiC&ujQEXPpKu1IZ5Q(QbhPYypC=Hhk7LHXMQQnn}@jeisPTf`!K02;HG$_70j;fkj9^spN+cSBQ>sgo* z{6VAyJJ`f;@nxcgug{O0f-`*kGVA%>=|PRD5Vma)0Vw@M z&kuCy`F4CZVT+a$lvs>lmTX&4QJG1guv@Du5M#k4CVP$UtEA*&9)ix-8w?dAQS^N) zcfG=z5jHJiP!90IDDq~kn(f+cE}<`P*HfIP)r8turY@&$d^DR$6Wvu>S?#~>B*Br^ z9%j^_5gF}u%&^99Fa!y((JqC_wXUXl^b=eN-#A#*86#*Y4aIpUrgCa0uvC$C$_wE; zHs;mHG4yK`(ttFM}22OiP zgSD_@wAHt2-X`BRfqoi7tM-clj1{wudwDR$-k!AFK>QE$45VEO7w?p#KVcd_&i|B0 z$$#Ab8flcMP6ivAs(z_$UHeDMPiW{w%?q)>V7OrfI}wn*^>ChVJ^G(khX&Yf7 zY^SV5gaG)Wu-fPHSgO;5FuECJdEnRGy)APwmLmFofmq=fL>d&8a&CLAu z^a>~V9h7y=<%dOcAe$S^0IT|*K!aL$mzOWXtnXkvg*9GbPkoDLjI-3a*UC8fw#ygD z=b1=3gU_;!*RBVTX{?ZPF@c03)qonjK>y4HB4m6?nph!^KOWWMlgj2xW@Tbrp-2R@ z%6{f665(`Wd{`#-6ATg)P-Tu>2hwPTj=P4x9dF}ZSY_ZZyshNYDLIsJG`!R7_glI_ zE-1UZ1Sl|rOUS@EA<%JcmlfDvGP}Bg7DGyLqu6N$K^d_lyeNJ0(H#{OJ=*&+2cX&{ zIpEh_zdP{6o?daqC_zV3WA$E*)+2_&I2=kvQu-qVex;n(JHC0r#`}}D}~VZ4_Ory8Iy*TA(E^Ax7t9fLQegpv)^pRAr3MDN$fCW#1(3ze9zN z4BeGieCum;C0%Iqa`;oEV$@JI7>cM^Phbt!jqncqwoO|^Xgh^T$*Xj`~Bs1tK zyQ99ZO?yeX09DwQx+GM5?4$PffTBYX8WI&$Y)aC)v@YB}GQpT79xJ86M>{i>VuAn$ z!u*lV>^zp7A%)&tH6g4GvvhO*$HCP#j_{I4`Ju89)|kHUnLj9^QT+8@>CeI^-q3C% z6r`fsiJ0~m#b)SO#bF{78(qi&>|y#A7ZVSd0YPjUhG9deH=cq5H6MMZf4w*Pu&6oN z`y6Brb`nkYQ&!9D9~30dT&DR0CeIcipA-DIIKfJMpvTrm<=PJ5i;LxrEWkosG&%9qy0M0OA|;lLRQK= zn%m9S6%k)cb@e&wi=q~kMUn60fhPGXb)Kk%noQ?xuDCwFv&?|TZ@6kf6Bw%lZwVb? z0M&mr`x|{ERLqtEfhJ`1;bttYyj^F4*5f(ph-cwze392_Rm^pc%h2^XeLcNP$eoZK zrn<)sn}`J8Ucp*RvD#^)8e=#^|K+P6^(7Yj4Ew9Lh4dHn8U0@Azc>y9gOx$3ISSIRUMwf?C?OR^OmG@s z)q*{Ibv+oqe?o7VslsrZ{08)~xT)-|R|^fX;%I16`JWmiKW<%YuvA~?iU`)M@?gW0 z!hO4oWM=nga9Javt@atDW`kJ8G!D2nbc)p_zi&oCsZ&sQZ3m(p#-)B=dGy*b;o{7h zDnOs=jyaV9)MspfBPKD??YA_dwOFxMRSZ_vb%{s7W_1(n0P4<0ZZ3TpF_pc7-)_PU zWiy}ZE}1R`iCD8aOJaH7P2G-ozo$fsWHXtK=N8Oep?;jO#nqUVUn}8OYANs=fLPih zzJ-m#_3B<(Lh|f1Y!{A`Kk{$V710#I;M9CX zl(yavBT%FNWzcxZ523w3oi zm!DB>S@Np4mp%&&_%{eop>htpv-!~5!Ezu@LKqw?i@%{CL=n6iGI=c3K9GYwRZLH! 
zeHK}Y+%j0Kq+(RbH>h7U z?Su_AAQZ#-85cbBgbKy17P;u&!Gys!Y$AW9=>GWCi0bpKu$%5#tIeg@v2ezBUgxEv zzaC4n9v)w_Uxx#^={;cG#a(%Pkn8RUJxh$dyuioq4jDtSpWkOEE!V!sc0Ft{H66>-DH5NYPhpPNx<-;r2s{A!3qE~i^0lXt&d~OqfHlqN7G%MTnV(KS zyg6Y*p^G2tey@UQXo_;NL+lT-}l8!d(;*BKU!^QT07hC+gT|vlG@WP&(+?L7)5GLm-L5 zd3rhPRmFv60E-=7B-L-ac11U+8M4edCjd|4V08NR@H6GgyIwqW~$S{gkIpV5da|9h?2u%>i$nh zu2q)JoNuG*cL1{b`%7@)>7}x4Nm~9_(jfKa22=&X(cmmx^RKaG&QYp36pv5PA(J;) zsX+qv&H!1#gV{2u^5wqH5_&0#Kfy_v%zbF_2%m@n&0IsvU5Hcbp!VisAv;-$iy9CS zNutwg1>Y_8qxM!T-`M;L20*S$E(H&REZq!8jWk4A23^$As`^u$>QXT`4Qk}z26%A}zIN znvwEDC|JB?ak$aeh7Dt1Hss71OKF4s_VMrs&2ABeIvn5fJul=WUKbHF>IiyF&9I<9SO|qF%O$s=RYr z;23!Bd4O6z0&Qbp3jlx6%OoEq)#cVJ{6~&6sL7Mkeb170N?raE7I{5uq|4n{;!yU=nThNeLs|hi-WXODp=1U zBa!uI^T+@_k(z1H6DK%-Ym!M9cNc|^U?xHLFNY%spA`!RZ4f+gQK5U_bRstKFy1%NGSHdteR zjC6fiPun=iOcIHxwc1nTBHg$=jKm(*D#iqSA0WW&jiqBx;*7QTY^qe5=^K(1EISMJIp27z3hQsTw*mT${x7D=6< zvzD8CA%MLjhzyY7>48s2)pfc7Js6F2C`Nyzy_60r8veO%j16$n1nLeG1c7C}kZ9eG z(ZPPvor%_un7MVq(^A)NhTrBD6FU7Mqb*HT_D%w;x~EXL=iy9gf^*0ft7|f=37)eMsk+`?c~2eYjRf*-t!}$y#6jJ%L);E9Trig(%2*$hkl7Moe^`ifktE5< z_5SI$lm^HW$Q1S6fn*68P$&M&;6WmZ3`;i z-eB2~3O1}^T?$+xfAKshSw>$MQ1{xoS8LW93)1v{#?jPsc#OaH68HUkU8C*7C zU#RL->=lUrN}47xk+FjW~UwLq~D2KVk{*|A*tTYCb%iDEm96nAj;qx-V z-X-dD#;%!)&uJ|3ZLo~`?XK+3d&V}0W}Xq$X4-D7rXP2*VJRTtZToT6GoLiDPh0PD zdLCPWsQ%YZ8dQ&`KxUt#qK=z>{ajr=h@pB=$~k}}IY5+VXRPGDfgc^K#b+0LkU{d> z-JBL#EOSMW?QH6g?*EPY()U0ZZ+sVePqPj||0?uOVdYcR6x9S6A6~Rq$vK*{cw*c4 zuXxbU2-)5>8A%UFwb!Bl(KrHaAdM_yb zx3o9mo6yX(U0Gj}5NTLJT8EwIN}rg>(abiGONJklm67xj$k@tacxx3A@(dwk>iav< zqqDTPi-LEhPAX>B;1Iq)Xp)7Zxuu(`m~q1$GQ3NSb~zaX5rtoC(`3RBNA4hU9ArQh zEC6HnnQ6fQdl4z>Bi_W#-nc=3dzzwN-zXD!h+U9!EH*PgIc1jMn95{2kVkA&HIWKl zdy!aW><+26aPF+k`O~i)@rGdRK0s_la!;bK>o=2rI3?pxqSS;(AfwuR&hz~Y>VN93 zQ=3l_x=XuW?&)!#Jt#;#Bg@2+i7_21nkn)^`8#Y+N+npp#To$pV_W%(+x)KiO`$JZ zFZQ9U88>RElZ$(96q3Y3Q+{xZ0g-WgOB5r#KY~j11%QDQHoS4k16g% z(kRQ5pClRRDnL6oyl{ct!QXKi1tHoEPXO8;YsaB%3Q7e`@gK;chC!Kid?)SjR@nZ{ z(c-zF!I9bi=(9c6VA7z0c&)db)z4)zLkF-}yk#fIEjQ-S7&<&L{bO`K-6+t|%ZR}D 
z#(StmcrLAXdUdUEX-+^L&X>tPz`pJukwEoa?%1_bX7Vm!)QwKedTAtgU-CuuiwePc z4UYU~p`beG(#RYCq#A5!A8wweDme}&(QS}&Rx8O9DP-Q&ZIt1p`GyR#{tjxsq z?TI{=EJ^5N0`kTbj3RcvFgJI3GH#QmEiAd_@^=oTZTxLpNQG1oCOG*OPVp3=fAMrf zA*SA2?^bu3QGzPey`5U8B;dS1nUD05*9ze9^BMq3!-=UwRPL1)5|P{!tnj*0WU^*# z3{?c|cdz}11{5qr-I_@N*5_|_56&}6o$AxW$qmrX4e27qe)4xPv9 zn|i|AcV@%wVG#~JVxL{Rlis0f#Q_a{_3jqS+%5c(et%wn5Q~N}96tll#jcV7hts$r zu1~9%JmxMvIx;(lKRzuylvZVoPDM-BXGEz>#8cLN`BT&2Ug~R7E z<`&2fmqK7dB55zIR#gjDPO9@EL_41L0v*sMn((Sgl8&4w+tPeST-Ggqq)ZL6Jo0vV zV;UOmERpLeyydOozeqlMfNjQ+|58G=q!O@5&bfg3q2RA2EnW>UjJ?EHw#wN6eKTq9 z7`n)ELjmFmloU#dM@)I6K%L*8bLsYykx8t}jq(5{049nbRrJcxs--a}My3(jOaZVg ztG#hP$xm1&PHMTG+<>Yvl|50Hs@JCge@i!o)>+s4QcMrN!*_p8{MmN}7k@!mIRVH% z!>9x1rB=BzYHKXd+2HI2u_+9~owRg%qiSAxR*Kni=p=n7FBtt9WM`W!7zFEl=t7O0 z>cWtwRDHo)W1{0ELeeNVVvTnBptMsXaZn2RMSM=Q;k{>IkPdeg=AxUGe&H!f8Qg5N zVALT=@5KMv^oX2(Hr441L%-K0muB{wq}klyc3Ty`volaob{D&x{(8P9umpOO!NY|& zJOI+tvB3j{l_?gCHi7B&qRVSGOdHLUbG17dwzB`+^=z~;D9E6cmF-!Sva zz5UqJq65cLZsCdBWPp20)Syde4wAgkt$s#-eex6L08A)&B!3V&FkhJP*H%qSEKnq* zm~-lT1li?9d|ka6FJ-d%e;Pg-aMnZiFFpR=0%XC4fnwwK&Y(@9?Zr5QRA=xsW=xVWM${eF%B%MFlqkvHdV2EkLZbyxM{J&YfGM>_oYN2TwPKQ_Ytl-OC*=d+5j&2 zJugHo3=^#(z4Yj!Bk0;a1;Flwi?5_!t-JJnwI)S$$vvC54RHU{wt*k_^P6Gw2WkrB znP>E)0j11-aJEy?=M7YT3$h2phKfUnh3vtO+tM_g%g8ay94Y({0h#!~qcvelwpOXDGt?2qQFBYuF^5=S@FR-HIO8-u-dx z%yLzenWlt^&~>lq%Cqo|l2BL(qW;q?eX+PC73nWQHB^2|$4V!JjEUl|c(f#wQ^9Kl z=ha8yr~>^GndsX)NC_(J)2WHgzm&$AP+Df~M_n>9w96XQpm6zDvlZ?WjvAzo)(gq1 z9x^lDgWj{0fGS$BpqVO}?WB}75=^`2o#T@p`N9W^8A;%mITx%)cllfsv7yza`Xcqv zP$^@PMxEZ86m@gcK$?`9=SS4ZRo-y%goB6yG=TPRWs;@`OpO(7M|fDXVao#Tf=PrW zi-U~gG=r^jHKePdPSq{=>(mBM$Qp*Y z^{6`;-(T)Gm_4AJPYIk;@1kt6}SYEQ@ z>={LGb1LkqtB}=dk0U_iZb%t(?x9->BFQMsB00{=Spq$(#9!y)H)!}5y^I0s`N;Mt z)?Y*0g5uzg0&|dLz_RC#d=>rUDkK+twlU|#@5$*kX43@SxCPSri1o&X0r_jjZh5G5 zV>>J%oG>7Id#PMME;AsjNks;lFUCsUzw?op%lmY^V8Q?x#35ka&utxQ@DY?)fn0%gWE z_=ZoHMxoxYxjlR8<57j={LmDE-;{Nw`z@bBe0i#HSK-c)tnWuMBfSjoSM36)BQtLX zV=eswBV}G(obkh7wy{mO51b1?0MtfWVDnH0w7!eCqDnU?tv9#C&cjv<*193tM14s` 
z1abR?(_HS!?utdZ9$N!;&EQAFIK&A;tD6zsujnRT-2aBI(lq%-O|yy19bO4c#M6bF z@2l1XJva=n33g#>Mhm@2{x^xnU|eA~Y=?9jA~L})0bBbM!}a&CuJ^nFi*H#Apg)Rn z9y)l%;_Ro&)S)w!nZj`r9+T;;z)m}Z8{^LRJA~UMl@p9Cm@f4BFTCZ7|D51$LRfPYy1wKd(N3RTSngA#%L(k zRbI1)0+1nrhId)p^4G3}Tx{a2r_>9PC{(=OAh6>wNkw`!y1Py(%@uq;FDxB`HkU8r zH#ef+>v_DQ*FDm{4ce+JcItBtoEg3|IA^7~o_`w3m&XW4HnG~wC&2~$h7fLrDCD-O zCq8RKyewJ?**(}bvN$>ui&ZZx1lA>l=d0M$<{%l?y+5zKs1}ET9OpuxCHw zVlvtWgg}X8%SFR)oF-vMJ_Q@CEw=(}Jlc{9X?kT`VGpI9U&SEPE1#|6W);EgEDm`6 z;G0gk3I1&z^yZ$_2Wk?lfjEAnDp;8aA8~P}{z=gm1zZmk2`_G=FpaI-hRooAm+qlq z`1TEA^S!V0IL5b11xBiDiwO@cGTWZ;NGXx#wo;U8Lz*D%2&sA%;+WZTFuhK*r_M21 zX~z`A<)s)zsumvog!x8Y8a~&-STbL}hxsNeX{gxg5ymZs*Oxhf8O$%pxH8bJG1ci&YE-h1{1 zXc{Ewdgh+T8J5j(q@?LoPC^M`d>fsn>2ximW0$oR^}(~Mi_MAnJeF;&wZQJmwUv66a!#n)A?12pY_P{(vN(qL5?mO z$XHVafP#0Vr4{KxKzl9f=$QqZ5`lh^eS4#V0~+ZA3~9F-7h}%&uB|z~PuCD{O0_zM z>Ki!~@gI7-njeIMl?%7y|P31cc?gQHezR;DRANYW5tgKhv3a>Bcttwn6A;w#dvOUf|;I7aO;79x5ry1K96W z4oI%cES5mv1^Cwrki^MLv}S*4<1QlJ;HX|syx zB!244^dp_Gp3-&8rCl8Wm;I?w$z>;dQSm6l^e-+~c9HVlel`UgJVA_W9LAU*U*eB2 zw@mwz->OxPdZgxI6V@LBn%0*)kvY1s3Q0fI--;&R1C+d@hqQzRtgRSoleLn0kS5EY zajHV9zWv9w$#|Mg1MIT-kZh*bTJ@8O&E|tUHDP*)=yh@no@Pb+^L-x~H00@_)qTH) z5ns6g3(Oua54uVd2~&5@c-{GM&MK>_Viiz_9u!8_gY^sxY0{lv77^qc(d6@uX!y2) zPfdTiv>KavmyAx3fmWnHs$$;ZvyG}OL|rX7MPTxlnvQ#RMjS#{ut1a#n#PzzLDT8a ziQtO@N(x?M_T=yq=x1+?>RuAiWi`@9toaIO!xKwUcU>8tqoqxtr5AxbT(Lg<0qu-i zzsHEP@VS5zYPpA%xDw2#+%z8^lAd2?fCw^>EBos6aAiG{HTuz0Yy%Rah*#TXB;AIu zAgr%BnS+8m2>{~5;lbHWUMSWW)H8tmamGJ}3{P$#f8$03llGpX5wl%2_5MB2rBh)v zr8vA?kd1QI3WlYz&SH)cP9x4a<0&s1vrVH$O4||wX3AqFdfq~K0O)Le$@pt)R5{e% z&C1VzNx%KooGfx-Ms1}=zc;6U7A9wO2VlDzJ}IQ6-MtXk&%K@zwOunSrk*c^=X@sy(+&8pv%hwj*GH#Q!pAE# z*(j#D&XDvx!gKP`!GKa-C9J$E{=TZEGFW_V_#wIfWyrzO9E$+s$QpZHj_3XID|b)^quSjy_;6 zT8`WuI{`#3*X!@wPdQTJ;)X)ae6rkddLiG*YCuE8jE?Brz!`4>Im2AGhW%c-Fp=zI zmM%0IV-8KA)GA6elb4qnf53FJoZu4xS12c4B0VnCar`Ojia*D;c_@dwo5gn~pGF4J zg#)%V>@GL$$eu@%oU)ag2?pYyQ0->e=TuK)0Yxpk($M8Bl|>()BbWK=WY}CNP=t?f 
zXfCyASmSs2ZCYGcuwx?6ufkI(V?6k@^n^49roxNT>+XhHm3+5&mz8_o%!xK6me4r$ zWsYe*f|mN9)M}-QSPAy6_Kq9yb#g42_$?ySkIbIJF~11B%tlGII#V1pPyD)G7Gf9d z$s2aQHw~O;dLJCh#olRouF?=0MbM&M!ogY78z3{Pp%(-J>WAowu7jwmAs0n0AypSX zGzwY&(Y4{LUU+OnrO<2^oz>8ldPZgqR_d5o9I#BA@s|-U=XL_Y6|uiDogx4s-MR2= z{MVEdENk9A|7b-g_fFN>b~2$9pb17=Qy9}uwQj@sR;(I?maqUML%n%7_d#?cwqK&x zzcK2kqR)0OPZF7z`-MAE#;Yl*lTK=M_n@KI`#<=wDgV=`D`x+rJssajQAfR2{1Jv;K7_E=I2BTIFT!T4|ylxkbZ1X)Wr`^WgG0JGClug4ggA z>>ByDx#}6*b#H@wWB)1)lTnKJAOG99>dG1CnnXJMTCxsdEfleC4u>leA5fGQL$NL% zC{3|Dt26gJ`CMLKla9pjn(PoVD=NlNIDw(oM%(2d>L**%7FT19d>p zHs!bJVz+MFCmDMV7y4yq-niJ(ImVMC0VT$o(;Tfz#@D5+vPLUJ_nt1dtn?NY&I}Ux zStXQkej`0(W_WLi_g4GppBc*K^Hh_Id1&#lms42Q zp&wSB{HM+i20|S;A-_d}YQkA20N)P1qnzRR@_GpY|7C;=SVuMTUWsos+JkH5#@uGf z?Sn~^XWk%qKHd*7zy=#`0F>K?nw;p>;og_XZK=EdEiz+LH-nP_9=#iN*<`RiJ$z|( z9+)k?DpldGiBLfxAGH(WXAbdD?T0op0;O85$y0$WP6Lmm?{689t8f4`JByaTAB)o=7<4}9coQi0wW=9PC!S;+%oTgsqO*K0< z-A~H&<~5DZJR>1Z8~}s_W8J)0=wN&e;ZCp9tS-ypp*<>nKXRR^J_a7GAGSl8RWSb1 zb!;|RcZyN}d{fuiKU?S|Q6Ls;B<&i(ZfKuufTU-7oVCMNrKbaFpju@3WuFBmE0!Jv(QE;>TdEHVM$-7 zs6eSCg)hCf`D#xSpNBJHy|!I~Hu}I&j;UIpv_0n%)YZClwhoM&ngAg;u8@NEv>r8* z3<%-U&!4!6OKceclP9@^CzB}dyIMaj}s{qDu0~_rzH7p^uvRB)ew)>+SFUfvIMk+ zEIjgh&xq=Z!UD;MISS#G^Djtk)Z6JDudB)Q1Rp__4{1FcHy~b`23Ct+lxsbKZ0tKc zAnEw z7{HnPgD`7w_o$w09Ri=R&I{)Tl^!aD$4$2Sp&V;Hrs41*=0G;MIqxQJ^6?Yy-*o=U z5d{<>1l98d=-5}POR>*yPn`(6oxgIHDGnw(+x7Cr5w4=XVLr+Im+5qdaRi@U(Utvs z>L79dw|8(`ZQce`R}9)mS`XLAB?Jyc?*av5Z~d=!9cN7C+$)vz#_1vBM=3nG1WG+ zLQ;dMEf5~-HVYp+a^aKkC?go5u*Izj^lrWCS!NNS#ur$E|~I z*1|Ym>VjDALY>3b*!Hjb$*2@XxX+lLvB8aXQ;TwKQI=@%`Y@Mu3{sr~EU~*%{6I`m zmUx)Df8@yI#=bPCs)W>-YVG8F-a!vwVBC_w95Y{l;LM=W`Pe{fasLH%rQJD}I2?CQ znT@_}QG=!H+FVpDS2hgf>+tgQs%P+7V%fBt;`3Z92m3G$@7mshm8`KQ_(A@1bA#We zgT*e32Vp##5(aF7X-KuFj6FKjr3xk|64-6>25s83De}J8m)#;_h@)(rS;SGL?G<+f z65Kud+AzlJZ|BJ5>lrc|LL59Yfgnj_kjaOyeh$J6o_n{AVi9}4Gk@0(#z+Y73hez=wiUc$~cVx$S$@ z=V%Amzq~)Z_3px9X2>?^dnCrbmBu@QWGRWO737m})b+g_7Jh644E!5Ax^2VS|-j-6gn^jj|^^t2th0~8xR)KF%z_=(E9&z<;AX}oy7t1`W=7g+T 
zV5mMW*fb;&Nux?JMFq}9u8CCvldgPBbDu!o;ZTw?m=JJ;PtTh>C|c9R6AruaV&16C za*8{L>O%vSO&iT#Bx+$5gplRazyX3Tsft!Q25boqh|ds%fh@O_r5_en2C{{vqI+St z?DZ@>Kr_tavPF6vYih|pmLB#BFM7G)6PWLj5acl(K^OOag=W4<{DjV7tae%M1dpA3 zn&{Y+j(?sRfxc!SvdR_ir^ZJukd7JAkCICapygtRWxV|&KF80lXf=2U4vh@I&{*>l0O*--W7U0l%6# z1oZ)Ev=8vo$Jr&u^CMimx&kNnj6epYIVCoW$1FZ!zwIP`b&9JsH}jS^o)SkqLVzW( zSN+G=Bq7#jL3bP%%+-Ga3dB>J84C^x^?$IWD!s(vKpS=SD}@3yA)Li3hTg2TMzMT0 zR#aTC;5QrCFm+;OSzaN8c?GSCGD4ml&7=@E6HXKCFQhGWx0smRyl>c9M-J!o z4kO%=vS#3dbT>u9ea%tXuQkE2%L6@s+sqA?W=K))m9312WoDgNhbVu*#!E{qmd-qp z{h_(!lA`^;xE)e%_+K~m{O)fz8{Altd0nIBD(ZCZYD`_K6hV%nw+})J_YtYb@Y`uv z3b@&Q#pIrxgjzo$tpDnMgPGHhAi_^$KJyuuzldh{{=)0}!SG$S4dhXcTZSN6!PGhk zy+5*sRFoj9T|#L|r)oYynXrj!-rwBb-C!KVZ2uELo9iNV(CWJheR)z?r(k}5vl|-9 zXHvI+_^OE(>@Drhg?D=7lTwIPq8cSh!-4wakv?%m-An1t>R}Sdjtx`_Ih9v#2}|%^ zqB;%Z73X;b1#ecCrZpZKCQs^ecnQ+lgNb~K#N+F-s9o*Oz$yuwbEPE5O$7wR+g zJIC)$n>y(3nM|VK=m$fPyR3b1uNOgp=IP956%sNPEH&xKT`yNTtFIFF9Tfurk+33F z8^Swd)8WaTBJB>D1akH`AH_!dO}={C z(`yfspRk<)b$viq1HdJ&(ei~2fwBf(;oDfvSK2{WWIyAs<|W^(>=l9cV$buKi-KbW z`VT!iX$QH(Y#osNCVuvY+)eQ1Ra>gXu0p|RCJM9&d7x)6OBXOdporQ4)$!fp7LD9uV%(MAH)`&9NyyO|iC*hnu54GhlZ zkC8v3C|Dru$f=f4={HRv5ph6cOceRf^Ls><2o_fY;^3MMFW*?W{q=^zxtFH0&4MkH zl)!CRn)1*)VlYVV0N>sP#LLIXo~_UrnQ}Bcg%&&<=Y`@9!`8K3g4c@TV>yMGOWCbC z40VuKrTOP6iUC{^%KMRF5Q4V|`TZ=t0AO}JF4=kwlSfHqI?j#SJEGf{C)AcCa0M=9 ze@?pv+I4<`l9Ord3MPn-I?>LvU;t0Sb)yK@R?{1n%aKoETG{9Tk&gm@99R!ymZMTz zB1k|bTHeAcUauFw@R8E-{ZasLa7pgfhOiJbQcnC!lg>-KREV_L*;-`lPxHDw6-Vna z5$f`sLAxdMdmXi(*IZ~c=graQC}^j9aaX&#bz;9Y#GHmp?3lPK zl#Qvm`u!YJEXnrWp>Qe-l8V$=VnWETX+s=CUVXb`T5&k@Dck~4ULEuf-fyP$iJG{b z(5F%P@ragu4@d*tvPlcEd)rb>aB`cT4U49$f>AZF(Zg6oiIL&tiWflrA6=RW-9+*ioeu3){+{2B`p<8c>Tofkh)|y=I)|0sZ;qaT;+c*qA3T+#k}>n-u`YDjuLR@I{!qslaenAi^DxmDvx zwM)@v_v1KqDo7=({{&F{0mhT$ImOznKV~;YYfo&J>SPmi7m%l+EEp*IQA>>h%ll}X zbi-@|_6Z0E7h`WswadUHEKtWd^tZ z$DK#z62A3ew08FPS@Hk1;-&rSzm}js`$l}Ru=-sojACX{d!$$$mDEr#BJ6r zPdnolLV&YB$%w?}$a9j^WhbOOYA2VjRCs8j?h|zb-lDvp&HhnPg^p7#&YJDY7~q8ZUOgThY+XZ{RjQF_1V(X 
zI6~#`nPJ!B)`>G`L;Lz|0;pOu9{e^J1aR;zW~YUfx!0x7d9q+KUniVDl|=0!<68vS zpt=5w*U_(Yo_gC%Dz9VW43T>BoP1?q#$3;*ejbM2v)|vQVlYW|X9?88((-Y)Y?n`; zS}`8c7F67!2RE&*xJty?JJPlm=iT727}`H=1kcLE`5ryVim3ig3p3AwCpkmKoz3Cy-Yx+?7MifzL|+&3V6#M8Ay^HW zLPt7KAMQ^}`mn#biB{`j@~A2VbICCaB8w9Vbn&7z_$!OF7HVRI<8y%BxhD}frt0)s z_ELR`KoD`0%qEHqEz~d-=(D!LVOfD-FJ)PR|2Fo#pcryCZ* zV~vOw!;PLb3sOg)HSxq(ulQa#8hJ-5<0tRH>0z~km*{9Yo{U}yup+~xK1Q)UKHmfF zPX22D7fQE>`%Nrx!IswF@(WpFDpG7<^exDA`jWfE@$BMj6ankWF4JX)6Mn0Vk&MrX zlPLFQxvwF(97aX*|6RLJqBxIu;z}XghFpH&atFu%@DI?Mdp6xxId*UM68nyIc{4nv zijFixrp*ruqMl z+petRJ6kG$z5gtcStV%$=>`<>h3?G?FfyII{xBTZ!o92($n?*6tc} z8X)`KKuCyFk=tN-$w*%NG%Xza1|HS9DJZL30Cq%>Rj$l#0h6$bIrh~T52jm(DK8eP z?u4n%oXtt7DvovQZG zcWKG;+^CxGTUTgSM+kb_>DkX8=>*jtqlt0>LjP6^ zinQ6BzspZC=U4@qHJY{q^oO&lQ(NXSus`T=?7EBm!P?o8sp#TlEqWJ+ST9M$WfyKr zsBsm{eB`}Q-8J4B+Ho;LGdgnuug5hxOfWay3>=<;%{4+5Cawe#W|!#SBAWK%>=d9lqZ9@#mNWxyV}WylK8dxwto z^6)^SNrmeShLc2UGvitr{2Hd>j_+8$%BqDQz9mb)28`0nbpvsm?IfyF*>hRQfSs#G z#>V`p_4wkNbPeKjuh`(*7^=zn7drIBHc)~lA*{AIR!!^>)`!DaG;B`7oNU3ut_6pG zFR~^jawq7gxSF?B=w<+G<||ahJtg-?pRNbt=&shj;9kZ2I4JYJSG5|c*X@!5m<%g2 zunV{uxw>#kri%##*a%L@x{;D0=-N5i`4iLoj+bD^PfTa_euAKm-lnlvlhC>~leHeg zSm4g&^&2eTUdNQh^U;gl3K+mKzEW%B8}dHoN4)3T!OV$xBVZ~@`sBla!^Eum!W3Y;${+;!6K1Z%pRdpGxMj`eydZ|DAOLj~*POZZGbho6$ z^%8b96J@tN%-k`*cpJX$lYhuC=uNwRsW{709_S#aco`!aR9U_@1sp?5+UH2F#q7Q$ zq{_cgYMei8LEni*Q)bK1=Uvnc6v4phatyrk6;>1>R(SRj)FEiRj*7kI;mg>wcEu#b zSKLPz*dQ$2%^mm=_C7fz7{25eE?YtQpZ+bD(7chRSzV@+T|;NhJ|ErOmkX^Ji@sXs zWE35_Bw4D`4zuo2{9V^Ink7f8k2MN7dsKuwprq6NDi?DA6f?=0kWV;K;>$7MAG%yf zR?2`b0G8qw&zTQ3@&|{Y)R`x3GW5bp(0gHs(#ot zguf*Jjq0gEC1Y9Vsf!xcL+BY34}cO&i<}aP*-n&HXDa{z{QvgY`hWPl3M{D^v1*J| z_a{SYI-Ssp1%H4=+e$Lzj#4ikKDYXt6>-i}dDJlBU-`cBeLJ7cszLUh$Q_JWWNi(K zdP~5JIss(^3~90}e8DQx4M2AL{L~-LR{C*jJ-1Dr-snw>qI4;KH~Z>e4p+QhUZTxY znQ|rew7&sbd7nM*q0$4qk#?d}7`M4gpG-(#;KOeP26_KqIq<`T15)aPl4`!VZnW7G ztkaXTT=b_v3??haA3bN~lcs_^2EzMBExs!w*rB?E_moC&FoGCxev&49u06(dH ztyYavh0HF76DQPv{wC$B z1Z-8?zm6zKw8v|<{Luv3X9T3gybf4KFezHJq! 
z6d2G{jSs)YeayDrKRtdZf7ok0vUDOvhcN$JmH3(tjPw8R!{@Mw5gD3{9yOSD6 z@59PPDyyxwj(kF(cwO$xR@~h@GM{>CBB+ly5yNbUNb5LVES1z^Y`WjNP*|Lcj5+V% z@Kfty3t9Cm3!sQkgeeC-Iw?IS2wBEakYod(ZTBAyzZF9&u-DK5f|=rD_ouWNiaH(EC#pSiD9R(=+D1QGbON z;DxQWP3Cv%Jc8@Yd#8%FE#tDiAjN5 z443IP*ED7=N45bTb|*zC`Y&4yx&}^iaMq~nkdjFLJoyu-h&nHu!>O1WL&eJ z=j;MC9WqBI?C`YW*5+K1s98@~j5JBEfn4;UROQ_QQB7~r>nL!{J>t+7hG>Cop5dP@ zO9U92eL|#>&vB2e^<$Nl7a{3kh7;Al0YErqPQyEvr_%-T30O%h@%20GVZAn8I7=fJ z@LO?WnTBIXz$p$1!hJChlbXZD((D=nKM23xTWu!RLK_3cuhi=_}x>1Z(n%)JAc~yNZX&8 z){MK;4!AF*$$3g^P|elQeX*nChx#)y{U ztyGlo1XL6dIDm?DBA7t`=w-Y)u8{WTBh28|uSZb>$U!+bLCKU+7jT_0c}T?EwGsRc zfbZ7aW^wc%pO4c*u6~m_6XE7lJWQ&yfA-Y_pjgVLUmFi)0QG)BaA)3P71W1*2-=pl zx@x7N@RwcYfAK&NHU5%tauN3+woEE(_;E>xnt@MuI~OJb%MC2?GR{*zOYnH19%bRp z7i00=NyFw=S`06LJWw!< zXL3CKu?uk_o@+(}cl>_(m7wvpCA1GoD|jwqN|sDK_Xy?BI|!o&x3h&s@wV5j9{$CP zX%js_JM7DIO;(AM~dnP}5yn zy&Xwj7?CJP_M~j_4dIFAeH0e4ewdVq(^K$s+X&XSSn+%Kgv%V*PPl(xouR`dl+5ZA z%nNs5e2y^A!JWm$NbGlFNQXo7VYeh}$c7W_FM1Gzg~a{&uIh|GhhDLjmCu*3swX(Y z^0Tt?i0wf7&!&``{L+Hcdim4Lx&sZtglpT!hIG>DrlN?Htf#fqw)4L3RCD-XdwP@MbV2k&`?&Tr9Ucpvqb&R5rf;*VC34;<${F|KhE0xI}LG(Lm_ z;nx)0cNjijP84DQ;8YEd`Vxs%nklCte}_w579}p4V4IESY`FikQIZ?7QDy++2S?S% zB%m3v!)wFRVotGdSxGt>MiA+MMLN3`vW5vv&p3WvPp32Un|u9x*qRt*)^JT2s@N~G zIyWFSM%`TmfS$F^Qujg_aEb(gGZSeE?gg%RP0i!XpuJBn-N^VDmk<C7F;L_LhqtCuBQpo z1s=MVhxnK};r(Xzl9Ji9j~!cTH0^G#u1Fho8Q67t3hFS+^M+j->J)lcNRmFX&vB1> zFsk4vt6CB85D0gGBK>1v^td8!;QGlaw$GOEdx@#89k2n#Lzq+Aw+&J7ma4Q#W|x%5_oPV}!aya4>i9Xhi`;=q<4M z1vY0s;& z))q2IkXVyB35Y^Y(f|iaN%p!c%>+;t^_OM2wPASl+a+SYr&l1TS0K#~BXbRBUjsw} z4Qd>4HAJfM(qt$O178x+qc>ptiJtQ7y%LZmgsaK_pDK+U3dWWw358IdW7yX%3>>r5 z>pA*fLayIn<=Xvtk_aY~W`bk6$=jXgk+(krI|)J~2Mq;}9Amo=Tm#g}Fl0(N&Rd}e zPtfnBZqDqL3509K3_g|pdcpe1=B{Ql*QD$@4HjwMo}dFvPviPT;idI1GBp03wv%(-}_cV%)Z=LeYx)Injjd@CL=`I!+T^E;Kg4 z>A!1*8x`UKnq=T)reS+Fln?JB2X7ekJ>Q0!f_#?Ron9?PcWFP`eZv~g_m?Q)zg!=% z(79);8I-cONMk+}z{VsOB1z>Lfa>K|02( z{{&JYMc1NI3&HjW@^OLG+}aXaL7QAgT132b#`wtyiq|2X8D=uwt+)i-qOyZ9X)Hv9 
zT0^lWPp=9@0t6dHU$-(xo%)|Gl}Bw)NEFTPNh~A)*mCpmxKLVY%LZgI8_t?4&qrj> zW7YH1(MK)8i4PxICVv644N|0Y$JD{3*IEBRkZfuq9bRoCTYruAOQe|k~l}|b3s+s0$RPxOSEx^*-;xb6@!yN_3)KmOTJII z!(JMgP%g`P1cc49-%i)7GIeI?i0CkH!>=wA?Aa;1RZ80vWc+EdL^d07B(4)8DLNqQ zRf}A?RImbSu;A3=+ry>iKwfq@Y7BYnZF{?-Ijcev&oYPg2R(CZJ_MnON*k|N3>CTORiWlRqjbIg zXd5t2$NM=2Ydw8FPsB!30c4Wp%jJ*;bdt>13jaYh`dcWyF37ou@y{KEJ>GIMa!((Bk zpk-~;_|4QptF*_et2L`04;+>paQuJeXVK=U)f;;+fbDvRUMs{kz^cs;2C);49SmVI zq~LlBN3%AOYX*@kQC$vvaGd~Ro}z&A*L%HkTa7G`4O?kM-nV85z;R932DQlU`Bq0> zjJ-^&d2Yk6jgd^#g}wGWuRAI3LLklg7`xZ+K?_^=K8-3P$Fot@D^{~y+5|AfL4Ws2TI*SR$Z}7Q8rwjukG^5Qp&6FLgMxi|3% z1c};jkbPs~{p+aG?7aTU7cu$P1;JB)7u&O4Myu?TP(r1Pw#ZT>3`P~F1Zp!!9h<3> zf(u*^MM}AE_ihKT4eS4@H$`L7y7|DBQKrIwKx4^>cBl>_AQag*1 zb2IINCg$lIOZ3A}#@WT4LL?bT;Wm0*y4>|&Hy1&=J`QT#fh{_Wu1~1)oJ|7E-f8}Y zb#nu2*>6dEzX;`YJ!HBoe)I3xyyr&atpZE#*5cMh{4Z7?7((p z8a%LPml5I%j}lt*e>NKok%c75(n-Xi1b8h-qh5xP<0bPNSFx&DlFsxBS=p=C&W7c} zJ|VBv_T?|W;!_oQ;$RrQ)>(L;cB~aSg?U{4{Z4ps0}h1Ee>m<(;hEaObHM{vYQbK2 zGNb89t@LbbPLWg>ID8!X6=~8?1`~q}pIhCc&#mi@D!U+y*B256i^-5OQT@AgT<4Qj@$KSlsTJ<1qrwj@^*IiFABF~b z62yxH^wNnf!D|FejYO{(jXzGb9hw_iM!8ssn?g?gr8$2ytd d=Nbrf5DwqAaAosJC&~{mcmMzZ4{!hg003wv$_wM1=uq(nJuXgd#OyfKi%AiGV<;3ZWVyR0+}s8R-!L0R@CXn$mkH zF(8m2U3xJ?NeoCQAicfd%yZsb|9{WAXSeTs`|PvUxp(cDyT*DKn0c820Kf(P+uEiy znR@y%ouSPTI&Tt9(D|C`-2#;L@h{MfyUs9u7b7EpBu!@m(9^L4fTs}J07SV1e)*vT z0Ae%=0094_`#1V0{m*FXpTK|hQy@dCiw2Fv%KaY94`yVj21k0xI5;64on=rSUZ(&6 z1f@n(J)HdcT$|YBc@SEh{Ya1LB8J7ls+#71BcbI14GsD9XqQYcLB5 z2|;|ET+~dpb^lpTv($y%{QSJsWMu;a0%QUdWRO0tve#8rRb}PmW##3iX$WcGKu$loOY%cJe=3-@vN@^eRe3Z3#gI3oT1)P;pl6a9St-lw0t%fFdCeg7$oR-o)@ zME1Ijoa|3-+EU1=R}Jdp?o7*k%CB)9@&oz*eE;Nu$et$uKb!fx(jQ)0sT#}>*`LRz z!ED%i%?bcG->0vA^B#(B#cH;0uI{Yill|jzV&wAc$g1Ceqqo{lIk}zsz=g*l4A&HP z=v}_l;yAl*b0p{u!~DepIMw zguoXV<~;$Or;|Bo+o3f2{(=tU<09pW2iFwGOwe1yAE^-)it9Z5d7K6Z@>6jP_7w3w zsY(&pN$f8x(Gwn!5$(Al&i89)L)h#r&M)M5Pp;XszMhmO=>y-^23|llhC+jCjvo=5 ztIwuT1sH{mmS^652f%CtYF%2%yEDuDmxJKu8G2&wbLP}#0wKq80dq(P-P*kkH&4f3 
zNQ}}X%~Eq=eHY*O26s`xP9QLrzuieU?pg#}%wNY;dyjj0pZGQtfK7+f`-DZA*en8D z$F~>(%89S`aS!>DxAx-tL`PE=&U+i|Xah(0e9vqBgY*0h@Z1iM-neyES}tC%;ny+s zC%l#omye3&gmKloOwHVFwVh(QK{!_aOHo^|6UmmOKDR_wn<M_v>#Ri*DZiGj)oouKe6Fx871AY5lq2XmW!Ou80A} zCKFq`i^6dN%eoRb%VttfICKr)u_v@<&-@ZF#`9jHtM{m^eQVc7b4s1m6i@&(M_cX_ zVxvh=R;yylCmt!!`2n|daI*#D^jaojGHY0?HuXcL3-f@FVzL@5!+72^8R8~Y!pOF* zuYsbat2NOP^o)nyQ^1h{_sj^(g-g>4tgksVDN6-nd)%lcB%69R4^d>v2)*zvWKR>n z_z_`I(hgy}=1T2kYP@Q^eawA_ zzwoI-F&ot?;l-G$-~nNy=gG#eH&`PoK1^232LeM9dDTQmK4R~Wunnr*+;dQ;}V?TT6a}6Qn9%A^4j<$S=R|AU&)2B-7#F5 zNj^NpMU(@R6`?9zvf~-9&o&E$Z5z{g{A9^a@r4piV=<-fv%&9@%iUF@$w9gNf;Q^e z4@+b|rmNnQD>X0zl?Fd>LYL%2^;Wu1 z@~gG-zE&8%^!9@m4Cb#;Yww$o!C6B}mq%14*9KEQdLTja;brmSg54P;?xY0HnAu%B z-?F9%kwte5OL5&cp(=W9|As)c^20?Vb_Po7Y-f5W1+2)JYCECMrabF}NWvPy>VTI`eUfH7tIH($3pd%oPfhSxd#s-a7AJwhN$jH%T+{a-_JSf=hN z2nt1Ec^v*ig6jP{Oc^!CVBZx7%ohjsKI$Uhq*Q$gW43Jn%XT{4j`g!4F!DsQM4X;p z8noMWsuQ_~3n^EOzi8Qapr&C0Wu7hZ%6AmYtT#qgQbnhv1fl%> zBcxhzR@*#8Gr8mX|aQE{(TvguML77`avBh z94@R^96cFb{z!Ao`uhA~Npujl^3%8FiwpP!7SEdd4n>iPl&y!u>rfk70HjqDfohtt zDD^oy=kIyMI4H{_mN7H6#PY}kWY=nU^cJ`h#_2P(P8bZGLuD%KmDLrN_IkO2p(W0U zkXz*F&2eN&;`+61N_cZaVAD~P=Chko&SnflgAPro!Pca+*+KA?CvnT9m`qpQxJR~M zySA9hxrdI?3?Ic?I5#cjww)nf6-cyvf9#FGkU9$~=#!`5hAiqYJQjUyNRZLh(Yp{d ztU7j2e`(#hU@nCyK3#~be9Xh8RHtBEyjc^q8m*EVz~sae-~J7+x4nDio`Wt*B618) zSsry-fLUqcDlKA%d;-nf59|<7>f;fGRu5!}EF?ta;@a^VOYkD!($2=}ohV=;26@02 z$IZ2eo1rRVrcl=27NCy!_VwMxz4Of_{%3LE*d?73N$R732}7+rt(n4-m32`cl4ASD*<{#hloIlOyMd?2FuoztAFT-TRl%Pa9D zo%O(b--f*3MJ%D<0=8lEkP(>EYV7MoTmA=E>xN#bJO~nWcctJh#}3l7Tkn{C^0PJ3 z5^csNko*WaV!Sboa8B1VYbVEt2~6JxpnE0%pjRpB0~+wd90MwswWG+4q%*|ZSB&v_6va;3W$M-{db?aK)u3-hCe z>{dzA`98BbCq>+qQAEqbBQx}f=v{=6#|5z$ETIlZ2$6?1i&c=;eevm5ycz{AC_uYd zSD1C4|Di+vy_@Q0G)K0Nn{UY76Tb`*$z0Z$#}mmL+f_s<>JVB-$0hc62`R@!n-~T| zWH9jl90+S&yj>8=?AjT?0X6z9Z5T`{2x)Z6iWG%gtSa@f*<-}|G`nkSE9+-&oYVn_ zG@m)0^Y`73=_agIG~Y`8PXB81V!{W3TDA^Qc7b|P;OS?%u)UWT`&P62l4=iVF7|kW zNFgLai#U>$Zt`aIrX5!An0A2r`XFV2BP!38%-W6!>E0`I=LL3G$_pJL*(&z`0E|8{ 
zy&9^>QeT~m7op~9G-Nkm!0Ig09dSF~-C*|EI?SON(_MB28zwn<^7%B^@$>fDD*{O=X77#KHWb~dc8Gkml8C7?S%R=`Z9F`jQ@-IKeo|av%CE9kyg8Kd zcyuRKB{le*zPEtOb8vnnhDg3yy57?UfTqYgpAi2%yTLp_szzj8;=-iB@mk3#Q6bXP zT?3BcTDhqzU9)==4TLoO3#v2{q%ZF)8#fYre>kt)pBtDO=i{>bLZMAB zQ^0ahg^N4p>E7fru}!V>^Ta+D+qVz7uWpq4VU!@eAv2rXx_Uew-dF981WA@x1ID8Y z({&pwj_a=Hz6s7p6c8q`;$tu0iPy(>zlyZ=Y%$dE+WmCBa@7Gr{git>V+rp|9LQYi za@-)Y_4gdpD8sFQF0mUvx{=oVH4rJTLu-~5H0>tvs)n9Po=1aSb}%YjXM11C*}O_o zk*~dz(P4&n82MHj)QH6{Po*M35=F)JGPduV53&|p%sX{nrJw{ym4%J8x00pMe6yRo zCM5-7H3X`}DEUQY%eK$27cWU4-d;8A$wlitEshrf^vGq|ul8rf+%z5DGcL1LR4cZK+6vJ``I0>&^ZD-gEry eNT%jm!ssLiJ=~t@6|tN)iTXOm+GV#MJpCV&VN8+$ diff --git a/solution/submission.py b/solution/submission.py index a8076b0..e91caf2 100644 --- a/solution/submission.py +++ b/solution/submission.py @@ -1,7 +1,5 @@ -from sklearn.ensemble import RandomForestClassifier +import torch - -# The submission here should simply be a function that returns a model -# compatible with scikit-learn API def get_model(): - return RandomForestClassifier() + + diff --git a/tools/Dockerfile b/tools/Dockerfile index 8cb1eca..dd9b157 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -1,20 +1,22 @@ -# Step 1: Start from an official Docker image with desired base environment -# Good starting points are the official codalab images or -# pytorch images with CUDA support: -# - Codalab: codalab/codalab-legacy:py39 -# - Codalab GPU: codalab/codalab-legacy:gpu310 -# - Pytorch: pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime -FROM codalab/codalab-legacy:py39 +# Step 1: Start from an official PyTorch + GPU base image +# (Ubuntu 22.04, Python 3.11, PyTorch, CUDA 12.6, torch 2.8) +FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime # Set environment variables to prevent interactive prompts ENV DEBIAN_FRONTEND=noninteractive # Step 2: Install system-level dependencies (if any) # e.g., git, wget, or common libraries for OpenCV like libgl1 -RUN pip install -U pip +RUN apt-get update && apt-get install -y \ + git \ + wget \ 
+ libgl1-mesa-glx \ + && rm -rf /var/lib/apt/lists/* \ + && pip install -U pip # Step 3: Copy and pre-install all Python dependencies # This 'requirements.txt' file should list pandas, scikit-learn, timm, etc. # Place it in the same directory as this Dockerfile. COPY requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt +RUN pip install opencv-python-headless From 0d551cd9b7a4dc24a0c15f4bfd30c6e29c13f591 Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 17:41:51 +0100 Subject: [PATCH 04/14] Fix in the ingestion --- ingestion_program/ingestion.py | 158 +++++++++++++++++++++------------ solution/submission.py | 106 +++++++++++++++++++++- 2 files changed, 207 insertions(+), 57 deletions(-) diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py index d8a6f93..626f645 100755 --- a/ingestion_program/ingestion.py +++ b/ingestion_program/ingestion.py @@ -3,100 +3,148 @@ import time from pathlib import Path +import numpy as np import pandas as pd import torch +# Number of past trading days fed as a sequence to the model. +# Must be consistent between training and inference. +WINDOW_SIZE = 20 + +EVAL_SETS = ["test", "private_test"] + class SP500Dataset(torch.utils.data.Dataset): - """A PyTorch Dataset class for the S&P 500 forecasting problem. It takes in a CSV file with features and target, and returns - windows of features and targets for training a model. The window size can be specified, and if the window is larger than the - specified index, it will be padded with zeros at the beggining. + """PyTorch Dataset for the S&P 500 direction-forecasting challenge. + + Each sample is a sliding window of shape (WINDOW_SIZE, n_features) + ending at day `idx`. The target is the binary label of that last day + (1 = close > prev_close, 0 otherwise). + + For the first WINDOW_SIZE-1 days, the window is left-padded with zeros. 
+ + Parameters + ---------- + features_path : Path + Path to the features CSV (columns = feature names, rows = trading days + in chronological order). + labels_path : Path or None + Path to the labels CSV (single column, same row order as features). + Pass None for test sets where labels are withheld. + window_size : int + Number of past days (inclusive of the current day) in each sequence. """ - def __init__(self, data_path, window_size=50): - self.data_path = data_path + def __init__( + self, features_path, labels_path=None, window_size=WINDOW_SIZE + ): self.window_size = window_size - self.data = pd.read_csv(data_path) - self.y = self.data["Target"].values - self.X = self.data.drop(columns=["Target"]).values + self.X = pd.read_csv(features_path).values.astype(np.float32) + self.n_features = self.X.shape[1] + if labels_path is not None: + self.y = pd.read_csv(labels_path).values.astype(np.float32).ravel() + else: + self.y = None # test mode — labels are unknown def __len__(self): - return len(self.y) + return len(self.X) def __getitem__(self, idx): - """Return the features and target for the given index, the index will be the last day of the window, - the final tensor should be of shape (window_size, n_features) and the target should be a tensor of shape window_size. + """Return (window, label) where window has shape (window_size, n_features). + + The label is the binary target for day `idx` (the last day of the window). + During test mode (no labels), only the window tensor is returned. 
""" window_start = max(0, idx - self.window_size + 1) - # if the window is smaller than the window size, we will pad it with zeros - window = self.X[window_start : idx + 1] - target = self.y[window_start : idx + 1] + window = self.X[window_start : idx + 1] # (<=window_size, n_features) + + # Left-pad with zeros if we are at the beginning of the series if len(window) < self.window_size: - padding = self.window_size - len(window) - window = torch.cat( - [ - torch.zeros((padding, self.X.shape[1])), - torch.tensor(window, dtype=torch.float32), - ] - ) - target = torch.cat( - [ - torch.zeros(padding, dtype=torch.float32), - torch.tensor(target, dtype=torch.float32), - ] + padding = np.zeros( + (self.window_size - len(window), self.n_features), + dtype=np.float32, ) - return window, target + window = np.concatenate([padding, window], axis=0) + x = torch.tensor( + window, dtype=torch.float32 + ) # (window_size, n_features) -EVAL_SETS = ["test", "private_test"] + if self.y is not None: + y = torch.tensor(self.y[idx], dtype=torch.float32) # scalar + return x, y + return x # test mode -def evaluate_model(model, X_test): - """Evaluate the model on the test set. This function returns a pandas DataFrame with the predictions for the test set.""" - y_pred = [] - test_loader = torch.utils.data.DataLoader( - X_test, batch_size=1, shuffle=False - ) - for x, _ in test_loader: - y_pred.append(model(x)[-1]) - return pd.DataFrame({"Prediction": y_pred}) +def get_train_dataset(data_dir): + """Build the training Dataset from separate features and labels CSVs.""" + data_dir = Path(data_dir) + features_path = data_dir / "train" / "train_features.csv" + labels_path = data_dir / "train" / "train_labels.csv" + return SP500Dataset(features_path, labels_path) -def get_dataset(data_dir): - """Load the training dataset from the given data directory. 
This function returns a PyTorch Dataset object.""" - train_data_path = Path(data_dir / "train" / "train_features.csv") - return SP500Dataset(train_data_path) +def get_test_dataset(data_dir, eval_set): + """Build a test Dataset (no labels) for a given evaluation split.""" + data_dir = Path(data_dir) + features_path = data_dir / eval_set / f"{eval_set}_features.csv" + return SP500Dataset(features_path, labels_path=None) + + +def evaluate_model(model, test_dataset): + """Run inference over a test Dataset and return a DataFrame of 0/1 predictions. + + The model is expected to output raw logits (one scalar per sample). + Predictions are thresholded at 0.5 after applying sigmoid. + """ + device = next(model.parameters()).device + loader = torch.utils.data.DataLoader( + test_dataset, batch_size=64, shuffle=False + ) + preds = [] + model.eval() + with torch.no_grad(): + for x in loader: + # test_dataset returns bare tensors (no label) — x is already the input + x = x.to(device) + logits = model(x) # (batch,) + probs = torch.sigmoid(logits) + batch_preds = (probs >= 0.5).long().cpu().numpy().tolist() + preds.extend(batch_preds) + return pd.DataFrame({"Prediction": preds}) def main(data_dir, output_dir): - # Here, you can import info from the submission module, to evaluate the - # submission - from submission import get_model + from submission import get_model # imported here so sys.path is set first + + data_dir = Path(data_dir) + output_dir = Path(output_dir) - X_train = get_dataset(data_dir) - data_loader = torch.utils.data.DataLoader( - X_train, batch_size=32, shuffle=True + # ── Training ────────────────────────────────────────────────────────────── + train_dataset = get_train_dataset(data_dir) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=32, shuffle=True ) print("Training the model") - start = time.time() - model = get_model(data_loader) + model = get_model(train_loader) # participant trains and returns the model train_time = time.time() - 
start + # ── Evaluation ──────────────────────────────────────────────────────────── print("=" * 40) print("Evaluate the model") start = time.time() res = {} for eval_set in EVAL_SETS: - X_test = get_dataset(f"{eval_set}.csv") - res[eval_set] = evaluate_model(model, X_test) + test_dataset = get_test_dataset(data_dir, eval_set) + res[eval_set] = evaluate_model(model, test_dataset) test_time = time.time() - start - print("-" * 10) - duration = train_time + test_time - print(f"Completed Prediction. Total duration: {duration}") + print( + f"Completed Prediction. Total duration: {train_time + test_time:.1f}s" + ) - # Write output files + # ── Write outputs ───────────────────────────────────────────────────────── output_dir.mkdir(parents=True, exist_ok=True) with open(output_dir / "metadata.json", "w+") as f: json.dump(dict(train_time=train_time, test_time=test_time), f) diff --git a/solution/submission.py b/solution/submission.py index e91caf2..9530473 100644 --- a/solution/submission.py +++ b/solution/submission.py @@ -1,5 +1,107 @@ +""" +Reference LSTM baseline for the S&P 500 direction-forecasting challenge. + +The ingestion program will call: + + model = get_model(train_loader) + +where `train_loader` is a torch.utils.data.DataLoader that yields +(x, y) batches with: + x : FloatTensor of shape (batch, WINDOW_SIZE, n_features) + y : FloatTensor of shape (batch,) — binary labels (1 = up, 0 = down) + +`get_model` must return a trained torch.nn.Module whose forward pass accepts +a tensor of shape (batch, WINDOW_SIZE, n_features) and returns raw logits of +shape (batch,). The ingestion program applies sigmoid + 0.5 threshold itself. 
+""" + import torch +import torch.nn as nn + + +# ── Hyper-parameters (feel free to tune) ───────────────────────────────────── +HIDDEN_SIZE = 64 +NUM_LAYERS = 2 +DROPOUT = 0.2 +N_EPOCHS = 20 +LEARNING_RATE = 1e-3 +# ───────────────────────────────────────────────────────────────────────────── + + +class LSTMClassifier(nn.Module): + """Sequence-to-one LSTM for binary direction prediction. + + Takes a window of shape (batch, seq_len, input_size) and returns + a scalar logit per sample (shape: (batch,)). + + Architecture + ------------ + LSTM (num_layers, hidden_size, dropout) → hidden state of last timestep + → Linear(hidden_size → 1) → squeeze → logit + """ + + def __init__( + self, + input_size: int, + hidden_size: int = HIDDEN_SIZE, + num_layers: int = NUM_LAYERS, + dropout: float = DROPOUT, + ): + super().__init__() + self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout if num_layers > 1 else 0.0, + ) + self.head = nn.Linear(hidden_size, 1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # x: (batch, seq_len, input_size) + out, _ = self.lstm(x) # (batch, seq_len, hidden_size) + last = out[:, -1, :] # (batch, hidden_size) — last timestep + return self.head(last).squeeze(-1) # (batch,) + + +def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: + """Train an LSTM on the provided DataLoader and return the trained model. + + Parameters + ---------- + train_loader : DataLoader + Yields (x, y) batches where x has shape (batch, WINDOW_SIZE, n_features) + and y has shape (batch,) with values in {0, 1}. + + Returns + ------- + model : nn.Module (in eval mode) + Trained LSTMClassifier whose forward pass returns raw logits. 
+ """ + # Infer input size from the first batch + x_sample, _ = next(iter(train_loader)) + input_size = x_sample.shape[-1] # n_features + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Training on: {device}") + + model = LSTMClassifier(input_size=input_size).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) + criterion = nn.BCEWithLogitsLoss() -def get_model(): - + model.train() + for epoch in range(N_EPOCHS): + total_loss = 0.0 + for x, y in train_loader: + x, y = x.to(device), y.to(device) + optimizer.zero_grad() + logits = model(x) # (batch,) + loss = criterion(logits, y) + loss.backward() + optimizer.step() + total_loss += loss.item() + avg_loss = total_loss / len(train_loader) + print(f" Epoch {epoch + 1:>2}/{N_EPOCHS} loss={avg_loss:.4f}") + model.eval() + return model From 7e8ca20dbf169a4217c8eca559d6584c01ceb741 Mon Sep 17 00:00:00 2001 From: Pedro Marim Date: Sun, 22 Feb 2026 18:07:50 +0100 Subject: [PATCH 05/14] Scoring file for ROC AUC --- scoring_program/scoring.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/scoring_program/scoring.py b/scoring_program/scoring.py index 701f78f..39f675f 100755 --- a/scoring_program/scoring.py +++ b/scoring_program/scoring.py @@ -2,40 +2,39 @@ from pathlib import Path import pandas as pd +from sklearn.metrics import roc_auc_score EVAL_SETS = ["test", "private_test"] -def compute_accuracy(predictions, targets): - # Make sure there is no NaN, as pandas ignores them in mean computation - predictions = predictions.fillna(-10).values - # Return mean of correct predictions - return (predictions == targets.values).mean() +def compute_roc_auc(predictions, targets): + # Make sure there is no NaN + predictions = predictions.fillna(0.5).values + # Return ROC AUC score + return roc_auc_score(targets.values, predictions) def main(reference_dir, prediction_dir, output_dir): scores = {} for eval_set in EVAL_SETS: - 
print(f'Scoring {eval_set}') + print(f"Scoring {eval_set}") predictions = pd.read_csv( - prediction_dir / f'{eval_set}_predictions.csv' - ) - targets = pd.read_csv( - reference_dir / f'{eval_set}_labels.csv' + prediction_dir / f"{eval_set}_predictions.csv" ) + targets = pd.read_csv(reference_dir / f"{eval_set}_labels.csv") - scores[eval_set] = float(compute_accuracy(predictions, targets)) + scores[eval_set] = float(compute_roc_auc(predictions, targets)) # Add train and test times in the score - json_durations = (prediction_dir / 'metadata.json').read_text() + json_durations = (prediction_dir / "metadata.json").read_text() durations = json.loads(json_durations) scores.update(**durations) print(scores) # Write output scores output_dir.mkdir(parents=True, exist_ok=True) - (output_dir / 'scores.json').write_text(json.dumps(scores)) + (output_dir / "scores.json").write_text(json.dumps(scores)) if __name__ == "__main__": @@ -68,5 +67,5 @@ def main(reference_dir, prediction_dir, output_dir): main( Path(args.reference_dir), Path(args.prediction_dir), - Path(args.output_dir) + Path(args.output_dir), ) From b2051be8daabc30f12793887c1817f93c4c017b0 Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 18:14:27 +0100 Subject: [PATCH 06/14] small fix in setup, create a template for submission, the build the ingestion.py file --- ingestion_program/ingestion.py | 44 ++++++++++++++++++++-------------- solution/submission.py | 29 +++++++++++----------- tools/setup_data.py | 12 ++++------ 3 files changed, 46 insertions(+), 39 deletions(-) diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py index 626f645..6585fa5 100755 --- a/ingestion_program/ingestion.py +++ b/ingestion_program/ingestion.py @@ -9,7 +9,7 @@ # Number of past trading days fed as a sequence to the model. # Must be consistent between training and inference. 
-WINDOW_SIZE = 20 +WINDOW_SIZE = 50 EVAL_SETS = ["test", "private_test"] @@ -39,10 +39,18 @@ def __init__( self, features_path, labels_path=None, window_size=WINDOW_SIZE ): self.window_size = window_size - self.X = pd.read_csv(features_path).values.astype(np.float32) + # index_col=0: the first column is the row index saved by setup_data.py, + # not a feature — must be excluded from the data arrays. + self.X = pd.read_csv(features_path, index_col=0).values.astype( + np.float32 + ) self.n_features = self.X.shape[1] if labels_path is not None: - self.y = pd.read_csv(labels_path).values.astype(np.float32).ravel() + self.y = ( + pd.read_csv(labels_path, index_col=0) + .values.astype(np.float32) + .ravel() + ) else: self.y = None # test mode — labels are unknown @@ -92,30 +100,30 @@ def get_test_dataset(data_dir, eval_set): def evaluate_model(model, test_dataset): - """Run inference over a test Dataset and return a DataFrame of 0/1 predictions. + """Run inference over a test Dataset and return a DataFrame of probabilities. - The model is expected to output raw logits (one scalar per sample). - Predictions are thresholded at 0.5 after applying sigmoid. + The model outputs probabilities in [0, 1] (sigmoid already applied). + The scoring program is responsible for applying the decision threshold. 
""" device = next(model.parameters()).device loader = torch.utils.data.DataLoader( test_dataset, batch_size=64, shuffle=False ) - preds = [] + probs = [] model.eval() with torch.no_grad(): for x in loader: # test_dataset returns bare tensors (no label) — x is already the input x = x.to(device) - logits = model(x) # (batch,) - probs = torch.sigmoid(logits) - batch_preds = (probs >= 0.5).long().cpu().numpy().tolist() - preds.extend(batch_preds) - return pd.DataFrame({"Prediction": preds}) + batch_probs = model(x).cpu().numpy().tolist() # floats in [0, 1] + probs.extend(batch_probs) + return pd.DataFrame({"Probability": probs}) def main(data_dir, output_dir): - from submission import get_model # imported here so sys.path is set first + from submission import ( + get_model, + ) # imported here so sys.path is set first data_dir = Path(data_dir) output_dir = Path(output_dir) @@ -164,19 +172,19 @@ def main(data_dir, output_dir): parser.add_argument( "--data-dir", type=str, - default="/app/input_data", - help="", + default="dev_phase/input_data", + help="Root folder containing train/, test/, and private_test/ splits.", ) parser.add_argument( "--output-dir", type=str, - default="/app/output", - help="", + default="ingestion_res", + help="Folder where prediction CSVs and metadata.json will be written.", ) parser.add_argument( "--submission-dir", type=str, - default="/app/ingested_program", + default="solution", help="", ) diff --git a/solution/submission.py b/solution/submission.py index 9530473..436486b 100644 --- a/solution/submission.py +++ b/solution/submission.py @@ -11,8 +11,8 @@ y : FloatTensor of shape (batch,) — binary labels (1 = up, 0 = down) `get_model` must return a trained torch.nn.Module whose forward pass accepts -a tensor of shape (batch, WINDOW_SIZE, n_features) and returns raw logits of -shape (batch,). The ingestion program applies sigmoid + 0.5 threshold itself. 
+a tensor of shape (batch, WINDOW_SIZE, n_features) and returns probabilities +in [0, 1] of shape (batch,). The ingestion program applies a 0.5 threshold. """ import torch @@ -20,11 +20,11 @@ # ── Hyper-parameters (feel free to tune) ───────────────────────────────────── -HIDDEN_SIZE = 64 -NUM_LAYERS = 2 -DROPOUT = 0.2 -N_EPOCHS = 20 -LEARNING_RATE = 1e-3 +HIDDEN_SIZE = 128 +NUM_LAYERS = 3 +DROPOUT = 0.1 +N_EPOCHS = 3 +LEARNING_RATE = 1e-4 # ───────────────────────────────────────────────────────────────────────────── @@ -37,7 +37,7 @@ class LSTMClassifier(nn.Module): Architecture ------------ LSTM (num_layers, hidden_size, dropout) → hidden state of last timestep - → Linear(hidden_size → 1) → squeeze → logit + → Linear(hidden_size → 1) → squeeze → Sigmoid → probability in [0, 1] """ def __init__( @@ -60,8 +60,9 @@ def __init__( def forward(self, x: torch.Tensor) -> torch.Tensor: # x: (batch, seq_len, input_size) out, _ = self.lstm(x) # (batch, seq_len, hidden_size) - last = out[:, -1, :] # (batch, hidden_size) — last timestep - return self.head(last).squeeze(-1) # (batch,) + last = out[:, -1, :] # (batch, hidden_size) — last timestep + logit = self.head(last).squeeze(-1) # (batch,) + return torch.sigmoid(logit) # (batch,) — probability in [0, 1] def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: @@ -76,7 +77,7 @@ def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: Returns ------- model : nn.Module (in eval mode) - Trained LSTMClassifier whose forward pass returns raw logits. + Trained LSTMClassifier whose forward pass returns probabilities in [0, 1]. 
""" # Infer input size from the first batch x_sample, _ = next(iter(train_loader)) @@ -87,7 +88,7 @@ def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: model = LSTMClassifier(input_size=input_size).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) - criterion = nn.BCEWithLogitsLoss() + criterion = nn.BCELoss() # model already applies sigmoid model.train() for epoch in range(N_EPOCHS): @@ -95,8 +96,8 @@ def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: for x, y in train_loader: x, y = x.to(device), y.to(device) optimizer.zero_grad() - logits = model(x) # (batch,) - loss = criterion(logits, y) + probs = model(x) # (batch,) — probabilities in [0, 1] + loss = criterion(probs, y) loss.backward() optimizer.step() total_loss += loss.item() diff --git a/tools/setup_data.py b/tools/setup_data.py index edbe29e..125549b 100644 --- a/tools/setup_data.py +++ b/tools/setup_data.py @@ -14,7 +14,7 @@ def make_csv(data, filepath): filepath.parent.mkdir(parents=True, exist_ok=True) - data.to_csv(filepath, index=False) + data.to_csv(filepath, index=True) # integer row index saved as first column if __name__ == "__main__": @@ -23,9 +23,9 @@ def make_csv(data, filepath): print(f"Loading data from {RAW_DATA_PATH}") df = pd.read_csv(RAW_DATA_PATH) - # Separate features and target + # Separate features and target; drop Date (not a model input) y = df[TARGET_COL] - X = df.drop(columns=[TARGET_COL]) + X = df.drop(columns=[TARGET_COL, "Date"]).reset_index(drop=True) n = len(df) train_end = int(n * 0.6) @@ -59,7 +59,7 @@ def make_csv(data, filepath): print("\nData splits created successfully!") print( - f"{'Split':<15} {'Samples':<10} {'First Date':<15} {'Last Date':<15}" + f"{'Split':<15} {'Samples':<10} {'Index start':<15} {'Index end':<15}" ) print("-" * 55) for split, X_split in [ @@ -67,8 +67,6 @@ def make_csv(data, filepath): ("test", X_test), ("private_test", X_private_test), ]: - first_date = X_split["Date"].iloc[0] 
- last_date = X_split["Date"].iloc[-1] print( - f"{split:<15} {len(X_split):<10} {first_date:<15} {last_date:<15}" + f"{split:<15} {len(X_split):<10} {X_split.index[0]:<15} {X_split.index[-1]:<15}" ) From ae49f9a050c8fd1c86803c9ce97570e41177c134 Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 18:20:27 +0100 Subject: [PATCH 07/14] Update the requirements --- requirements.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index fda4dd6..41980e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,11 @@ -pandas -scikit-learn +# ── Core runtime (ingestion + scoring + submission) ─────────────────────────── +numpy==2.2.6 +pandas==2.3.3 +scikit-learn==1.7.2 + +# PyTorch CPU build — participants may swap for a GPU wheel if needed +torch==2.8.0 + +# ── Local development tools ─────────────────────────────────────────────────── +# Required only for tools/run_docker.py (not installed inside the Docker image) +docker From 462ffa981aa5244951ce9d6852ec35b78c646693 Mon Sep 17 00:00:00 2001 From: Pedro Marim Date: Sun, 22 Feb 2026 18:30:58 +0100 Subject: [PATCH 08/14] Small fix on scoring function --- scoring_program/scoring.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scoring_program/scoring.py b/scoring_program/scoring.py index 39f675f..971e3ec 100755 --- a/scoring_program/scoring.py +++ b/scoring_program/scoring.py @@ -11,7 +11,7 @@ def compute_roc_auc(predictions, targets): # Make sure there is no NaN predictions = predictions.fillna(0.5).values # Return ROC AUC score - return roc_auc_score(targets.values, predictions) + return roc_auc_score(targets, predictions) def main(reference_dir, prediction_dir, output_dir): @@ -24,7 +24,9 @@ def main(reference_dir, prediction_dir, output_dir): ) targets = pd.read_csv(reference_dir / f"{eval_set}_labels.csv") - scores[eval_set] = float(compute_roc_auc(predictions, targets)) + scores[eval_set] = float( + 
compute_roc_auc(predictions, targets["Target"].values) + ) # Add train and test times in the score json_durations = (prediction_dir / "metadata.json").read_text() From 487b82bb83b528d7d148522f60c0bc0b32225de2 Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 18:40:52 +0100 Subject: [PATCH 09/14] Building Docker --- ingestion_program/ingestion.py | 18 +++++++---- tools/Dockerfile | 59 +++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py index 6585fa5..3c3143f 100755 --- a/ingestion_program/ingestion.py +++ b/ingestion_program/ingestion.py @@ -172,20 +172,26 @@ def main(data_dir, output_dir): parser.add_argument( "--data-dir", type=str, - default="dev_phase/input_data", - help="Root folder containing train/, test/, and private_test/ splits.", + default="/app/input_data", + help="Root folder containing train/, test/, and private_test/ splits. " + "Codabench mounts data at /app/input_data. " + "For local testing pass: --data-dir dev_phase/input_data", ) parser.add_argument( "--output-dir", type=str, - default="ingestion_res", - help="Folder where prediction CSVs and metadata.json will be written.", + default="/app/output", + help="Folder where prediction CSVs and metadata.json will be written. " + "Codabench expects output at /app/output. " + "For local testing pass: --output-dir ingestion_res", ) parser.add_argument( "--submission-dir", type=str, - default="solution", - help="", + default="/app/ingested_program", + help="Directory containing submission.py. " + "Codabench mounts participant code at /app/ingested_program. 
" + "For local testing pass: --submission-dir solution", ) args = parser.parse_args() diff --git a/tools/Dockerfile b/tools/Dockerfile index dd9b157..e3fa61a 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -1,22 +1,45 @@ -# Step 1: Start from an official PyTorch + GPU base image -# (Ubuntu 22.04, Python 3.11, PyTorch, CUDA 12.6, torch 2.8) -FROM pytorch/pytorch:2.8.0-cuda12.6-cudnn9-runtime +# ───────────────────────────────────────────────────────────────────────────── +# Codabench Docker image — S&P 500 Autoregressive Forecasting Challenge +# +# Codabench mounts the following directories at runtime: +# /app/input_data ← input splits (train/, test/, private_test/) +# /app/ingested_program ← participant's submission.py +# /app/ingestion_program ← ingestion.py (organiser code) +# /app/scoring_program ← scoring.py (organiser code) +# /app/output ← ingestion writes predictions here +# /app/input/ref ← reference labels (scoring) +# /app/input/res ← predictions to score (scoring) +# +# Build from the PROJECT ROOT, not from tools/: +# docker build -t sp500-challenge -f tools/Dockerfile . +# +# The build context must be the project root so that requirements.txt is +# accessible via COPY. +# ───────────────────────────────────────────────────────────────────────────── -# Set environment variables to prevent interactive prompts -ENV DEBIAN_FRONTEND=noninteractive +FROM python:3.11-slim -# Step 2: Install system-level dependencies (if any) -# e.g., git, wget, or common libraries for OpenCV like libgl1 -RUN apt-get update && apt-get install -y \ - git \ - wget \ - libgl1-mesa-glx \ - && rm -rf /var/lib/apt/lists/* \ - && pip install -U pip +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 -# Step 3: Copy and pre-install all Python dependencies -# This 'requirements.txt' file should list pandas, scikit-learn, timm, etc. -# Place it in the same directory as this Dockerfile. 
+# Minimal system deps: gcc is needed to compile some numpy/pandas C extensions +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Pre-install Python dependencies. +# The 'docker' package is a local dev tool — skip it inside the container. +# torch is installed from the official CPU wheel index to avoid pulling the +# full CUDA build (~2 GB saved). COPY requirements.txt /tmp/requirements.txt -RUN pip install --no-cache-dir -r /tmp/requirements.txt -RUN pip install opencv-python-headless +RUN pip install --no-cache-dir --upgrade pip \ + && grep -v "^docker" /tmp/requirements.txt \ + | grep -v "^#" \ + | grep -v "^$" \ + | grep -v "torch" \ + | pip install --no-cache-dir -r /dev/stdin \ + && pip install --no-cache-dir \ + torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu From f508b2357eab474fb64a2adb0dbbd400ebdaa228 Mon Sep 17 00:00:00 2001 From: nnoya Date: Sun, 22 Feb 2026 18:55:29 +0100 Subject: [PATCH 10/14] Pages --- pages/participate.md | 59 ++++++++++++++++++++++--- pages/seed.md | 102 +++++++++++++++++++++++++++++++++++-------- pages/terms.md | 87 +++++++++++++++++++++++++++++------- pages/timeline.md | 35 +++++++++++---- 4 files changed, 232 insertions(+), 51 deletions(-) diff --git a/pages/participate.md b/pages/participate.md index 4d5427b..c44336d 100755 --- a/pages/participate.md +++ b/pages/participate.md @@ -1,10 +1,55 @@ -# How to participate +# How to Participate -You should submit an untrained model in a python file `model.py` which contains -your `class Model`, which will be imported, trained, and tested on Codalab. +## Objective -See the "Seed" page for the outline of a `Model` class, with the expected -function names. +Build a model that predicts whether the S&P 500 index will **close higher or lower** than the previous day, +using only information available before noon (ET) on the trading day in question. 
-See the "Timeline" page for additional information about the phases of this -competition +## Input Features + +Each sample in the dataset is a row in a CSV with the following columns (all values are for the **current trading day** or computed from past days only): + +| Column | Description | +|--------|-------------| +| `Open` | Opening price of the current trading day | +| `High` | Intraday high up to the morning window | +| `Low` | Intraday low up to the morning window | +| `Close` | Previous day's closing price | +| `Volume` | Trading volume up to the morning window | + +The ingestion program constructs **sliding windows** of the last 20 trading days for each sample and feeds them to your model as tensors of shape `(batch, 20, n_features)`. + +## Target Label + +- **1** — today's close will be **strictly above** the previous close +- **0** — today's close will be **at or below** the previous close + +## What to Submit + +Submit a single file named **`submission.py`** containing a function: + +```python +def get_model(train_loader): + ... + return model +``` + +`train_loader` is a `torch.utils.data.DataLoader` yielding `(x, y)` batches where: +- `x` has shape `(batch, 20, n_features)` — a sequence of 20 daily feature vectors +- `y` has shape `(batch,)` — binary labels `{0, 1}` + +Your `get_model` function must **train the model** using the provided loader and return a trained `torch.nn.Module` whose `forward(x)` outputs **probabilities in [0, 1]** of shape `(batch,)` — i.e. sigmoid must already be applied. + +See the **Seed** page for a working skeleton to get started. + +## Evaluation Metric + +Submissions are ranked by **ROC-AUC score** on the held-out test set. +A perfect model scores 1.0; random guessing scores ~0.5. + +## Rules + +- Your model may only use information in the provided feature set — no external data sources. +- External Python libraries (e.g. `torch`, `sklearn`, `numpy`) are allowed. 
+- You may submit as many times as you like during the Development Phase. +- The private test set is only revealed after the phase ends. diff --git a/pages/seed.md b/pages/seed.md index 9b15f6a..0610971 100644 --- a/pages/seed.md +++ b/pages/seed.md @@ -1,21 +1,85 @@ -# Seed: +# Seed — Starter Template +Copy this file as `submission.py` and implement your model inside `get_model`. + +The ingestion program will call `get_model(train_loader)` and expect back a trained +`torch.nn.Module` whose `forward(x)` returns probabilities in **[0, 1]**. + +```python +import torch +import torch.nn as nn + + +def get_model(train_loader): + """ + Train a model on the S&P 500 direction-forecasting task and return it. + + Parameters + ---------- + train_loader : torch.utils.data.DataLoader + Yields (x, y) batches where: + x — FloatTensor of shape (batch, 20, n_features) + A sliding window of the last 20 daily feature vectors. + Features: Open, High, Low, Close, Volume (current and past days). + y — FloatTensor of shape (batch,) + Binary label: 1 if today's close > previous close, else 0. + + Returns + ------- + model : torch.nn.Module + Trained model in eval() mode. + forward(x) must accept shape (batch, 20, n_features) + and return probabilities in [0, 1] of shape (batch,). + The ingestion program applies a 0.5 threshold to produce 0/1 predictions. 
+ """ + + # --- Infer input size from the first batch --- + x_sample, _ = next(iter(train_loader)) + input_size = x_sample.shape[-1] # number of features per timestep + seq_len = x_sample.shape[1] # window size (20) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # --- Define your model here --- + # Example: single-layer LSTM + linear head + sigmoid + class MyModel(nn.Module): + def __init__(self): + super().__init__() + self.lstm = nn.LSTM(input_size, hidden_size=64, + num_layers=1, batch_first=True) + self.head = nn.Linear(64, 1) + + def forward(self, x): + out, _ = self.lstm(x) # (batch, seq_len, 64) + last = out[:, -1, :] # (batch, 64) — last timestep + return torch.sigmoid(self.head(last).squeeze(-1)) # (batch,) + + model = MyModel().to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + criterion = nn.BCELoss() # BCELoss because sigmoid is already applied + + # --- Training loop --- + N_EPOCHS = 10 + model.train() + for epoch in range(N_EPOCHS): + total_loss = 0.0 + for x, y in train_loader: + x, y = x.to(device), y.to(device) + optimizer.zero_grad() + probs = model(x) # (batch,) + loss = criterion(probs, y) + loss.backward() + optimizer.step() + total_loss += loss.item() + print(f"Epoch {epoch+1}/{N_EPOCHS} loss={total_loss/len(train_loader):.4f}") + + model.eval() + return model ``` -class Model: - def fit(self, X_train, y_train): - """ - This should handle the logic of training your model - :param X_train: np.array of training data - :param y_train: np.array of the same length as X_train. 
Contains classifications of X_train - """ - pass - - def predict(self, X_test): - """ - This should handle making predictions with a trained model - :param X_test: np.array of testing data - :return: np.array of the same length as X_test containing predictions to each point in X_test - """ - pass - -``` \ No newline at end of file + +## Tips + +- You can replace the LSTM with a GRU (`nn.GRU`), Transformer (`nn.TransformerEncoder`), or any other architecture. +- The window size is fixed at **20** timesteps by the ingestion program. +- Keep training time reasonable — the Codabench environment has limited CPU resources. +- You are free to add dropout, batch normalisation, learning rate schedulers, etc. diff --git a/pages/terms.md b/pages/terms.md index 0d69023..ece5c3b 100755 --- a/pages/terms.md +++ b/pages/terms.md @@ -1,18 +1,73 @@ # Terms and Conditions -## Lorem Ipsum - -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. -Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure -dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non -proident, sunt in culpa qui officia deserunt mollit anim id est laborum. - -## Sed ut perspiciatis - -Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae -ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit -aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam -est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore -et dolore magnam aliquam quaerat voluptatem. 
Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, -nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae -consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur? +## 1. Eligibility + +This challenge is open to students enrolled in the DataCamp courses at École Polytechnique (INF554 / MAP583). Participation is voluntary and free of charge. + +Participants may compete individually or in teams, subject to the rules specified by the course instructors. +2. Data Usage + +The dataset provided for this challenge (historical S&P 500 daily OHLCV data sourced from public market data) is intended solely for educational and research purposes within the scope of this course. + +Participants agree to: + + Use the data only for the purpose of this challenge. + Not redistribute the data outside the course or upload it to public repositories. + Not attempt to identify, reverse-engineer, or misuse the data beyond its intended scientific context. + +3. Training and Evaluation Restrictions + +Participants must comply with the following rules: + + Only the provided training split may be used for training and validation. + The test and private test splits are reserved for evaluation only and must not be used in the training loop. + Any attempt to directly or indirectly train on evaluation data will result in disqualification. + +4. Submission Rules + +Participants must submit: + + A compressed folder containing the code necessary to train a model (see page seed.md). + Any accompanying code or documentation as specified by the instructors. + +Submissions must be the original work of the participants. +5. Academic Integrity + +Participants are expected to adhere to the École Polytechnique’s academic integrity policies. + +Specifically: + + Plagiarism, including copying code or solutions without proper attribution, is prohibited. 
+ The use of external libraries and pre-trained models is allowed unless otherwise stated, provided their use is clearly documented. + Collaboration between teams is not allowed unless explicitly permitted by the instructors. + +Violations of academic integrity rules may result in penalties, including disqualification or academic sanctions. +6. Intellectual Property + +Participants retain ownership of the code and models they develop as part of this challenge. + +By submitting their results, participants grant the course instructors and the hosting institution a non-exclusive, royalty-free right to: + + Use the submissions for grading and evaluation. + Use anonymized results or visualizations for teaching, presentations, or future course materials. + +7. Liability + +The organizers provide the data and evaluation infrastructure “as is” and make no guarantees regarding accuracy, completeness, or fitness for a particular purpose. + +The organizers are not responsible for: + + Technical issues, data loss, or submission failures. + Any damages or losses arising from participation in the challenge. + +8. Modification and Termination + +The organizers reserve the right to: + + Modify the challenge rules, datasets, or evaluation criteria if necessary. + Terminate the challenge in case of technical issues or unforeseen circumstances. + +Any changes will be communicated to participants in a timely manner. +9. Acceptance of Terms + +By participating in the challenge, participants acknowledge that they have read, understood, and agreed to these Terms and Conditions. diff --git a/pages/timeline.md b/pages/timeline.md index 4e613bf..015ea39 100644 --- a/pages/timeline.md +++ b/pages/timeline.md @@ -1,11 +1,28 @@ # Timeline -## Development phase -This phase should be used to tune your models, testing against a small -set of testing data - -## Final phase -Resubmit your preferred submission from the development phase to test -against a new set of testing data. 
Your model will be trained against -the same set of training data as in the development phase. You may only -make one submission to this phase, so choose wisely. +## Development Phase — October 7, 2025 → March 31, 2026 + +The development phase is open for the full duration of the course. + +- **Training data**: historical S&P 500 daily OHLCV data (roughly 2006–2022). +- **Public test set**: a held-out window of ~250 trading days (~2022–2023). + Your submission is scored against this set after every submission. +- **Submissions**: unlimited — iterate freely and track your progress on the leaderboard. +- **Goal**: tune your model architecture, features, and hyper-parameters to maximise ROC-AUC on the public test set. + +## Private Leaderboard — revealed at end of Development Phase + +Once the development phase closes on **March 31, 2026**, the private test set +(a further ~250 trading days, ~2023–2024) is scored for all submissions. +Final rankings are based on the **private test ROC-AUC**. + +The private test set is completely hidden during the development phase — optimising +sole ly for the public leaderboard may not generalise. 
+ +## Key Dates + +| Date | Event | +|------|-------| +| October 7, 2025 | Competition opens, development phase begins | +| March 31, 2026 | Development phase closes, no further submissions accepted | +| Early April 2026 | Private leaderboard revealed, final rankings published | From aecaee74e986106738e5b031896427305d5f8c54 Mon Sep 17 00:00:00 2001 From: Pedro Marim Date: Sun, 22 Feb 2026 19:17:37 +0100 Subject: [PATCH 11/14] Refined .yaml file --- competition.yaml | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/competition.yaml b/competition.yaml index 44a4a74..0a8960d 100755 --- a/competition.yaml +++ b/competition.yaml @@ -1,26 +1,29 @@ version: 2 -title: "Autoregressive Forecasting of the S&P 500 Index" +title: "Directional Forecasting of the S&P 500 Index" description: > - Can you predict whether the S&P 500 will close up or down — using only what you know by mid-morning? + Can you predict whether the S&P 500 will close UP or DOWN tomorrow? - Each trading day, participants receive a feature vector built from: - - Intraday morning signals: the day's open price and early price action - (e.g. open-to-first-hour return, morning high/low range, opening gap vs previous close). - - Historical context: past N days of daily OHLCV data, log-returns, and - rolling statistics (volatility, momentum) up to and including the previous close. + Each trading day, participants receive a historical feature vector built from + past daily OHLCV data (Open, High, Low, Close, Volume) of the S&P 500 index. - The target label is binary: **1** if the day's close is strictly above the previous close, - **0** otherwise. No look-ahead is permitted — only information available before noon (ET) - may be used as features for the current day. + The target label is binary: **1** if the next trading day's close is strictly + above the current day's close, **0** otherwise. 
Participants are encouraged to + engineer their own historical context features (e.g., rolling volatility, moving averages) + using the provided sequential data. - Participants submit a scikit-learn–compatible model via a `submission.py` file - exposing a `get_model()` function. The model is trained server-side on historical - data and evaluated on a held-out test window using **directional accuracy** - (fraction of days where the predicted direction matches the actual close direction). + Participants submit a PyTorch model via a `submission.py` file exposing a `get_model(train_loader)` + function. The ingestion program passes a `DataLoader` yielding `(x, y)` batches where: + - `x` is a `FloatTensor` of shape `(batch, WINDOW_SIZE, n_features)` — a sliding window of historical daily features + - `y` is a `FloatTensor` of shape `(batch,)` — binary labels (1 = up, 0 = down) + + `get_model` must return a trained `torch.nn.Module` whose forward pass accepts a tensor of + shape `(batch, WINDOW_SIZE, n_features)` and returns **probabilities in [0, 1]** of shape `(batch,)`. + + Submissions are ranked by their **ROC-AUC** score computed from the predicted probabilities. This is a DataCamp challenge organised at École Polytechnique (INF554 / MAP583). image: logo.png -registration_auto_approve: False # set to True to skip manual approval +registration_auto_approve: False terms: pages/terms.md pages: @@ -35,13 +38,9 @@ tasks: - index: 0 name: Development Task description: > - Same-day close direction forecasting of the S&P 500 using morning information. - Each sample consists of: (i) intraday morning features for the current trading day - (opening gap, open price, early price action) and (ii) historical daily features - from the past N sessions (log-returns, OHLCV, rolling volatility, momentum). - The label is 1 if today's close > previous close, 0 otherwise. 
- No information after the morning window may be used; models are scored on - directional accuracy over a public held-out test window. + Next-day close direction forecasting of the S&P 500 using sliding windows of daily OHLCV data. + Models must be PyTorch modules trained via `get_model(train_loader)` and must output + probabilities (not hard 0s and 1s) to be properly scored via ROC-AUC over a public held-out test window. input_data: dev_phase/input_data/ reference_data: dev_phase/reference_data/ ingestion_program: ingestion_program/ @@ -56,7 +55,7 @@ solutions: phases: - name: Development Phase description: > - Tune and validate your autoregressive model using the provided historical + Tune and validate your forecasting model using the provided historical S&P 500 training data. Your predictions are scored against a public test set so you can iterate quickly. Unlimited submissions are allowed in this phase. start: 10-07-2025 @@ -68,11 +67,11 @@ leaderboards: - title: Results key: main columns: - - title: Directional Accuracy (public test) + - title: ROC-AUC (public test) key: test index: 0 sorting: desc # higher is better - - title: Directional Accuracy (private test) + - title: ROC-AUC (private test) key: private_test index: 1 sorting: desc @@ -84,4 +83,4 @@ leaderboards: - title: Predict Time (s) key: test_time index: 3 - sorting: asc + sorting: asc \ No newline at end of file From 67da3515a61e87caf5d2c6a776cc0c9bccfe2769 Mon Sep 17 00:00:00 2001 From: nnoya Date: Tue, 3 Mar 2026 16:53:47 +0100 Subject: [PATCH 12/14] Small modificiations in the docker --- competition.yaml | 6 ++++++ scoring_program/scoring.py | 6 +++--- tools/run_docker.py | 19 +++++++++++++------ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/competition.yaml b/competition.yaml index 0a8960d..395bf1e 100755 --- a/competition.yaml +++ b/competition.yaml @@ -45,6 +45,12 @@ tasks: reference_data: dev_phase/reference_data/ ingestion_program: ingestion_program/ scoring_program: 
scoring_program/ + # Docker image used by Codabench to run ingestion and scoring. + # Build and push with: + # docker build -t /sp500-challenge:latest -f tools/Dockerfile . + # docker push /sp500-challenge:latest + # Then replace the placeholder below with your actual image name. + docker_image: nicolasnoya2001/sp500-challenge:latest solutions: - index: 0 diff --git a/scoring_program/scoring.py b/scoring_program/scoring.py index 971e3ec..27b8f53 100755 --- a/scoring_program/scoring.py +++ b/scoring_program/scoring.py @@ -8,10 +8,10 @@ def compute_roc_auc(predictions, targets): - # Make sure there is no NaN - predictions = predictions.fillna(0.5).values + # Flatten to 1D — .values on a single-column DataFrame gives shape (n, 1) + preds = predictions.iloc[:, 0].fillna(0.5).to_numpy() # Return ROC AUC score - return roc_auc_score(targets, predictions) + return roc_auc_score(targets, preds) def main(reference_dir, prediction_dir, output_dir): diff --git a/tools/run_docker.py b/tools/run_docker.py index d888b76..9cccfda 100644 --- a/tools/run_docker.py +++ b/tools/run_docker.py @@ -1,4 +1,5 @@ from pathlib import Path + try: import docker except ImportError: @@ -14,12 +15,18 @@ print("Docker client initialized successfully.") print("Building Docker image...") - client.images.build(path=".", tag="tommoral/template:v1") - print("Docker image built successfully with tag 'tommoral/template:v1'.") + client.images.build( + path=str(REPO), + dockerfile=str(REPO / "tools" / "Dockerfile"), + tag="nicolasnoya2001/sp500-challenge:latest", + ) + print( + "Docker image built successfully with tag 'nicolasnoya2001/sp500-challenge:latest'." 
+ ) print("Running Docker container...") logs = client.containers.run( - image="tommoral/template:v1", + image="nicolasnoya2001/sp500-challenge:latest", command="python3 /app/ingestion_program/ingestion.py", remove=True, name="ingestion", @@ -29,11 +36,11 @@ f"{REPO}/dev_phase/input_data:/app/input_data", f"{REPO}/ingestion_res:/app/output", f"{REPO}/solution:/app/ingested_program", - ] + ], ) print(logs.decode("utf-8")) logs = client.containers.run( - image="tommoral/template:v1", + image="nicolasnoya2001/sp500-challenge:latest", command="python3 /app/scoring_program/scoring.py", remove=True, name="scoring", @@ -43,7 +50,7 @@ f"{REPO}/dev_phase/reference_data:/app/input/ref", f"{REPO}/ingestion_res:/app/input/res", f"{REPO}/scoring_res:/app/", - ] + ], ) print(logs.decode("utf-8")) print("Docker container ran successfully.") From 99aa36597f90b85fc7fd5f261f85697c89e35721 Mon Sep 17 00:00:00 2001 From: nnoya Date: Fri, 6 Mar 2026 16:40:13 +0100 Subject: [PATCH 13/14] Last fixes to the challenge --- competition.yaml | 2 + pages/participate.md | 26 ++-- pages/seed.md | 12 +- pages/terms.md | 2 +- pages/timeline.md | 3 +- template_starting_kit.ipynb | 251 ++++++++++++++++++++++++++++-------- tools/create_bundle.py | 1 + 7 files changed, 225 insertions(+), 72 deletions(-) diff --git a/competition.yaml b/competition.yaml index 395bf1e..dd59300 100755 --- a/competition.yaml +++ b/competition.yaml @@ -45,6 +45,8 @@ tasks: reference_data: dev_phase/reference_data/ ingestion_program: ingestion_program/ scoring_program: scoring_program/ + public_data: dev_phase/input_data/train + starting_kit: template_starting_kit.ipynb # Docker image used by Codabench to run ingestion and scoring. # Build and push with: # docker build -t /sp500-challenge:latest -f tools/Dockerfile . 
diff --git a/pages/participate.md b/pages/participate.md index c44336d..1cfc38d 100755 --- a/pages/participate.md +++ b/pages/participate.md @@ -2,8 +2,8 @@ ## Objective -Build a model that predicts whether the S&P 500 index will **close higher or lower** than the previous day, -using only information available before noon (ET) on the trading day in question. +Build a model that predicts whether the S&P 500 index will **close strictly above** the current day's close on the **next trading day**, +using only the provided historical OHLCV features. ## Input Features @@ -11,13 +11,13 @@ Each sample in the dataset is a row in a CSV with the following columns (all val | Column | Description | |--------|-------------| -| `Open` | Opening price of the current trading day | -| `High` | Intraday high up to the morning window | -| `Low` | Intraday low up to the morning window | -| `Close` | Previous day's closing price | -| `Volume` | Trading volume up to the morning window | +| `Open` | Opening price of the trading day | +| `High` | Intraday high | +| `Low` | Intraday low | +| `Close` | Closing price of the trading day | +| `Volume` | Total trading volume | -The ingestion program constructs **sliding windows** of the last 20 trading days for each sample and feeds them to your model as tensors of shape `(batch, 20, n_features)`. +The ingestion program constructs **sliding windows** of the last **50 trading days** for each sample and feeds them to your model as tensors of shape `(batch, 50, n_features)`. 
## Target Label @@ -35,10 +35,10 @@ def get_model(train_loader): ``` `train_loader` is a `torch.utils.data.DataLoader` yielding `(x, y)` batches where: -- `x` has shape `(batch, 20, n_features)` — a sequence of 20 daily feature vectors +- `x` has shape `(batch, 50, n_features)` — a sliding window of the last 50 daily feature vectors - `y` has shape `(batch,)` — binary labels `{0, 1}` -Your `get_model` function must **train the model** using the provided loader and return a trained `torch.nn.Module` whose `forward(x)` outputs **probabilities in [0, 1]** of shape `(batch,)` — i.e. sigmoid must already be applied. +Your `get_model` function must **train the model** using the provided loader and return a trained `torch.nn.Module` whose `forward(x)` outputs **probabilities in [0, 1]** of shape `(batch,)` — i.e. sigmoid must already be applied inside `forward`. See the **Seed** page for a working skeleton to get started. @@ -47,6 +47,12 @@ See the **Seed** page for a working skeleton to get started. Submissions are ranked by **ROC-AUC score** on the held-out test set. A perfect model scores 1.0; random guessing scores ~0.5. +## How to Submit + +1. Write your `submission.py` with a `get_model(train_loader)` function. +2. Zip it: `zip submission.zip submission.py` +3. Upload the zip on the **My Submissions** page. + ## Rules - Your model may only use information in the provided feature set — no external data sources. diff --git a/pages/seed.md b/pages/seed.md index 0610971..6853dfe 100644 --- a/pages/seed.md +++ b/pages/seed.md @@ -18,8 +18,8 @@ def get_model(train_loader): ---------- train_loader : torch.utils.data.DataLoader Yields (x, y) batches where: - x — FloatTensor of shape (batch, 20, n_features) - A sliding window of the last 20 daily feature vectors. + x — FloatTensor of shape (batch, 50, n_features) + A sliding window of the last 50 daily feature vectors. Features: Open, High, Low, Close, Volume (current and past days). 
y — FloatTensor of shape (batch,) Binary label: 1 if today's close > previous close, else 0. @@ -28,15 +28,15 @@ def get_model(train_loader): ------- model : torch.nn.Module Trained model in eval() mode. - forward(x) must accept shape (batch, 20, n_features) + forward(x) must accept shape (batch, 50, n_features) and return probabilities in [0, 1] of shape (batch,). - The ingestion program applies a 0.5 threshold to produce 0/1 predictions. + Probabilities are used directly by the scoring program to compute ROC-AUC. """ # --- Infer input size from the first batch --- x_sample, _ = next(iter(train_loader)) input_size = x_sample.shape[-1] # number of features per timestep - seq_len = x_sample.shape[1] # window size (20) + seq_len = x_sample.shape[1] # window size (50) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") @@ -80,6 +80,6 @@ def get_model(train_loader): ## Tips - You can replace the LSTM with a GRU (`nn.GRU`), Transformer (`nn.TransformerEncoder`), or any other architecture. -- The window size is fixed at **20** timesteps by the ingestion program. +- The window size is fixed at **50** timesteps by the ingestion program. - Keep training time reasonable — the Codabench environment has limited CPU resources. - You are free to add dropout, batch normalisation, learning rate schedulers, etc. diff --git a/pages/terms.md b/pages/terms.md index ece5c3b..e6135f0 100755 --- a/pages/terms.md +++ b/pages/terms.md @@ -27,7 +27,7 @@ Participants must comply with the following rules: Participants must submit: - A compressed folder containing the code necessary to train a model (see page seed.md). + A single file named `submission.py` containing a `get_model(train_loader)` function, zipped as `submission.zip` (see the **Seed** page for a full working template). Any accompanying code or documentation as specified by the instructors. Submissions must be the original work of the participants. 
diff --git a/pages/timeline.md b/pages/timeline.md index 015ea39..5ef189a 100644 --- a/pages/timeline.md +++ b/pages/timeline.md @@ -16,8 +16,7 @@ Once the development phase closes on **March 31, 2026**, the private test set (a further ~250 trading days, ~2023–2024) is scored for all submissions. Final rankings are based on the **private test ROC-AUC**. -The private test set is completely hidden during the development phase — optimising -sole ly for the public leaderboard may not generalise. +The private test set is completely hidden during the development phase — optimising solely for the public leaderboard may not generalise. ## Key Dates diff --git a/template_starting_kit.ipynb b/template_starting_kit.ipynb index 7167a3a..2434ef9 100644 --- a/template_starting_kit.ipynb +++ b/template_starting_kit.ipynb @@ -4,17 +4,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "
\n", - " \n", - " \n", - " \n", - " \n", - "
\n", + "# Directional Forecasting of the S&P 500 Index\n", "\n", - "# Template Kit for Cadabench challenge in the Datacamp\n", + "*DataCamp Challenge — École Polytechnique (INF554 / MAP583)*\n", "\n", - " Thomas Moreau (Inria)
\n", - " Pedro Rodrigues (Inria) " + "---\n", + "\n", + "Can you predict whether the S&P 500 will close **UP** or **DOWN** tomorrow?\n", + "\n", + "This notebook walks you through the data, the evaluation metric, and how to build and test a submission locally before uploading it to Codabench.\n" ] }, { @@ -23,28 +21,41 @@ "source": [ "## Introduction\n", "\n", - "Describe the challenge, in particular:\n", + "### The Task\n", + "\n", + "This is a **binary classification** challenge: given the recent history of the S&P 500 index, predict whether the next trading day's closing price will be **strictly above** (`1`) or **at or below** (`0`) the current day's closing price.\n", + "\n", + "### The Data\n", + "\n", + "Each row in the dataset represents one **trading day** and contains the following raw OHLCV features:\n", + "\n", + "| Column | Description |\n", + "|----------|-------------|\n", + "| `Open` | Opening price of the day |\n", + "| `High` | Intraday high |\n", + "| `Low` | Intraday low |\n", + "| `Close` | Closing price of the day |\n", + "| `Volume` | Total trading volume |\n", + "\n", + "The ingestion program wraps these rows into **sliding windows of 50 consecutive trading days**, so your model receives sequences of shape `(batch, 50, 5)`.\n", "\n", - "- Where the data comes from?\n", - "- What is the task this challenge aims to solve?\n", - "- Why does it matter?" + "### Why It Matters\n", + "\n", + "Predicting market direction is a canonical and challenging time-series problem — the signal-to-noise ratio is very low, and models that generalise beyond the training period are rare. 
The challenge rewards robust, well-regularised approaches over overfit ones.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Exploratory data analysis\n", - "\n", - "The goal of this section is to show what's in the data, and how to play with it.\n", - "This is the first set in any data science project, and here, you should give a sense of the data the participants will be working with.\n", + "## Exploratory Data Analysis\n", "\n", - "You can first load and describe the data, and then show some interesting properties of it." + "Let's load the raw training data and get a feel for what we're working with.\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -52,68 +63,184 @@ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", - "pd.set_option('display.max_columns', None)\n", "\n", - "# Load the data\n", - "from ingestion_program.ingestion import get_train_data\n", - "X_df, y = get_train_data(\"dev_phase/input_data\")" + "pd.set_option(\"display.max_columns\", None)\n", + "\n", + "DATA_DIR = \"dev_phase/input_data\"\n", + "\n", + "# Load raw CSV files for exploration\n", + "features = pd.read_csv(f\"{DATA_DIR}/train/train_features.csv\", index_col=0)\n", + "labels = pd.read_csv(f\"{DATA_DIR}/train/train_labels.csv\", index_col=0)\n", + "\n", + "print(f\"Training samples: {len(features)}\")\n", + "print(f\"Features: {list(features.columns)}\")\n", + "print(f\"\\nLabel distribution:\\n{labels['Target'].value_counts().rename({1: 'UP (1)', 0: 'DOWN (0)'})}\")\n", + "features.head()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot the Close price series with UP/DOWN days coloured\n", + "fig, axes = plt.subplots(2, 1, figsize=(14, 7), sharex=True)\n", + "\n", + "up = labels[\"Target\"] == 1\n", + "axes[0].plot(features.index, features[\"Close\"], color=\"steelblue\", 
linewidth=0.8)\n", + "axes[0].set_title(\"S&P 500 Close Price (training period)\")\n", + "axes[0].set_ylabel(\"Close price\")\n", + "\n", + "axes[1].bar(features.index[up], 1, color=\"green\", width=1, label=\"UP (1)\")\n", + "axes[1].bar(features.index[~up], 1, color=\"red\", width=1, label=\"DOWN (0)\")\n", + "axes[1].set_title(\"Daily direction label\")\n", + "axes[1].set_ylabel(\"Label\")\n", + "axes[1].legend(loc=\"upper right\")\n", + "axes[1].set_xlabel(\"Trading day index\")\n", + "\n", + "plt.tight_layout()\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Feature statistics\n", + "features.describe().round(2)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Challenge evaluation\n", + "## Challenge Evaluation\n", + "\n", + "Submissions are ranked by **ROC-AUC** (Area Under the ROC Curve) computed on a held-out test window.\n", + "\n", + "- A **perfect** model scores **1.0**\n", + "- **Random guessing** (outputting 0.5 for every sample) scores **≈ 0.5**\n", + "- Predicting hard 0/1 labels instead of probabilities will likely score around 0.5 — always output **sigmoid probabilities**.\n", "\n", - "A particularly important point in a challenge is to describe how it is evaluated. This is the section where you should describe the metric that will be used to evaluate the participants' submissions, as well as your evaluation strategy, in particular if there is some complexity in the way the data should be split to ensure valid results." 
+ "The key advantage of ROC-AUC is that it is **threshold-independent**: it rewards models that rank UP days above DOWN days regardless of the absolute probability values they produce.\n", + "\n", + "There are two splits:\n", + "- **Public test** — visible on the leaderboard during the development phase \n", + "- **Private test** — revealed only after the phase ends (final ranking)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# Submission format\n", + "## Submission Format\n", + "\n", + "You must submit a **single file** named `submission.py` that exposes one function:\n", + "\n", + "```python\n", + "def get_model(train_loader: torch.utils.data.DataLoader) -> torch.nn.Module:\n", + " ...\n", + " return model # already in eval mode\n", + "```\n", "\n", - "Here, you should describe the submission format. This is the format the participants should follow to submit their predictions on the codabench platform." + "**Contract:**\n", + "- `train_loader` yields `(x, y)` batches where `x` has shape `(batch, 50, 5)` and `y` has shape `(batch,)` with values in `{0, 1}`\n", + "- The returned model's `forward(x)` must accept `(batch, 50, 5)` tensors and return **probabilities in [0, 1]** of shape `(batch,)` — i.e. apply `sigmoid` inside `forward`\n", + "\n", + "The cell below shows the reference LSTM baseline included in the challenge.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## The submission file\n", + "### Baseline: LSTM Classifier\n", "\n", - "The input data are stored in a dataframe. To go from a dataframe to a numpy array we will use a scikit-learn column transformer. The first example we will write will just consist in selecting a subset of columns we want to work with." 
+ "The baseline uses a multi-layer LSTM that reads the 50-day window and outputs a direction probability from the last hidden state.\n", + "Feel free to replace this architecture entirely — a Transformer, a 1-D CNN, or even a simple MLP over flattened windows are all valid approaches.\n" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# %load solution/submission.py\n", - "from sklearn.ensemble import RandomForestClassifier\n", + "import torch\n", + "import torch.nn as nn\n", + "\n", + "# ── Hyper-parameters ──────────────────────────────────────────────────────────\n", + "HIDDEN_SIZE = 128\n", + "NUM_LAYERS = 3\n", + "DROPOUT = 0.1\n", + "N_EPOCHS = 3\n", + "LEARNING_RATE = 1e-4\n", + "# ─────────────────────────────────────────────────────────────────────────────\n", + "\n", + "\n", + "class LSTMClassifier(nn.Module):\n", + " \"\"\"Sequence-to-one LSTM: (batch, seq_len, n_features) → (batch,) probability.\"\"\"\n", + "\n", + " def __init__(self, input_size, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, dropout=DROPOUT):\n", + " super().__init__()\n", + " self.lstm = nn.LSTM(\n", + " input_size=input_size,\n", + " hidden_size=hidden_size,\n", + " num_layers=num_layers,\n", + " batch_first=True,\n", + " dropout=dropout if num_layers > 1 else 0.0,\n", + " )\n", + " self.head = nn.Linear(hidden_size, 1)\n", + "\n", + " def forward(self, x):\n", + " out, _ = self.lstm(x) # (batch, seq_len, hidden_size)\n", + " last = out[:, -1, :] # (batch, hidden_size) — last timestep\n", + " logit = self.head(last).squeeze(-1) # (batch,)\n", + " return torch.sigmoid(logit) # probability in [0, 1]\n", "\n", "\n", - "# The submission here should simply be a function that returns a model\n", - "# compatible with scikit-learn API\n", - "def get_model():\n", - " return RandomForestClassifier()\n" + "def get_model(train_loader):\n", + " x_sample, _ = next(iter(train_loader))\n", + " input_size = x_sample.shape[-1] 
# n_features (5 for raw OHLCV)\n", + "\n", + " device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + " model = LSTMClassifier(input_size=input_size).to(device)\n", + " optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)\n", + " criterion = nn.BCELoss()\n", + "\n", + " model.train()\n", + " for epoch in range(N_EPOCHS):\n", + " total_loss = 0.0\n", + " for x, y in train_loader:\n", + " x, y = x.to(device), y.to(device)\n", + " optimizer.zero_grad()\n", + " loss = criterion(model(x), y)\n", + " loss.backward()\n", + " optimizer.step()\n", + " total_loss += loss.item()\n", + " print(f\"Epoch {epoch + 1}/{N_EPOCHS} loss={total_loss / len(train_loader):.4f}\")\n", + "\n", + " model.eval()\n", + " return model\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Local testing pipeline\n", + "## Local Testing Pipeline\n", "\n", - "Here you can show how the model will be used to generate predictions on the test set, and how the evaluation will be performed." + "Before submitting to Codabench you can run the full ingestion + scoring pipeline locally.\n", + "This mirrors exactly what happens on the platform.\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -133,31 +260,49 @@ } ], "source": [ - "model = get_model()\n", - "X_train, y_train = get_train_data(\"dev_phase/input_data\")\n", - "model.fit(X_train, y_train)\n", + "import sys\n", + "sys.path.insert(0, \".\") # make sure ingestion_program/ and solution/ are importable\n", + "\n", + "import torch\n", + "from ingestion_program.ingestion import get_train_dataset, get_test_dataset, evaluate_model\n", + "from scoring_program.scoring import compute_roc_auc\n", + "\n", + "DATA_DIR = \"dev_phase/input_data\"\n", + "\n", + "# ── 1. 
Build training DataLoader ──────────────────────────────────────────────\n", + "train_dataset = get_train_dataset(DATA_DIR)\n", + "train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)\n", + "print(f\"Training samples : {len(train_dataset)}\")\n", + "\n", + "# ── 2. Train the model ────────────────────────────────────────────────────────\n", + "model = get_model(train_loader)\n", "\n", - "X_test = pd.read_csv(\"dev_phase/input_data/test/test_features.csv\")\n", - "from ingestion_program.ingestion import evaluate_model\n", - "y_test = evaluate_model(model, X_test)\n", + "# ── 3. Predict on the public test set ────────────────────────────────────────\n", + "test_dataset = get_test_dataset(DATA_DIR, \"test\")\n", + "predictions = evaluate_model(model, test_dataset) # DataFrame with \"Probability\" column\n", "\n", - "from scoring_program.scoring import compute_accuracy\n", - "print(\"Accuracy on test set:\", compute_accuracy(y_test, pd.read_csv(\"dev_phase/input_data/test/test_labels.csv\")))" + "# ── 4. Score against the reference labels ────────────────────────────────────\n", + "import pandas as pd\n", + "test_labels = pd.read_csv(\"dev_phase/reference_data/test_labels.csv\")\n", + "auc = compute_roc_auc(predictions, test_labels[\"Target\"].values)\n", + "print(f\"\\nROC-AUC on public test set: {auc:.4f}\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Submission\n", + "## Submitting to Codabench\n", + "\n", + "1. Write your `submission.py` — it must define `get_model(train_loader)` returning a trained `nn.Module`.\n", + "2. Create a zip containing only that file:\n", + " ```bash\n", + " zip submission.zip submission.py\n", + " ```\n", + "3. Go to the challenge page on Codabench → **My Submissions** → **Upload**.\n", "\n", - "To submit your code, you can refer to the actual challenge." 
+ "That's it — the platform will run your code, output predictions, compute the ROC-AUC, and update the leaderboard automatically.\n" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { diff --git a/tools/create_bundle.py b/tools/create_bundle.py index 9f612ec..01e1398 100644 --- a/tools/create_bundle.py +++ b/tools/create_bundle.py @@ -13,6 +13,7 @@ ROOT_DIR / "competition.yaml", ROOT_DIR / "logo.png", ROOT_DIR / "solution/submission.py", + ROOT_DIR / "template_starting_kit.ipynb", ] From d0e211413c1b406968a0e839804742ccb85eff83 Mon Sep 17 00:00:00 2001 From: nnoya Date: Tue, 10 Mar 2026 18:05:53 +0100 Subject: [PATCH 14/14] fix docker image --- competition.yaml | 11 +++++------ ingestion_program/metadata.yaml | 3 ++- pages/data.md | 4 ++++ requirements.txt | 4 ++-- scoring_program/metadata.yaml | 3 ++- tools/Dockerfile | 22 ++++++++++------------ tools/run_docker.py | 8 ++++---- 7 files changed, 29 insertions(+), 26 deletions(-) create mode 100644 pages/data.md diff --git a/competition.yaml b/competition.yaml index dd59300..451fbf8 100755 --- a/competition.yaml +++ b/competition.yaml @@ -1,5 +1,10 @@ version: 2 title: "Directional Forecasting of the S&P 500 Index" +# Docker image used by Codabench to run ingestion and scoring. +# Build and push with: +# docker build -t nicolasnoya2001/sp500-challenge:v2 -f tools/Dockerfile . +# docker push nicolasnoya2001/sp500-challenge:v2 +docker_image: nicolasnoya2001/sp500-challenge:v2 description: > Can you predict whether the S&P 500 will close UP or DOWN tomorrow? @@ -47,12 +52,6 @@ tasks: scoring_program: scoring_program/ public_data: dev_phase/input_data/train starting_kit: template_starting_kit.ipynb - # Docker image used by Codabench to run ingestion and scoring. - # Build and push with: - # docker build -t /sp500-challenge:latest -f tools/Dockerfile . - # docker push /sp500-challenge:latest - # Then replace the placeholder below with your actual image name. 
- docker_image: nicolasnoya2001/sp500-challenge:latest solutions: - index: 0 diff --git a/ingestion_program/metadata.yaml b/ingestion_program/metadata.yaml index bcf0d24..8a64ce5 100755 --- a/ingestion_program/metadata.yaml +++ b/ingestion_program/metadata.yaml @@ -1 +1,2 @@ -command: python3 ingestion.py \ No newline at end of file +command: python3 ingestion.py +image: nicolasnoya2001/sp500-challenge:v2 diff --git a/pages/data.md b/pages/data.md new file mode 100644 index 0000000..22ee78c --- /dev/null +++ b/pages/data.md @@ -0,0 +1,4 @@ +You can download the data for this challenge from here: + +- Training Features: https://nicolas-public-images.s3.us-east-1.amazonaws.com/train/train_features.csv +- True Labels: https://nicolas-public-images.s3.us-east-1.amazonaws.com/train/train_labels.csv \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 41980e6..2956b1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ # ── Core runtime (ingestion + scoring + submission) ─────────────────────────── numpy==2.2.6 pandas==2.3.3 -scikit-learn==1.7.2 +scikit-learn # PyTorch CPU build — participants may swap for a GPU wheel if needed -torch==2.8.0 +torch # ── Local development tools ─────────────────────────────────────────────────── # Required only for tools/run_docker.py (not installed inside the Docker image) diff --git a/scoring_program/metadata.yaml b/scoring_program/metadata.yaml index 1dfabf8..fabbca7 100755 --- a/scoring_program/metadata.yaml +++ b/scoring_program/metadata.yaml @@ -1 +1,2 @@ -command: python3 scoring.py \ No newline at end of file +command: python3 scoring.py +image: nicolasnoya2001/sp500-challenge:v2 diff --git a/tools/Dockerfile b/tools/Dockerfile index e3fa61a..3724efa 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -31,15 +31,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app # Pre-install Python dependencies. 
-# The 'docker' package is a local dev tool — skip it inside the container.
-# torch is installed from the official CPU wheel index to avoid pulling the
-# full CUDA build (~2 GB saved).
-COPY requirements.txt /tmp/requirements.txt
-RUN pip install --no-cache-dir --upgrade pip \
-    && grep -v "^docker" /tmp/requirements.txt \
-    | grep -v "^#" \
-    | grep -v "^$" \
-    | grep -v "torch" \
-    | pip install --no-cache-dir -r /dev/stdin \
-    && pip install --no-cache-dir \
-    torch==2.8.0 --index-url https://download.pytorch.org/whl/cpu
+# torch is installed explicitly with the CPU-only wheel to keep the image small.
+# The 'docker' package is a local dev tool and is excluded from the container.
+RUN pip install --no-cache-dir --upgrade pip
+
+# Install torch CPU wheel first (separate layer so it's cached independently)
+# RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+
+# Install remaining runtime deps (excluding torch and docker which are handled separately)
+RUN pip install --no-cache-dir numpy pandas scikit-learn
diff --git a/tools/run_docker.py b/tools/run_docker.py
index 9cccfda..0413a40 100644
--- a/tools/run_docker.py
+++ b/tools/run_docker.py
@@ -18,15 +18,15 @@
     client.images.build(
         path=str(REPO),
         dockerfile=str(REPO / "tools" / "Dockerfile"),
-        tag="nicolasnoya2001/sp500-challenge:latest",
+        tag="nicolasnoya2001/sp500-challenge:v2",
    )
    print(
-        "Docker image built successfully with tag 'nicolasnoya2001/sp500-challenge:latest'."
+        "Docker image built successfully with tag 'nicolasnoya2001/sp500-challenge:v2'."
) print("Running Docker container...") logs = client.containers.run( - image="nicolasnoya2001/sp500-challenge:latest", + image="nicolasnoya2001/sp500-challenge:v2", command="python3 /app/ingestion_program/ingestion.py", remove=True, name="ingestion", @@ -40,7 +40,7 @@ ) print(logs.decode("utf-8")) logs = client.containers.run( - image="nicolasnoya2001/sp500-challenge:latest", + image="nicolasnoya2001/sp500-challenge:v2", command="python3 /app/scoring_program/scoring.py", remove=True, name="scoring",