diff --git a/.gitignore b/.gitignore index 9728749..4c2f2aa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ ingestion_res/* scoring_res/* dev_phase/* +*.pth diff --git a/competition.yaml b/competition.yaml index 3a1e4de..451fbf8 100755 --- a/competition.yaml +++ b/competition.yaml @@ -1,8 +1,34 @@ version: 2 -title: Templat competition - Dummy classification -description: Dummy classification task +title: "Directional Forecasting of the S&P 500 Index" +# Docker image used by Codabench to run ingestion and scoring. +# Build and push with: +# docker build -t nicolasnoya2001/sp500-challenge:v2 -f tools/Dockerfile . +# docker push nicolasnoya2001/sp500-challenge:v2 +docker_image: nicolasnoya2001/sp500-challenge:v2 +description: > + Can you predict whether the S&P 500 will close UP or DOWN tomorrow? + + Each trading day, participants receive a historical feature vector built from + past daily OHLCV data (Open, High, Low, Close, Volume) of the S&P 500 index. + + The target label is binary: **1** if the next trading day's close is strictly + above the current day's close, **0** otherwise. Participants are encouraged to + engineer their own historical context features (e.g., rolling volatility, moving averages) + using the provided sequential data. + + Participants submit a PyTorch model via a `submission.py` file exposing a `get_model(train_loader)` + function. The ingestion program passes a `DataLoader` yielding `(x, y)` batches where: + - `x` is a `FloatTensor` of shape `(batch, WINDOW_SIZE, n_features)` — a sliding window of historical daily features + - `y` is a `FloatTensor` of shape `(batch,)` — binary labels (1 = up, 0 = down) + + `get_model` must return a trained `torch.nn.Module` whose forward pass accepts a tensor of + shape `(batch, WINDOW_SIZE, n_features)` and returns **probabilities in [0, 1]** of shape `(batch,)`. + + Submissions are ranked by their **ROC-AUC** score computed from the predicted probabilities. 
+ + This is a DataCamp challenge organised at École Polytechnique (INF554 / MAP583). image: logo.png -registration_auto_approve: False # if True, do not require approval from admin to join the comp +registration_auto_approve: False terms: pages/terms.md pages: @@ -15,23 +41,30 @@ pages: tasks: - index: 0 - name: Developement Task - description: 'Tune models with training data, test against examples contained in public test data' + name: Development Task + description: > + Next-day close direction forecasting of the S&P 500 using sliding windows of daily OHLCV data. + Models must be PyTorch modules trained via `get_model(train_loader)` and must output + probabilities (not hard 0s and 1s) to be properly scored via ROC-AUC over a public held-out test window. input_data: dev_phase/input_data/ reference_data: dev_phase/reference_data/ ingestion_program: ingestion_program/ scoring_program: scoring_program/ + public_data: dev_phase/input_data/train + starting_kit: template_starting_kit.ipynb solutions: - index: 0 tasks: - - 0 + - 0 path: solution/ - phases: - name: Development Phase - description: 'Development phase: tune your models.' + description: > + Tune and validate your forecasting model using the provided historical + S&P 500 training data. Your predictions are scored against a public test set + so you can iterate quickly. Unlimited submissions are allowed in this phase. 
start: 10-07-2025 end: 03-31-2026 tasks: @@ -41,20 +74,20 @@ leaderboards: - title: Results key: main columns: - - title: Test Accuracy + - title: ROC-AUC (public test) key: test index: 0 - sorting: asc - - title: Private Test Accuracy + sorting: desc # higher is better + - title: ROC-AUC (private test) key: private_test index: 1 - sorting: asc - hidden: True - - title: Train time + sorting: desc + hidden: True # revealed only after the phase ends + - title: Train Time (s) key: train_time index: 2 - sorting: desc - - title: Test time + sorting: asc # lower is better + - title: Predict Time (s) key: test_time index: 3 - sorting: desc + sorting: asc \ No newline at end of file diff --git a/ingestion_program/ingestion.py b/ingestion_program/ingestion.py index f150b05..3c3143f 100755 --- a/ingestion_program/ingestion.py +++ b/ingestion_program/ingestion.py @@ -3,53 +3,156 @@ import time from pathlib import Path +import numpy as np import pandas as pd +import torch +# Number of past trading days fed as a sequence to the model. +# Must be consistent between training and inference. +WINDOW_SIZE = 50 EVAL_SETS = ["test", "private_test"] -def evaluate_model(model, X_test): - - y_pred = model.predict(X_test) - return pd.DataFrame(y_pred) +class SP500Dataset(torch.utils.data.Dataset): + """PyTorch Dataset for the S&P 500 direction-forecasting challenge. + + Each sample is a sliding window of shape (WINDOW_SIZE, n_features) + ending at day `idx`. The target is the binary label of that last day + (1 = close > prev_close, 0 otherwise). + + For the first WINDOW_SIZE-1 days, the window is left-padded with zeros. + + Parameters + ---------- + features_path : Path + Path to the features CSV (columns = feature names, rows = trading days + in chronological order). + labels_path : Path or None + Path to the labels CSV (single column, same row order as features). + Pass None for test sets where labels are withheld. 
+ window_size : int + Number of past days (inclusive of the current day) in each sequence. + """ + + def __init__( + self, features_path, labels_path=None, window_size=WINDOW_SIZE + ): + self.window_size = window_size + # index_col=0: the first column is the row index saved by setup_data.py, + # not a feature — must be excluded from the data arrays. + self.X = pd.read_csv(features_path, index_col=0).values.astype( + np.float32 + ) + self.n_features = self.X.shape[1] + if labels_path is not None: + self.y = ( + pd.read_csv(labels_path, index_col=0) + .values.astype(np.float32) + .ravel() + ) + else: + self.y = None # test mode — labels are unknown + + def __len__(self): + return len(self.X) + + def __getitem__(self, idx): + """Return (window, label) where window has shape (window_size, n_features). + + The label is the binary target for day `idx` (the last day of the window). + During test mode (no labels), only the window tensor is returned. + """ + window_start = max(0, idx - self.window_size + 1) + window = self.X[window_start : idx + 1] # (<=window_size, n_features) + + # Left-pad with zeros if we are at the beginning of the series + if len(window) < self.window_size: + padding = np.zeros( + (self.window_size - len(window), self.n_features), + dtype=np.float32, + ) + window = np.concatenate([padding, window], axis=0) + + x = torch.tensor( + window, dtype=torch.float32 + ) # (window_size, n_features) + + if self.y is not None: + y = torch.tensor(self.y[idx], dtype=torch.float32) # scalar + return x, y + return x # test mode + + +def get_train_dataset(data_dir): + """Build the training Dataset from separate features and labels CSVs.""" + data_dir = Path(data_dir) + features_path = data_dir / "train" / "train_features.csv" + labels_path = data_dir / "train" / "train_labels.csv" + return SP500Dataset(features_path, labels_path) -def get_train_data(data_dir): +def get_test_dataset(data_dir, eval_set): + """Build a test Dataset (no labels) for a given evaluation 
split.""" data_dir = Path(data_dir) - training_dir = data_dir / "train" - X_train = pd.read_csv(training_dir / "train_features.csv") - y_train = pd.read_csv(training_dir / "train_labels.csv") - return X_train, y_train + features_path = data_dir / eval_set / f"{eval_set}_features.csv" + return SP500Dataset(features_path, labels_path=None) -def main(data_dir, output_dir): - # Here, you can import info from the submission module, to evaluate the - # submission - from submission import get_model +def evaluate_model(model, test_dataset): + """Run inference over a test Dataset and return a DataFrame of probabilities. - X_train, y_train = get_train_data(data_dir) + The model outputs probabilities in [0, 1] (sigmoid already applied). + The scoring program is responsible for applying the decision threshold. + """ + device = next(model.parameters()).device + loader = torch.utils.data.DataLoader( + test_dataset, batch_size=64, shuffle=False + ) + probs = [] + model.eval() + with torch.no_grad(): + for x in loader: + # test_dataset returns bare tensors (no label) — x is already the input + x = x.to(device) + batch_probs = model(x).cpu().numpy().tolist() # floats in [0, 1] + probs.extend(batch_probs) + return pd.DataFrame({"Probability": probs}) - print("Training the model") - model = get_model() +def main(data_dir, output_dir): + from submission import ( + get_model, + ) # imported here so sys.path is set first + data_dir = Path(data_dir) + output_dir = Path(output_dir) + + # ── Training ────────────────────────────────────────────────────────────── + train_dataset = get_train_dataset(data_dir) + train_loader = torch.utils.data.DataLoader( + train_dataset, batch_size=32, shuffle=True + ) + + print("Training the model") start = time.time() - model.fit(X_train, y_train) + model = get_model(train_loader) # participant trains and returns the model train_time = time.time() - start - print("-" * 10) + + # ── Evaluation ──────────────────────────────────────────────────────────── + 
print("=" * 40) print("Evaluate the model") start = time.time() res = {} for eval_set in EVAL_SETS: - X_test = pd.read_csv(data_dir / eval_set / f"{eval_set}_features.csv") - res[eval_set] = evaluate_model(model, X_test) + test_dataset = get_test_dataset(data_dir, eval_set) + res[eval_set] = evaluate_model(model, test_dataset) test_time = time.time() - start - print("-" * 10) - duration = train_time + test_time - print(f"Completed Prediction. Total duration: {duration}") + print( + f"Completed Prediction. Total duration: {train_time + test_time:.1f}s" + ) - # Write output files + # ── Write outputs ───────────────────────────────────────────────────────── output_dir.mkdir(parents=True, exist_ok=True) with open(output_dir / "metadata.json", "w+") as f: json.dump(dict(train_time=train_time, test_time=test_time), f) @@ -70,19 +173,25 @@ def main(data_dir, output_dir): "--data-dir", type=str, default="/app/input_data", - help="", + help="Root folder containing train/, test/, and private_test/ splits. " + "Codabench mounts data at /app/input_data. " + "For local testing pass: --data-dir dev_phase/input_data", ) parser.add_argument( "--output-dir", type=str, default="/app/output", - help="", + help="Folder where prediction CSVs and metadata.json will be written. " + "Codabench expects output at /app/output. " + "For local testing pass: --output-dir ingestion_res", ) parser.add_argument( "--submission-dir", type=str, default="/app/ingested_program", - help="", + help="Directory containing submission.py. " + "Codabench mounts participant code at /app/ingested_program. 
" + "For local testing pass: --submission-dir solution", ) args = parser.parse_args() diff --git a/ingestion_program/metadata.yaml b/ingestion_program/metadata.yaml index bcf0d24..8a64ce5 100755 --- a/ingestion_program/metadata.yaml +++ b/ingestion_program/metadata.yaml @@ -1 +1,2 @@ -command: python3 ingestion.py \ No newline at end of file +command: python3 ingestion.py +image: nicolasnoya2001/sp500-challenge:v2 diff --git a/logo.png b/logo.png index 9616456..5255f04 100644 Binary files a/logo.png and b/logo.png differ diff --git a/pages/data.md b/pages/data.md new file mode 100644 index 0000000..22ee78c --- /dev/null +++ b/pages/data.md @@ -0,0 +1,4 @@ +You can download the data for this challenge from here: + +- Training Features: https://nicolas-public-images.s3.us-east-1.amazonaws.com/train/train_features.csv +- True Labels: https://nicolas-public-images.s3.us-east-1.amazonaws.com/train/train_labels.csv \ No newline at end of file diff --git a/pages/participate.md b/pages/participate.md index 4d5427b..1cfc38d 100755 --- a/pages/participate.md +++ b/pages/participate.md @@ -1,10 +1,61 @@ -# How to participate +# How to Participate -You should submit an untrained model in a python file `model.py` which contains -your `class Model`, which will be imported, trained, and tested on Codalab. +## Objective -See the "Seed" page for the outline of a `Model` class, with the expected -function names. +Build a model that predicts whether the S&P 500 index will **close strictly above** the current day's close on the **next trading day**, +using only the provided historical OHLCV features. 
-See the "Timeline" page for additional information about the phases of this -competition +## Input Features + +Each sample in the dataset is a row in a CSV with the following columns (all values are for the **current trading day** or computed from past days only): + +| Column | Description | +|--------|-------------| +| `Open` | Opening price of the trading day | +| `High` | Intraday high | +| `Low` | Intraday low | +| `Close` | Closing price of the trading day | +| `Volume` | Total trading volume | + +The ingestion program constructs **sliding windows** of the last **50 trading days** for each sample and feeds them to your model as tensors of shape `(batch, 50, n_features)`. + +## Target Label + +- **1** — today's close will be **strictly above** the previous close +- **0** — today's close will be **at or below** the previous close + +## What to Submit + +Submit a single file named **`submission.py`** containing a function: + +```python +def get_model(train_loader): + ... + return model +``` + +`train_loader` is a `torch.utils.data.DataLoader` yielding `(x, y)` batches where: +- `x` has shape `(batch, 50, n_features)` — a sliding window of the last 50 daily feature vectors +- `y` has shape `(batch,)` — binary labels `{0, 1}` + +Your `get_model` function must **train the model** using the provided loader and return a trained `torch.nn.Module` whose `forward(x)` outputs **probabilities in [0, 1]** of shape `(batch,)` — i.e. sigmoid must already be applied inside `forward`. + +See the **Seed** page for a working skeleton to get started. + +## Evaluation Metric + +Submissions are ranked by **ROC-AUC score** on the held-out test set. +A perfect model scores 1.0; random guessing scores ~0.5. + +## How to Submit + +1. Write your `submission.py` with a `get_model(train_loader)` function. +2. Zip it: `zip submission.zip submission.py` +3. Upload the zip on the **My Submissions** page. 
+ +## Rules + +- Your model may only use information in the provided feature set — no external data sources. +- External Python libraries (e.g. `torch`, `sklearn`, `numpy`) are allowed. +- You may submit as many times as you like during the Development Phase. +- The private test set is only revealed after the phase ends. diff --git a/pages/seed.md b/pages/seed.md index 9b15f6a..6853dfe 100644 --- a/pages/seed.md +++ b/pages/seed.md @@ -1,21 +1,85 @@ -# Seed: +# Seed — Starter Template +Copy this file as `submission.py` and implement your model inside `get_model`. + +The ingestion program will call `get_model(train_loader)` and expect back a trained +`torch.nn.Module` whose `forward(x)` returns probabilities in **[0, 1]**. + +```python +import torch +import torch.nn as nn + + +def get_model(train_loader): + """ + Train a model on the S&P 500 direction-forecasting task and return it. + + Parameters + ---------- + train_loader : torch.utils.data.DataLoader + Yields (x, y) batches where: + x — FloatTensor of shape (batch, 50, n_features) + A sliding window of the last 50 daily feature vectors. + Features: Open, High, Low, Close, Volume (current and past days). + y — FloatTensor of shape (batch,) + Binary label: 1 if today's close > previous close, else 0. + + Returns + ------- + model : torch.nn.Module + Trained model in eval() mode. + forward(x) must accept shape (batch, 50, n_features) + and return probabilities in [0, 1] of shape (batch,). + Probabilities are used directly by the scoring program to compute ROC-AUC. 
+ """ + + # --- Infer input size from the first batch --- + x_sample, _ = next(iter(train_loader)) + input_size = x_sample.shape[-1] # number of features per timestep + seq_len = x_sample.shape[1] # window size (50) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + # --- Define your model here --- + # Example: single-layer LSTM + linear head + sigmoid + class MyModel(nn.Module): + def __init__(self): + super().__init__() + self.lstm = nn.LSTM(input_size, hidden_size=64, + num_layers=1, batch_first=True) + self.head = nn.Linear(64, 1) + + def forward(self, x): + out, _ = self.lstm(x) # (batch, seq_len, 64) + last = out[:, -1, :] # (batch, 64) — last timestep + return torch.sigmoid(self.head(last).squeeze(-1)) # (batch,) + + model = MyModel().to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + criterion = nn.BCELoss() # BCELoss because sigmoid is already applied + + # --- Training loop --- + N_EPOCHS = 10 + model.train() + for epoch in range(N_EPOCHS): + total_loss = 0.0 + for x, y in train_loader: + x, y = x.to(device), y.to(device) + optimizer.zero_grad() + probs = model(x) # (batch,) + loss = criterion(probs, y) + loss.backward() + optimizer.step() + total_loss += loss.item() + print(f"Epoch {epoch+1}/{N_EPOCHS} loss={total_loss/len(train_loader):.4f}") + + model.eval() + return model ``` -class Model: - def fit(self, X_train, y_train): - """ - This should handle the logic of training your model - :param X_train: np.array of training data - :param y_train: np.array of the same length as X_train. 
Contains classifications of X_train - """ - pass - - def predict(self, X_test): - """ - This should handle making predictions with a trained model - :param X_test: np.array of testing data - :return: np.array of the same length as X_test containing predictions to each point in X_test - """ - pass - -``` \ No newline at end of file + +## Tips + +- You can replace the LSTM with a GRU (`nn.GRU`), Transformer (`nn.TransformerEncoder`), or any other architecture. +- The window size is fixed at **50** timesteps by the ingestion program. +- Keep training time reasonable — the Codabench environment has limited CPU resources. +- You are free to add dropout, batch normalisation, learning rate schedulers, etc. diff --git a/pages/terms.md b/pages/terms.md index 0d69023..e6135f0 100755 --- a/pages/terms.md +++ b/pages/terms.md @@ -1,18 +1,73 @@ # Terms and Conditions -## Lorem Ipsum - -Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. -Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure -dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non -proident, sunt in culpa qui officia deserunt mollit anim id est laborum. - -## Sed ut perspiciatis - -Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium, totam rem aperiam, eaque ipsa quae -ab illo inventore veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam voluptatem quia voluptas sit -aspernatur aut odit aut fugit, sed quia consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam -est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore -et dolore magnam aliquam quaerat voluptatem. 
Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis suscipit laboriosam, -nisi ut aliquid ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae -consequatur, vel illum qui dolorem eum fugiat quo voluptas nulla pariatur? +## 1. Eligibility + +This challenge is open to students enrolled in the DataCamp courses at École Polytechnique (INF554 / MAP583). Participation is voluntary and free of charge. + +Participants may compete individually or in teams, subject to the rules specified by the course instructors. +2. Data Usage + +The dataset provided for this challenge (historical S&P 500 daily OHLCV data sourced from public market data) is intended solely for educational and research purposes within the scope of this course. + +Participants agree to: + + Use the data only for the purpose of this challenge. + Not redistribute the data outside the course or upload it to public repositories. + Not attempt to identify, reverse-engineer, or misuse the data beyond its intended scientific context. + +3. Training and Evaluation Restrictions + +Participants must comply with the following rules: + + Only the provided training split may be used for training and validation. + The test and private test splits are reserved for evaluation only and must not be used in the training loop. + Any attempt to directly or indirectly train on evaluation data will result in disqualification. + +4. Submission Rules + +Participants must submit: + + A single file named `submission.py` containing a `get_model(train_loader)` function, zipped as `submission.zip` (see the **Seed** page for a full working template). + Any accompanying code or documentation as specified by the instructors. + +Submissions must be the original work of the participants. +5. Academic Integrity + +Participants are expected to adhere to the École Polytechnique’s academic integrity policies. 
+ +Specifically: + + Plagiarism, including copying code or solutions without proper attribution, is prohibited. + The use of external libraries and pre-trained models is allowed unless otherwise stated, provided their use is clearly documented. + Collaboration between teams is not allowed unless explicitly permitted by the instructors. + +Violations of academic integrity rules may result in penalties, including disqualification or academic sanctions. +6. Intellectual Property + +Participants retain ownership of the code and models they develop as part of this challenge. + +By submitting their results, participants grant the course instructors and the hosting institution a non-exclusive, royalty-free right to: + + Use the submissions for grading and evaluation. + Use anonymized results or visualizations for teaching, presentations, or future course materials. + +7. Liability + +The organizers provide the data and evaluation infrastructure “as is” and make no guarantees regarding accuracy, completeness, or fitness for a particular purpose. + +The organizers are not responsible for: + + Technical issues, data loss, or submission failures. + Any damages or losses arising from participation in the challenge. + +8. Modification and Termination + +The organizers reserve the right to: + + Modify the challenge rules, datasets, or evaluation criteria if necessary. + Terminate the challenge in case of technical issues or unforeseen circumstances. + +Any changes will be communicated to participants in a timely manner. +9. Acceptance of Terms + +By participating in the challenge, participants acknowledge that they have read, understood, and agreed to these Terms and Conditions. 
diff --git a/pages/timeline.md b/pages/timeline.md index 4e613bf..5ef189a 100644 --- a/pages/timeline.md +++ b/pages/timeline.md @@ -1,11 +1,27 @@ # Timeline -## Development phase -This phase should be used to tune your models, testing against a small -set of testing data - -## Final phase -Resubmit your preferred submission from the development phase to test -against a new set of testing data. Your model will be trained against -the same set of training data as in the development phase. You may only -make one submission to this phase, so choose wisely. +## Development Phase — October 7, 2025 → March 31, 2026 + +The development phase is open for the full duration of the course. + +- **Training data**: historical S&P 500 daily OHLCV data (roughly 2006–2022). +- **Public test set**: a held-out window of ~250 trading days (~2022–2023). + Your submission is scored against this set after every submission. +- **Submissions**: unlimited — iterate freely and track your progress on the leaderboard. +- **Goal**: tune your model architecture, features, and hyper-parameters to maximise ROC-AUC on the public test set. + +## Private Leaderboard — revealed at end of Development Phase + +Once the development phase closes on **March 31, 2026**, the private test set +(a further ~250 trading days, ~2023–2024) is scored for all submissions. +Final rankings are based on the **private test ROC-AUC**. + +The private test set is completely hidden during the development phase — optimising solely for the public leaderboard may not generalise. 
+ +## Key Dates + +| Date | Event | +|------|-------| +| October 7, 2025 | Competition opens, development phase begins | +| March 31, 2026 | Development phase closes, no further submissions accepted | +| Early April 2026 | Private leaderboard revealed, final rankings published | diff --git a/raw_data/.gitkeep b/raw_data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index fda4dd6..2956b1e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,11 @@ -pandas +# ── Core runtime (ingestion + scoring + submission) ─────────────────────────── +numpy==2.2.6 +pandas==2.3.3 scikit-learn + +# PyTorch CPU build — participants may swap for a GPU wheel if needed +torch + +# ── Local development tools ─────────────────────────────────────────────────── +# Required only for tools/run_docker.py (not installed inside the Docker image) +docker diff --git a/scoring_program/metadata.yaml b/scoring_program/metadata.yaml index 1dfabf8..fabbca7 100755 --- a/scoring_program/metadata.yaml +++ b/scoring_program/metadata.yaml @@ -1 +1,2 @@ -command: python3 scoring.py \ No newline at end of file +command: python3 scoring.py +image: nicolasnoya2001/sp500-challenge:v2 diff --git a/scoring_program/scoring.py b/scoring_program/scoring.py index 701f78f..27b8f53 100755 --- a/scoring_program/scoring.py +++ b/scoring_program/scoring.py @@ -2,40 +2,41 @@ from pathlib import Path import pandas as pd +from sklearn.metrics import roc_auc_score EVAL_SETS = ["test", "private_test"] -def compute_accuracy(predictions, targets): - # Make sure there is no NaN, as pandas ignores them in mean computation - predictions = predictions.fillna(-10).values - # Return mean of correct predictions - return (predictions == targets.values).mean() +def compute_roc_auc(predictions, targets): + # Flatten to 1D — .values on a single-column DataFrame gives shape (n, 1) + preds = predictions.iloc[:, 0].fillna(0.5).to_numpy() + # Return ROC AUC score + return 
roc_auc_score(targets, preds) def main(reference_dir, prediction_dir, output_dir): scores = {} for eval_set in EVAL_SETS: - print(f'Scoring {eval_set}') + print(f"Scoring {eval_set}") predictions = pd.read_csv( - prediction_dir / f'{eval_set}_predictions.csv' - ) - targets = pd.read_csv( - reference_dir / f'{eval_set}_labels.csv' + prediction_dir / f"{eval_set}_predictions.csv" ) + targets = pd.read_csv(reference_dir / f"{eval_set}_labels.csv") - scores[eval_set] = float(compute_accuracy(predictions, targets)) + scores[eval_set] = float( + compute_roc_auc(predictions, targets["Target"].values) + ) # Add train and test times in the score - json_durations = (prediction_dir / 'metadata.json').read_text() + json_durations = (prediction_dir / "metadata.json").read_text() durations = json.loads(json_durations) scores.update(**durations) print(scores) # Write output scores output_dir.mkdir(parents=True, exist_ok=True) - (output_dir / 'scores.json').write_text(json.dumps(scores)) + (output_dir / "scores.json").write_text(json.dumps(scores)) if __name__ == "__main__": @@ -68,5 +69,5 @@ def main(reference_dir, prediction_dir, output_dir): main( Path(args.reference_dir), Path(args.prediction_dir), - Path(args.output_dir) + Path(args.output_dir), ) diff --git a/solution/submission.py b/solution/submission.py index a8076b0..436486b 100644 --- a/solution/submission.py +++ b/solution/submission.py @@ -1,7 +1,108 @@ -from sklearn.ensemble import RandomForestClassifier +""" +Reference LSTM baseline for the S&P 500 direction-forecasting challenge. 
+The ingestion program will call:

-# The submission here should simply be a function that returns a model
-# compatible with scikit-learn API
-def get_model():
-    return RandomForestClassifier()
+    model = get_model(train_loader)
+
+where `train_loader` is a torch.utils.data.DataLoader that yields
+(x, y) batches with:
+    x : FloatTensor of shape (batch, WINDOW_SIZE, n_features)
+    y : FloatTensor of shape (batch,) — binary labels (1 = up, 0 = down)
+
+`get_model` must return a trained torch.nn.Module whose forward pass accepts
+a tensor of shape (batch, WINDOW_SIZE, n_features) and returns probabilities
+in [0, 1] of shape (batch,). The scoring program computes ROC-AUC directly from these probabilities.
+"""
+
+import torch
+import torch.nn as nn
+
+
+# ── Hyper-parameters (feel free to tune) ─────────────────────────────────────
+HIDDEN_SIZE = 128
+NUM_LAYERS = 3
+DROPOUT = 0.1
+N_EPOCHS = 3
+LEARNING_RATE = 1e-4
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+class LSTMClassifier(nn.Module):
+    """Sequence-to-one LSTM for binary direction prediction.
+
+    Takes a window of shape (batch, seq_len, input_size) and returns
+    a probability per sample (shape: (batch,)).
+ + Architecture + ------------ + LSTM (num_layers, hidden_size, dropout) → hidden state of last timestep + → Linear(hidden_size → 1) → squeeze → Sigmoid → probability in [0, 1] + """ + + def __init__( + self, + input_size: int, + hidden_size: int = HIDDEN_SIZE, + num_layers: int = NUM_LAYERS, + dropout: float = DROPOUT, + ): + super().__init__() + self.lstm = nn.LSTM( + input_size=input_size, + hidden_size=hidden_size, + num_layers=num_layers, + batch_first=True, + dropout=dropout if num_layers > 1 else 0.0, + ) + self.head = nn.Linear(hidden_size, 1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # x: (batch, seq_len, input_size) + out, _ = self.lstm(x) # (batch, seq_len, hidden_size) + last = out[:, -1, :] # (batch, hidden_size) — last timestep + logit = self.head(last).squeeze(-1) # (batch,) + return torch.sigmoid(logit) # (batch,) — probability in [0, 1] + + +def get_model(train_loader: torch.utils.data.DataLoader) -> nn.Module: + """Train an LSTM on the provided DataLoader and return the trained model. + + Parameters + ---------- + train_loader : DataLoader + Yields (x, y) batches where x has shape (batch, WINDOW_SIZE, n_features) + and y has shape (batch,) with values in {0, 1}. + + Returns + ------- + model : nn.Module (in eval mode) + Trained LSTMClassifier whose forward pass returns probabilities in [0, 1]. 
+ """ + # Infer input size from the first batch + x_sample, _ = next(iter(train_loader)) + input_size = x_sample.shape[-1] # n_features + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print(f"Training on: {device}") + + model = LSTMClassifier(input_size=input_size).to(device) + optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE) + criterion = nn.BCELoss() # model already applies sigmoid + + model.train() + for epoch in range(N_EPOCHS): + total_loss = 0.0 + for x, y in train_loader: + x, y = x.to(device), y.to(device) + optimizer.zero_grad() + probs = model(x) # (batch,) — probabilities in [0, 1] + loss = criterion(probs, y) + loss.backward() + optimizer.step() + total_loss += loss.item() + avg_loss = total_loss / len(train_loader) + print(f" Epoch {epoch + 1:>2}/{N_EPOCHS} loss={avg_loss:.4f}") + + model.eval() + return model diff --git a/template_starting_kit.ipynb b/template_starting_kit.ipynb index 7167a3a..2434ef9 100644 --- a/template_starting_kit.ipynb +++ b/template_starting_kit.ipynb @@ -4,17 +4,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "
\n", + "# Directional Forecasting of the S&P 500 Index\n", "\n", - "# Template Kit for Cadabench challenge in the Datacamp\n", + "*DataCamp Challenge — École Polytechnique (INF554 / MAP583)*\n", "\n", - " Thomas Moreau (Inria)