Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
ingestion_res/*
scoring_res/*
dev_phase/*
*.pth
61 changes: 44 additions & 17 deletions competition.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
version: 2
title: Templat competition - Dummy classification
description: Dummy classification task
title: "Autoregressive Forecasting of the S&P 500 Index"
description: >
Can you predict whether the S&P 500 will close up or down — using only what you know by mid-morning?

Each trading day, participants receive a feature vector built from:
- Intraday morning signals: the day's open price and early price action
(e.g. open-to-first-hour return, morning high/low range, opening gap vs previous close).
- Historical context: past N days of daily OHLCV data, log-returns, and
rolling statistics (volatility, momentum) up to and including the previous close.

The target label is binary: **1** if the day's close is strictly above the previous close,
**0** otherwise. No look-ahead is permitted — only information available before noon (ET)
may be used as features for the current day.

Participants submit a scikit-learn–compatible model via a `submission.py` file
exposing a `get_model()` function. The model is trained server-side on historical
data and evaluated on a held-out test window using **directional accuracy**
(fraction of days where the predicted direction matches the actual close direction).

This is a DataCamp challenge organised at École Polytechnique (INF554 / MAP583).
image: logo.png
registration_auto_approve: False # if True, do not require approval from admin to join the comp
registration_auto_approve: False # set to True to skip manual approval

terms: pages/terms.md
pages:
Expand All @@ -15,8 +33,15 @@ pages:

tasks:
- index: 0
name: Developement Task
description: 'Tune models with training data, test against examples contained in public test data'
name: Development Task
description: >
Same-day close direction forecasting of the S&P 500 using morning information.
Each sample consists of: (i) intraday morning features for the current trading day
(opening gap, open price, early price action) and (ii) historical daily features
from the past N sessions (log-returns, OHLCV, rolling volatility, momentum).
The label is 1 if today's close > previous close, 0 otherwise.
No information after the morning window may be used; models are scored on
directional accuracy over a public held-out test window.
input_data: dev_phase/input_data/
reference_data: dev_phase/reference_data/
ingestion_program: ingestion_program/
Expand All @@ -25,13 +50,15 @@ tasks:
solutions:
- index: 0
tasks:
- 0
- 0
path: solution/


phases:
- name: Development Phase
description: 'Development phase: tune your models.'
description: >
Tune and validate your autoregressive model using the provided historical
S&P 500 training data. Your predictions are scored against a public test set
so you can iterate quickly. Unlimited submissions are allowed in this phase.
start: 10-07-2025
end: 03-31-2026
tasks:
Expand All @@ -41,20 +68,20 @@ leaderboards:
- title: Results
key: main
columns:
- title: Test Accuracy
- title: Directional Accuracy (public test)
key: test
index: 0
sorting: asc
- title: Private Test Accuracy
sorting: desc # higher is better
- title: Directional Accuracy (private test)
key: private_test
index: 1
sorting: asc
hidden: True
- title: Train time
sorting: desc
hidden: True # revealed only after the phase ends
- title: Train Time (s)
key: train_time
index: 2
sorting: desc
- title: Test time
sorting: asc # lower is better
- title: Predict Time (s)
key: test_time
index: 3
sorting: desc
sorting: asc
161 changes: 132 additions & 29 deletions ingestion_program/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,156 @@
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch

# Number of past trading days fed as a sequence to the model.
# Must be consistent between training and inference.
WINDOW_SIZE = 50

EVAL_SETS = ["test", "private_test"]


def evaluate_model(model, X_test):

y_pred = model.predict(X_test)
return pd.DataFrame(y_pred)
class SP500Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset for the S&P 500 direction-forecasting task.

    Sample ``idx`` is the sequence of the last ``window_size`` feature rows
    ending at (and including) row ``idx``, left-padded with zero rows when
    fewer than ``window_size`` rows precede it. The target is the binary
    direction label of row ``idx`` (1 = close > previous close, 0 otherwise).

    Parameters
    ----------
    features_path : Path
        CSV of feature rows in chronological order (columns = feature names,
        rows = trading days).
    labels_path : Path or None
        CSV with a single label column aligned row-for-row with the features;
        pass None for test splits where labels are withheld.
    window_size : int
        Number of past days (current day included) in each sequence. Must be
        consistent between training and inference.
    """

    def __init__(
        self, features_path, labels_path=None, window_size=WINDOW_SIZE
    ):
        self.window_size = window_size
        # The leading CSV column is the persisted row index written by
        # setup_data.py, not a feature — index_col=0 keeps it out of the
        # data arrays.
        frame = pd.read_csv(features_path, index_col=0)
        self.X = frame.values.astype(np.float32)
        self.n_features = self.X.shape[1]
        if labels_path is None:
            self.y = None  # test mode — labels are unknown
        else:
            labels = pd.read_csv(labels_path, index_col=0)
            self.y = labels.values.astype(np.float32).ravel()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return (window, label) or, in test mode, just the window tensor.

        The window tensor has shape (window_size, n_features); the label is
        a scalar float tensor for day ``idx`` (the window's last day).
        """
        start = max(0, idx - self.window_size + 1)
        rows = self.X[start : idx + 1]  # (<=window_size, n_features)

        missing = self.window_size - len(rows)
        if missing > 0:
            # Early days lack a full history: left-pad with zero rows.
            pad = np.zeros((missing, self.n_features), dtype=np.float32)
            rows = np.concatenate([pad, rows], axis=0)

        window = torch.tensor(rows, dtype=torch.float32)

        if self.y is None:
            return window  # test mode
        return window, torch.tensor(self.y[idx], dtype=torch.float32)


def get_train_dataset(data_dir):
    """Assemble the labelled training Dataset from the train/ split."""
    train_dir = Path(data_dir) / "train"
    return SP500Dataset(
        train_dir / "train_features.csv",
        train_dir / "train_labels.csv",
    )


def get_test_dataset(data_dir, eval_set):
    """Build a label-free test Dataset for a given evaluation split.

    Parameters
    ----------
    data_dir : str or Path
        Root folder containing one subfolder per split.
    eval_set : str
        Split name (e.g. "test" or "private_test"); the features file is
        expected at ``{data_dir}/{eval_set}/{eval_set}_features.csv``.
    """
    data_dir = Path(data_dir)
    features_path = data_dir / eval_set / f"{eval_set}_features.csv"
    # labels_path=None puts the dataset in test mode (no targets returned).
    return SP500Dataset(features_path, labels_path=None)


def main(data_dir, output_dir):
# Here, you can import info from the submission module, to evaluate the
# submission
from submission import get_model
def evaluate_model(model, test_dataset):
    """Run batched inference and return one probability per test sample.

    Parameters
    ----------
    model : torch.nn.Module
        Trained model mapping a batch of input tensors to probabilities in
        [0, 1] (sigmoid already applied). Inference runs on the device the
        model's parameters currently live on.
    test_dataset : torch.utils.data.Dataset
        Label-free dataset yielding bare input tensors (no targets).

    Returns
    -------
    pd.DataFrame
        Single "Probability" column with one scalar float per sample, in
        dataset order. The scoring program applies the decision threshold.
    """
    device = next(model.parameters()).device
    loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=64, shuffle=False
    )
    probs = []
    model.eval()
    with torch.no_grad():
        for x in loader:
            # test_dataset returns bare tensors (no label) — x is already the input
            x = x.to(device)
            # ravel() flattens (batch, 1) outputs to (batch,) so the column
            # always holds scalar floats regardless of the model's output
            # shape; (batch,) outputs pass through unchanged.
            batch_probs = model(x).cpu().numpy().ravel().tolist()
            probs.extend(batch_probs)
    return pd.DataFrame({"Probability": probs})

X_train, y_train = get_train_data(data_dir)

print("Training the model")
def main(data_dir, output_dir):
from submission import (
get_model,
) # imported here so sys.path is set first

model = get_model()
data_dir = Path(data_dir)
output_dir = Path(output_dir)

# ── Training ──────────────────────────────────────────────────────────────
train_dataset = get_train_dataset(data_dir)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=32, shuffle=True
)

print("Training the model")
start = time.time()
model.fit(X_train, y_train)
model = get_model(train_loader) # participant trains and returns the model
train_time = time.time() - start
print("-" * 10)

# ── Evaluation ────────────────────────────────────────────────────────────
print("=" * 40)
print("Evaluate the model")
start = time.time()
res = {}
for eval_set in EVAL_SETS:
X_test = pd.read_csv(data_dir / eval_set / f"{eval_set}_features.csv")
res[eval_set] = evaluate_model(model, X_test)
test_dataset = get_test_dataset(data_dir, eval_set)
res[eval_set] = evaluate_model(model, test_dataset)
test_time = time.time() - start
print("-" * 10)
duration = train_time + test_time
print(f"Completed Prediction. Total duration: {duration}")
print(
f"Completed Prediction. Total duration: {train_time + test_time:.1f}s"
)

# Write output files
# ── Write outputs ─────────────────────────────────────────────────────────
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "metadata.json", "w+") as f:
json.dump(dict(train_time=train_time, test_time=test_time), f)
Expand All @@ -69,19 +172,19 @@ def main(data_dir, output_dir):
parser.add_argument(
"--data-dir",
type=str,
default="/app/input_data",
help="",
default="dev_phase/input_data",
help="Root folder containing train/, test/, and private_test/ splits.",
)
parser.add_argument(
"--output-dir",
type=str,
default="/app/output",
help="",
default="ingestion_res",
help="Folder where prediction CSVs and metadata.json will be written.",
)
parser.add_argument(
"--submission-dir",
type=str,
default="/app/ingested_program",
default="solution",
help="",
)

Expand Down
Binary file modified logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added raw_data/.gitkeep
Empty file.
13 changes: 11 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
pandas
scikit-learn
# ── Core runtime (ingestion + scoring + submission) ───────────────────────────
numpy==2.2.6
pandas==2.3.3
scikit-learn==1.7.2

# PyTorch CPU build — participants may swap for a GPU wheel if needed
torch==2.8.0

# ── Local development tools ───────────────────────────────────────────────────
# Required only for tools/run_docker.py (not installed inside the Docker image)
docker
27 changes: 13 additions & 14 deletions scoring_program/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,39 @@
from pathlib import Path

import pandas as pd
from sklearn.metrics import roc_auc_score

EVAL_SETS = ["test", "private_test"]


def compute_accuracy(predictions, targets):
# Make sure there is no NaN, as pandas ignores them in mean computation
predictions = predictions.fillna(-10).values
# Return mean of correct predictions
return (predictions == targets.values).mean()
def compute_roc_auc(predictions, targets):
    """Score predicted probabilities against binary targets with ROC AUC."""
    # Replace NaNs first: a NaN prediction would make roc_auc_score raise,
    # and 0.5 is the uninformative probability.
    cleaned = predictions.fillna(0.5).values
    return roc_auc_score(targets.values, cleaned)


def main(reference_dir, prediction_dir, output_dir):
    """Score every evaluation split and write scores.json.

    Parameters
    ----------
    reference_dir : Path
        Folder holding the ground-truth ``{split}_labels.csv`` files.
    prediction_dir : Path
        Folder holding the ``{split}_predictions.csv`` files plus the
        metadata.json (train/test durations) produced by ingestion.
    output_dir : Path
        Destination for scores.json; created if it does not exist.
    """
    scores = {}
    for eval_set in EVAL_SETS:
        print(f"Scoring {eval_set}")

        predictions = pd.read_csv(
            prediction_dir / f"{eval_set}_predictions.csv"
        )
        targets = pd.read_csv(reference_dir / f"{eval_set}_labels.csv")

        scores[eval_set] = float(compute_roc_auc(predictions, targets))

    # Add train and test times in the score
    json_durations = (prediction_dir / "metadata.json").read_text()
    durations = json.loads(json_durations)
    scores.update(**durations)
    print(scores)

    # Write output scores
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "scores.json").write_text(json.dumps(scores))


if __name__ == "__main__":
Expand Down Expand Up @@ -68,5 +67,5 @@ def main(reference_dir, prediction_dir, output_dir):
main(
Path(args.reference_dir),
Path(args.prediction_dir),
Path(args.output_dir)
Path(args.output_dir),
)
Loading
Loading