Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
ingestion_res/*
scoring_res/*
dev_phase/*
*.pth
61 changes: 44 additions & 17 deletions competition.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
version: 2
title: Templat competition - Dummy classification
description: Dummy classification task
title: "Autoregressive Forecasting of the S&P 500 Index"
description: >
Can you predict whether the S&P 500 will close up or down — using only what you know by mid-morning?

Each trading day, participants receive a feature vector built from:
- Intraday morning signals: the day's open price and early price action
(e.g. open-to-first-hour return, morning high/low range, opening gap vs previous close).
- Historical context: past N days of daily OHLCV data, log-returns, and
rolling statistics (volatility, momentum) up to and including the previous close.

The target label is binary: **1** if the day's close is strictly above the previous close,
**0** otherwise. No look-ahead is permitted — only information available before noon (ET)
may be used as features for the current day.

Participants submit a scikit-learn–compatible model via a `submission.py` file
exposing a `get_model()` function. The model is trained server-side on historical
data and evaluated on a held-out test window using **directional accuracy**
(fraction of days where the predicted direction matches the actual close direction).

This is a DataCamp challenge organised at École Polytechnique (INF554 / MAP583).
image: logo.png
registration_auto_approve: False # if True, do not require approval from admin to join the comp
registration_auto_approve: False # set to True to skip manual approval

terms: pages/terms.md
pages:
Expand All @@ -15,8 +33,15 @@ pages:

tasks:
- index: 0
name: Developement Task
description: 'Tune models with training data, test against examples contained in public test data'
name: Development Task
description: >
Same-day close direction forecasting of the S&P 500 using morning information.
Each sample consists of: (i) intraday morning features for the current trading day
(opening gap, open price, early price action) and (ii) historical daily features
from the past N sessions (log-returns, OHLCV, rolling volatility, momentum).
The label is 1 if today's close > previous close, 0 otherwise.
No information after the morning window may be used; models are scored on
directional accuracy over a public held-out test window.
input_data: dev_phase/input_data/
reference_data: dev_phase/reference_data/
ingestion_program: ingestion_program/
Expand All @@ -25,13 +50,15 @@ tasks:
solutions:
- index: 0
tasks:
- 0
- 0
path: solution/


phases:
- name: Development Phase
description: 'Development phase: tune your models.'
description: >
Tune and validate your autoregressive model using the provided historical
S&P 500 training data. Your predictions are scored against a public test set
so you can iterate quickly. Unlimited submissions are allowed in this phase.
start: 10-07-2025
end: 03-31-2026
tasks:
Expand All @@ -41,20 +68,20 @@ leaderboards:
- title: Results
key: main
columns:
- title: Test Accuracy
- title: Directional Accuracy (public test)
key: test
index: 0
sorting: asc
- title: Private Test Accuracy
sorting: desc # higher is better
- title: Directional Accuracy (private test)
key: private_test
index: 1
sorting: asc
hidden: True
- title: Train time
sorting: desc
hidden: True # revealed only after the phase ends
- title: Train Time (s)
key: train_time
index: 2
sorting: desc
- title: Test time
sorting: asc # lower is better
- title: Predict Time (s)
key: test_time
index: 3
sorting: desc
sorting: asc
161 changes: 132 additions & 29 deletions ingestion_program/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,53 +3,156 @@
import time
from pathlib import Path

import numpy as np
import pandas as pd
import torch

# Number of past trading days fed as a sequence to the model.
# Must be consistent between training and inference.
WINDOW_SIZE = 50

EVAL_SETS = ["test", "private_test"]


def evaluate_model(model, X_test):

y_pred = model.predict(X_test)
return pd.DataFrame(y_pred)
class SP500Dataset(torch.utils.data.Dataset):
    """Sliding-window dataset for the S&P 500 direction-forecasting task.

    Sample ``idx`` is the sequence of the last ``window_size`` feature rows
    ending at (and including) row ``idx``, left-padded with zero rows when
    fewer than ``window_size`` rows precede it. The target is the binary
    direction label of row ``idx`` (1 = close > previous close, 0 otherwise).

    Parameters
    ----------
    features_path : Path
        CSV of feature rows in chronological order (columns = feature names,
        rows = trading days).
    labels_path : Path or None
        CSV with a single label column aligned row-for-row with the features;
        pass None for test splits where labels are withheld.
    window_size : int
        Number of past days (current day included) in each sequence. Must be
        consistent between training and inference.
    """

    def __init__(
        self, features_path, labels_path=None, window_size=WINDOW_SIZE
    ):
        self.window_size = window_size
        # The leading CSV column is the persisted row index written by
        # setup_data.py, not a feature — index_col=0 keeps it out of the
        # data arrays.
        frame = pd.read_csv(features_path, index_col=0)
        self.X = frame.values.astype(np.float32)
        self.n_features = self.X.shape[1]
        if labels_path is None:
            self.y = None  # test mode — labels are unknown
        else:
            labels = pd.read_csv(labels_path, index_col=0)
            self.y = labels.values.astype(np.float32).ravel()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        """Return (window, label) or, in test mode, just the window tensor.

        The window tensor has shape (window_size, n_features); the label is
        a scalar float tensor for day ``idx`` (the window's last day).
        """
        start = max(0, idx - self.window_size + 1)
        rows = self.X[start : idx + 1]  # (<=window_size, n_features)

        missing = self.window_size - len(rows)
        if missing > 0:
            # Early days lack a full history: left-pad with zero rows.
            pad = np.zeros((missing, self.n_features), dtype=np.float32)
            rows = np.concatenate([pad, rows], axis=0)

        window = torch.tensor(rows, dtype=torch.float32)

        if self.y is None:
            return window  # test mode
        return window, torch.tensor(self.y[idx], dtype=torch.float32)


def get_train_dataset(data_dir):
    """Assemble the labelled training Dataset from the train/ split."""
    train_dir = Path(data_dir) / "train"
    return SP500Dataset(
        train_dir / "train_features.csv",
        train_dir / "train_labels.csv",
    )


def get_test_dataset(data_dir, eval_set):
    """Build a label-free test Dataset for a given evaluation split.

    Parameters
    ----------
    data_dir : str or Path
        Root folder containing one subfolder per split.
    eval_set : str
        Split name (e.g. "test" or "private_test"); the features file is
        expected at ``{data_dir}/{eval_set}/{eval_set}_features.csv``.
    """
    data_dir = Path(data_dir)
    features_path = data_dir / eval_set / f"{eval_set}_features.csv"
    # labels_path=None puts the dataset in test mode (no targets returned).
    return SP500Dataset(features_path, labels_path=None)


def main(data_dir, output_dir):
# Here, you can import info from the submission module, to evaluate the
# submission
from submission import get_model
def evaluate_model(model, test_dataset):
    """Run batched inference and return one probability per test sample.

    Parameters
    ----------
    model : torch.nn.Module
        Trained model mapping a batch of input tensors to probabilities in
        [0, 1] (sigmoid already applied). Inference runs on the device the
        model's parameters currently live on.
    test_dataset : torch.utils.data.Dataset
        Label-free dataset yielding bare input tensors (no targets).

    Returns
    -------
    pd.DataFrame
        Single "Probability" column with one scalar float per sample, in
        dataset order. The scoring program applies the decision threshold.
    """
    device = next(model.parameters()).device
    loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=64, shuffle=False
    )
    probs = []
    model.eval()
    with torch.no_grad():
        for x in loader:
            # test_dataset returns bare tensors (no label) — x is already the input
            x = x.to(device)
            # ravel() flattens (batch, 1) outputs to (batch,) so the column
            # always holds scalar floats regardless of the model's output
            # shape; (batch,) outputs pass through unchanged.
            batch_probs = model(x).cpu().numpy().ravel().tolist()
            probs.extend(batch_probs)
    return pd.DataFrame({"Probability": probs})

X_train, y_train = get_train_data(data_dir)

print("Training the model")
def main(data_dir, output_dir):
from submission import (
get_model,
) # imported here so sys.path is set first

model = get_model()
data_dir = Path(data_dir)
output_dir = Path(output_dir)

# ── Training ──────────────────────────────────────────────────────────────
train_dataset = get_train_dataset(data_dir)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=32, shuffle=True
)

print("Training the model")
start = time.time()
model.fit(X_train, y_train)
model = get_model(train_loader) # participant trains and returns the model
train_time = time.time() - start
print("-" * 10)

# ── Evaluation ────────────────────────────────────────────────────────────
print("=" * 40)
print("Evaluate the model")
start = time.time()
res = {}
for eval_set in EVAL_SETS:
X_test = pd.read_csv(data_dir / eval_set / f"{eval_set}_features.csv")
res[eval_set] = evaluate_model(model, X_test)
test_dataset = get_test_dataset(data_dir, eval_set)
res[eval_set] = evaluate_model(model, test_dataset)
test_time = time.time() - start
print("-" * 10)
duration = train_time + test_time
print(f"Completed Prediction. Total duration: {duration}")
print(
f"Completed Prediction. Total duration: {train_time + test_time:.1f}s"
)

# Write output files
# ── Write outputs ─────────────────────────────────────────────────────────
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "metadata.json", "w+") as f:
json.dump(dict(train_time=train_time, test_time=test_time), f)
Expand All @@ -69,19 +172,19 @@ def main(data_dir, output_dir):
parser.add_argument(
"--data-dir",
type=str,
default="/app/input_data",
help="",
default="dev_phase/input_data",
help="Root folder containing train/, test/, and private_test/ splits.",
)
parser.add_argument(
"--output-dir",
type=str,
default="/app/output",
help="",
default="ingestion_res",
help="Folder where prediction CSVs and metadata.json will be written.",
)
parser.add_argument(
"--submission-dir",
type=str,
default="/app/ingested_program",
default="solution",
help="",
)

Expand Down
Binary file modified logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file added raw_data/.gitkeep
Empty file.
13 changes: 11 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
pandas
scikit-learn
# ── Core runtime (ingestion + scoring + submission) ───────────────────────────
numpy==2.2.6
pandas==2.3.3
scikit-learn==1.7.2

# PyTorch CPU build — participants may swap for a GPU wheel if needed
torch==2.8.0

# ── Local development tools ───────────────────────────────────────────────────
# Required only for tools/run_docker.py (not installed inside the Docker image)
docker
27 changes: 13 additions & 14 deletions scoring_program/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,39 @@
from pathlib import Path

import pandas as pd
from sklearn.metrics import roc_auc_score

EVAL_SETS = ["test", "private_test"]


def compute_accuracy(predictions, targets):
# Make sure there is no NaN, as pandas ignores them in mean computation
predictions = predictions.fillna(-10).values
# Return mean of correct predictions
return (predictions == targets.values).mean()
def compute_roc_auc(predictions, targets):
    """Score predicted probabilities against binary targets with ROC AUC."""
    # Replace NaNs first: a NaN prediction would make roc_auc_score raise,
    # and 0.5 is the uninformative probability.
    cleaned = predictions.fillna(0.5).values
    return roc_auc_score(targets.values, cleaned)


def main(reference_dir, prediction_dir, output_dir):
    """Score every evaluation split and write scores.json.

    Parameters
    ----------
    reference_dir : Path
        Folder holding the ground-truth ``{split}_labels.csv`` files.
    prediction_dir : Path
        Folder holding the ``{split}_predictions.csv`` files plus the
        metadata.json (train/test durations) produced by ingestion.
    output_dir : Path
        Destination for scores.json; created if it does not exist.
    """
    scores = {}
    for eval_set in EVAL_SETS:
        print(f"Scoring {eval_set}")

        predictions = pd.read_csv(
            prediction_dir / f"{eval_set}_predictions.csv"
        )
        targets = pd.read_csv(reference_dir / f"{eval_set}_labels.csv")

        scores[eval_set] = float(compute_roc_auc(predictions, targets))

    # Add train and test times in the score
    json_durations = (prediction_dir / "metadata.json").read_text()
    durations = json.loads(json_durations)
    scores.update(**durations)
    print(scores)

    # Write output scores
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "scores.json").write_text(json.dumps(scores))


if __name__ == "__main__":
Expand Down Expand Up @@ -68,5 +67,5 @@ def main(reference_dir, prediction_dir, output_dir):
main(
Path(args.reference_dir),
Path(args.prediction_dir),
Path(args.output_dir)
Path(args.output_dir),
)
Loading
Loading