Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 225 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
# Project-specific files
*.pdf



# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
*.lcov
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
# Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# poetry.lock
# poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
# pdm.lock
# pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
# pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi/*
!.pixi/config.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule*
celerybeat.pid

# Redis
*.rdb
*.aof
*.pid

# RabbitMQ
mnesia/
rabbitmq/
rabbitmq-data/

# ActiveMQ
activemq-data/

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
# .idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Temporary file for partial code execution
tempCodeRunnerFile.py

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# Streamlit
.streamlit/secrets.toml
14 changes: 10 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
# Hedge Fund Risk Modeling & Semi-Automated Trading System

## Team Information
- **Team Name**: [Team Name]
- **Year**: [Year]
- **All-Female Team**: [Yes/No]

- **Team Name**: Paneer_Lovers
- **Year**: 2
- **All-Female Team**: No
## Architecture Overview

#### Describe your approach here. Keep it short and clear.

- How does your system ingest and preprocess the varying data sources (market, macro, sentiment)?
The system uses `ingestion.py` to merge four datasets (Equity, Macro, Oil, Multi-Asset) via an inner join on the 'Date' index. `preprocessing.py` standardizes scales using Z-score Normalization and applies Winsorization (±3 sigma) to clip extreme outliers, preventing bias. We engineer features like Rolling Volatility, Momentum, and Cross-Asset Correlation.

- What risk modeling techniques were selected, and how are they integrated into the trading decision pipeline?
We implemented Historical VaR, Parametric VaR, Conditional VaR (Expected Shortfall), and Maximum Drawdown. These are integrated as "Safety Overlays." If 20-day rolling volatility breaches 25%, the system overrides our Machine Learning signals and executes an emergency "Risk-Off" shift into Gold and Cash.

- How does your semi-automated strategy generate signals while respecting portfolio constraints and handling realistic conditions like slippage?
An `MLSignalEngine` generates portfolio targets using a Random Forest model tuned via TimeSeriesSplit to prevent look-ahead bias. The Portfolio manager executes targets while enforcing `max_position_pct` constraints. It simulates 0.1% Commission and 0.05% Slippage, and utilizes a "Significance Filter" (>5% deviation) to avoid excessive trading fees.

- How is the dashboard designed to provide explainable insights and key metrics (Sharpe, drawdown) to stakeholders?
The interactive Streamlit dashboard provides live NAV Line Charts and Drawdown Visualizations compared to a Buy-and-Hold benchmark. It computes live risk-adjusted metrics (Sharpe, Sortino, Calmar, Alpha, Beta). Most importantly, it features an immutable Trade Audit Log displaying the exact algorithmic probability or rule that triggered every transaction.

**Note:** Please do not change the format or spelling of anything in this README. The fields are extracted using a script, so any changes to the structure or formatting may break the extraction process.
144 changes: 144 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import streamlit as st
import pandas as pd
import numpy as np
import os
import sys
from pathlib import Path

# Setup Path
# ROOT is the directory that contains this script. Its "src" sub-directory is
# put at the front of sys.path before the project imports below run
# (presumably that is where ingestion, preprocessing, risk, portfolio and
# signals live -- confirm against the repository layout).
ROOT = Path(__file__).resolve().parent
sys.path.insert(0, str(ROOT / "src"))

from ingestion import load_master
from preprocessing import preprocess
from risk import compute_all_risk_metrics
from portfolio import Portfolio
from signals import MLSignalEngine

# Configure the page (wide layout, browser-tab title) before rendering
# anything, then emit the dashboard heading and its one-line description.
st.set_page_config(page_title="Hedge Fund Risk Dashboard", layout="wide")

st.title("📈 Hedge Fund Risk & Trading Dashboard")
st.markdown(
    "Interactive backtesting dashboard evaluating a "
    "**Risk-Aware Trend Following** strategy against a standard "
    "**Buy-and-Hold** approach."
)

# --- Data Loading (Cached) ---
@st.cache_data
def get_data():
    """Load the master dataset and its preprocessed counterpart.

    Returns:
        A ``(raw, processed)`` tuple: the frame returned by ``load_master()``
        and the first element of ``preprocess(raw)``. The second element
        returned by ``preprocess`` is not used by the dashboard and is
        discarded.
    """
    master = load_master()
    processed, _unused_scaler = preprocess(master)
    return master, processed

# Materialise both frames once at module level; `df` (the preprocessed frame)
# feeds the backtest and the benchmark-return slice further down.
raw_df, df = get_data()

# --- Simulation Runner (Cached) ---
@st.cache_data
def run_simulations(df):
    """Backtest the active strategy and a buy-and-hold benchmark over ``df``.

    The active portfolio is rebalanced to the signal's target weights on a
    fixed 21-row cadence, and additionally whenever the signal's reason marks
    a "Risk-Off"/"De-risk" state and at least one target weight is more than
    5% away from the current weight. Only a scheduled rebalance resets the
    cadence counter.

    The cadence is counted in row positions rather than index labels:
    ``DataFrame.iterrows`` yields labels, so label arithmetic is only correct
    for a gap-free integer index and breaks on any other index.

    Args:
        df: Frame providing the "Date", "Equity_Price", "MA_Gold_Price" and
            "Oil_Price" columns; each row is also handed to
            ``MLSignalEngine.generate_signal``.

    Returns:
        ``(strat_port, bh_port)``: the active-strategy portfolio and the
        buy-and-hold portfolio, each with one snapshot recorded per row.
    """
    rebalance_every = 21     # rows between scheduled rebalances
    drift_threshold = 0.05   # minimum weight drift for a risk-off rebalance

    def price_map(row):
        # Asset name -> price for one row of the frame.
        return {
            "Equity": row["Equity_Price"],
            "Gold": row["MA_Gold_Price"],
            "Oil": row["Oil_Price"],
        }

    # Strategy
    strat_port = Portfolio(initial_capital=100_000, max_position_pct=0.95)
    engine = MLSignalEngine()

    # Far enough in the past that the very first row triggers a rebalance.
    last_rebalance_pos = -999

    for pos, (_, row) in enumerate(df.iterrows()):
        date = row["Date"]
        prices = price_map(row)
        strat_port.record_snapshot(date, prices)

        signal = engine.generate_signal(row)
        nav = strat_port.compute_nav(prices)
        current_weights = {
            asset: (strat_port.positions.get(asset, 0) * prices.get(asset, 0)) / nav if nav > 0 else 0
            for asset in prices
        }

        is_risk_off = "Risk-Off" in signal.reason or "De-risk" in signal.reason
        max_deviation = max(
            (
                abs(target_w - current_weights.get(asset, 0.0))
                for asset, target_w in signal.target_weights.items()
            ),
            default=0,
        )

        scheduled = pos - last_rebalance_pos >= rebalance_every
        if scheduled or (is_risk_off and max_deviation > drift_threshold):
            strat_port.rebalance(signal.target_weights, prices, date, signal.reason)
            # A risk-off rebalance between scheduled dates does not restart
            # the cadence.
            if scheduled:
                last_rebalance_pos = pos

    # Buy and hold: put 90% of the initial capital into a whole number of
    # equity units at the first available price, then never trade again.
    bh_port = Portfolio(initial_capital=100_000)
    first_row = df.iloc[0]
    first_price = first_row["Equity_Price"]
    bh_port.buy("Equity", (100_000 * 0.90) // first_price, first_price, first_row["Date"], "Initial")

    for _, row in df.iterrows():
        bh_port.record_snapshot(row["Date"], price_map(row))

    return strat_port, bh_port

# Run (or fetch from cache) both backtests while showing progress in the UI.
st.sidebar.header("Running Simulation...")
with st.spinner('Running Backtest Simulation...'):
    strat_port, bh_port = run_simulations(df)
st.sidebar.success("Simulation Complete!")

# NAV histories and per-period returns of both portfolios; returns are
# re-indexed from zero.
strat_nav, bh_nav = strat_port.nav_history, bh_port.nav_history
strat_ret = strat_port.get_returns().reset_index(drop=True)
bh_ret = bh_port.get_returns().reset_index(drop=True)

# Ensure benchmark returns are matched in length: keep only the trailing
# len(strat_ret) values, or everything when the strategy has no returns.
clean_equity_returns = df["Equity_Returns_clean"].values
if len(strat_ret) > 0:
    equity_ret = clean_equity_returns[-len(strat_ret):]
else:
    equity_ret = clean_equity_returns
equity_series = pd.Series(equity_ret)

# Both portfolios are scored against the same equity benchmark.
strat_metrics = compute_all_risk_metrics(strat_ret, benchmark_returns=equity_series)
bh_metrics = compute_all_risk_metrics(bh_ret, benchmark_returns=equity_series)

# --- Display KPIs ---
# Four headline numbers in one row: total return of each portfolio, then the
# strategy's maximum drawdown and Sharpe ratio.
col1, col2, col3, col4 = st.columns(4)

strategy_total_return = f"{strat_metrics.total_return:.2%}"
benchmark_total_return = f"{bh_metrics.total_return:.2%}"
strategy_max_drawdown = f"{strat_metrics.drawdown.max_drawdown:.2%}"
strategy_sharpe = f"{strat_metrics.sharpe_ratio:.2f}"

col1.metric("Strategy Total Return", strategy_total_return, delta_color="normal")
col2.metric("Buy & Hold Return", benchmark_total_return, delta_color="normal")
col3.metric("Strategy Max Drawdown", strategy_max_drawdown)
col4.metric("Strategy Sharpe", strategy_sharpe)

st.divider()

# --- Interactive Charts ---
st.subheader("Performance Comparison (NAV)")

# One line per portfolio, plotted against the strategy's snapshot dates.
nav_columns = {
    'Date': strat_nav['date'],
    'Active Strategy': strat_nav['nav'],
    'Buy & Hold': bh_nav['nav'],
}
chart_data = pd.DataFrame(nav_columns).set_index('Date')

st.line_chart(chart_data)

st.subheader("Drawdown Comparison")

# Drawdown = relative distance of the compounded return curve from its
# running peak, for each portfolio.
cum_s = (1 + strat_ret).cumprod()
cum_b = (1 + bh_ret).cumprod()
peak_s = cum_s.cummax()
peak_b = cum_b.cummax()
dd_s = (cum_s - peak_s) / peak_s
dd_b = (cum_b - peak_b) / peak_b

# The return series can be shorter than the NAV history (the benchmark slice
# earlier in this script already allows for that). Pairing the full date
# column with shorter drawdown arrays makes the DataFrame constructor raise,
# so keep only the trailing dates that line up with the returns. When the
# lengths already match this keeps every date.
nav_dates = pd.Series(strat_nav['date']).reset_index(drop=True)
date_offset = max(len(nav_dates) - len(dd_s), 0)
dd_dates = nav_dates.iloc[date_offset:].reset_index(drop=True)

dd_data = pd.DataFrame({
    'Date': dd_dates,
    'Strategy Drawdown': dd_s.values,
    'B&H Drawdown': dd_b.values,
}).set_index('Date')

st.line_chart(dd_data)

# --- Risk Table ---
st.subheader("Detailed Risk Metrics")


def _format_risk_column(metrics):
    """Format one portfolio's metrics in the row order of the "Metric" column."""
    return [
        f"{metrics.ann_return:.2%}",
        f"{metrics.ann_volatility:.2%}",
        f"{metrics.sharpe_ratio:.2f}",
        f"{metrics.sortino_ratio:.2f}",
        f"{metrics.var_95.historical:.2%}",
        f"{metrics.drawdown.max_drawdown:.2%}",
    ]


metric_labels = [
    "Annualised Return",
    "Annualised Volatility",
    "Sharpe Ratio",
    "Sortino Ratio",
    "VaR 95%",
    "Max Drawdown",
]
metrics_df = pd.DataFrame({
    "Metric": metric_labels,
    "Active Strategy": _format_risk_column(strat_metrics),
    "Buy & Hold": _format_risk_column(bh_metrics),
})
st.table(metrics_df)

# --- Trade Logs ---
st.subheader("Trade Audit Log (Last 100 Trades)")
trades = strat_port.trade_log
if trades.empty:
    st.write("No trades executed.")
else:
    # Take the last 100 logged trades and show the most recent date first.
    recent_trades = trades.tail(100).sort_values(by="date", ascending=False)
    st.dataframe(recent_trades, use_container_width=True)
Loading