diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..136c3028
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,225 @@
+# Project-specific files
+# Ignore PDF documents anywhere in the tree. A bare `.pdf` pattern would only
+# match a file or directory whose entire name is ".pdf".
+*.pdf
+
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#   Usually these files are written by a python script from a template
+#   before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+*.lcov
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+# Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+# uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+# poetry.lock
+# poetry.toml
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#   pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+#   https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+# pdm.lock
+# pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+#   Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+# pixi.lock
+#   Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+#   in the .venv directory. It is recommended not to include this directory in version control.
+.pixi/*
+!.pixi/config.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule*
+celerybeat.pid
+
+# Redis
+*.rdb
+*.aof
+*.pid
+
+# RabbitMQ
+mnesia/
+rabbitmq/
+rabbitmq-data/
+
+# ActiveMQ
+activemq-data/
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#   JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#   be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#   and can be added to the global gitignore or merged into this file.  For a more nuclear
+#   option (not recommended) you can uncomment the following to ignore the entire idea folder.
+# .idea/
+
+# Abstra
+#   Abstra is an AI-powered process automation framework.
+#   Ignore directories containing user credentials, local state, and settings.
+#   Learn more at https://abstra.io/docs
+.abstra/
+
+# Visual Studio Code
+#   Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#   that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+#   and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#   you could uncomment the following to ignore the entire vscode folder
+# .vscode/
+# Temporary file for partial code execution
+tempCodeRunnerFile.py
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# Marimo
+marimo/_static/
+marimo/_lsp/
+__marimo__/
+
+# Streamlit
+.streamlit/secrets.toml
\ No newline at end of file
diff --git a/README.md b/README.md
index d68e7c4e..bf6aa1b6 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,23 @@
 # Hedge Fund Risk Modeling & Semi-Automated Trading System
 
 ## Team Information
-- **Team Name**: [Team Name]
-- **Year**: [Year]
-- **All-Female Team**: [Yes/No]
-
+- **Team Name**: Paneer_Lovers
+- **Year**: 2
+- **All-Female Team**: No
+
 ## Architecture Overview
 
 #### Describe your approach here. Keep it short and clear.
 
     - How does your system ingest and preprocess the varying data sources (market, macro, sentiment)?
+      The system uses `ingestion.py` to merge four datasets (Equity, Macro, Oil, Multi-Asset) via an inner join on the 'Date' index. `preprocessing.py` standardizes scales using Z-score Normalization and applies Winsorization (±3 sigma) to clip extreme outliers, preventing bias. We engineer features like Rolling Volatility, Momentum, and Cross-Asset Correlation.
+
     - What risk modeling techniques were selected, and how are they integrated into the trading decision pipeline?
+      We implemented Historical VaR, Parametric VaR, Conditional VaR (Expected Shortfall), and Maximum Drawdown. These are integrated as "Safety Overlays." If 20-day rolling volatility breaches 25%, the system overrides our Machine Learning signals and executes an emergency "Risk-Off" shift into Gold and Cash.
+
     - How does your semi-automated strategy generate signals while respecting portfolio constraints and handling realistic conditions like slippage?
+      An `MLSignalEngine` generates portfolio targets using a Random Forest model tuned via TimeSeriesSplit to prevent look-ahead bias. The Portfolio manager executes targets while enforcing `max_position_pct` constraints. It simulates 0.1% Commission and 0.05% Slippage, and utilizes a "Significance Filter" (>5% deviation) to avoid excessive trading fees.
+
     - How is the dashboard designed to provide explainable insights and key metrics (Sharpe, drawdown) to stakeholders?
+      The interactive Streamlit dashboard provides live NAV Line Charts and Drawdown Visualizations compared to a Buy-and-Hold benchmark. It computes live risk-adjusted metrics (Sharpe, Sortino, Calmar, Alpha, Beta). Most importantly, it features an immutable Trade Audit Log displaying the exact algorithmic probability or rule that triggered every transaction.
 
 **Note:** Please do not change the format or spelling of anything in this README. The fields are extracted using a script, so any changes to the structure or formatting may break the extraction process.
diff --git a/app.py b/app.py
new file mode 100644
index 00000000..c120310c
--- /dev/null
+++ b/app.py
@@ -0,0 +1,144 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+import os
+import sys
+from pathlib import Path
+
+# Setup Path
+ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT / "src"))
+
+from ingestion import load_master
+from preprocessing import preprocess
+from risk import compute_all_risk_metrics
+from portfolio import Portfolio
+from signals import MLSignalEngine
+
+# Wide layout plus page title, followed by the dashboard heading and intro text.
+st.set_page_config(
+    page_title="Hedge Fund Risk Dashboard",
+    layout="wide",
+)
+
+st.title("📈 Hedge Fund Risk & Trading Dashboard")
+st.markdown(
+    "Interactive backtesting dashboard evaluating a **Risk-Aware Trend Following** strategy against a standard **Buy-and-Hold** approach."
+)
+
+# --- Data Loading (Cached) ---
+@st.cache_data
+def get_data():
+    """Load the master dataset and return it together with its preprocessed form.
+
+    Returns:
+        A ``(raw, processed)`` tuple: the frame returned by ``load_master()``
+        and the first element of ``preprocess()``'s result. The second
+        element returned by ``preprocess()`` is discarded.
+    """
+    master = load_master()
+    processed, _ = preprocess(master)
+    return master, processed
+
+raw_df, df = get_data()
+
+# --- Simulation Runner (Cached) ---
+@st.cache_data
+def run_simulations(df):
+    """Backtest the signal-driven strategy and a buy-and-hold benchmark over ``df``.
+
+    Args:
+        df: Frame providing ``Date``, ``Equity_Price``, ``MA_Gold_Price`` and
+            ``Oil_Price`` columns, plus whatever ``MLSignalEngine`` reads
+            from each row.
+
+    Returns:
+        ``(strat_port, bh_port)``: the strategy portfolio and the
+        buy-and-hold portfolio, each with one snapshot recorded per row.
+    """
+    capital = 100_000
+    schedule_gap = 21      # index-label gap between scheduled rebalances
+    drift_limit = 0.05     # weight drift needed for an off-schedule risk-off trade
+
+    def quote(row):
+        # Map the row's price columns onto the asset names used by the portfolios.
+        return {"Equity": row["Equity_Price"], "Gold": row["MA_Gold_Price"], "Oil": row["Oil_Price"]}
+
+    # --- Active strategy ---
+    strat_port = Portfolio(initial_capital=capital, max_position_pct=0.95)
+    engine = MLSignalEngine()
+    # Sentinel: lets the scheduled-rebalance test pass on the first row
+    # (for non-negative integer index labels).
+    last_scheduled = -999
+
+    for idx, row in df.iterrows():
+        when = row["Date"]
+        prices = quote(row)
+        strat_port.record_snapshot(when, prices)
+
+        signal = engine.generate_signal(row)
+        nav = strat_port.compute_nav(prices)
+        if nav > 0:
+            held = {a: (strat_port.positions.get(a, 0) * prices.get(a, 0)) / nav for a in prices}
+        else:
+            held = {a: 0 for a in prices}
+
+        risk_off = any(tag in signal.reason for tag in ("Risk-Off", "De-risk"))
+        drift = max(
+            (abs(w - held.get(a, 0.0)) for a, w in signal.target_weights.items()),
+            default=0,
+        )
+
+        scheduled = idx - last_scheduled >= schedule_gap
+        if scheduled or (risk_off and drift > drift_limit):
+            strat_port.rebalance(signal.target_weights, prices, when, signal.reason)
+            # Only scheduled rebalances restart the clock; risk-off trades do not.
+            if scheduled:
+                last_scheduled = idx
+
+    # --- Buy and hold: 90% of capital into Equity at the first row's price ---
+    bh_port = Portfolio(initial_capital=capital)
+    opening = df.iloc[0]
+    entry_price = opening["Equity_Price"]
+    bh_port.buy("Equity", (capital * 0.90) // entry_price, entry_price, opening["Date"], "Initial")
+
+    for _, row in df.iterrows():
+        bh_port.record_snapshot(row["Date"], quote(row))
+
+    return strat_port, bh_port
+
+# Run (or fetch from cache) both backtests, with sidebar status messages around the call.
+st.sidebar.header("Running Simulation...")
+with st.spinner('Running Backtest Simulation...'):
+    strat_port, bh_port = run_simulations(df)
+st.sidebar.success("Simulation Complete!")
+
+# NAV histories drive the charts; the return series are re-indexed from 0 so
+# they share a plain positional index with the benchmark series built below.
+strat_nav = strat_port.nav_history
+bh_nav = bh_port.nav_history
+strat_ret = strat_port.get_returns().reset_index(drop=True)
+bh_ret = bh_port.get_returns().reset_index(drop=True)
+
+# Ensure benchmark returns are matched in length: take the trailing
+# len(strat_ret) equity returns, or the whole column when the strategy
+# produced no returns at all.
+equity_ret = df["Equity_Returns_clean"].values[-len(strat_ret):] if len(strat_ret) > 0 else df["Equity_Returns_clean"].values
+equity_series = pd.Series(equity_ret)
+
+# Both portfolios are measured against the same equity benchmark series.
+strat_metrics = compute_all_risk_metrics(strat_ret, benchmark_returns=equity_series)
+bh_metrics = compute_all_risk_metrics(bh_ret, benchmark_returns=equity_series)
+
+# --- Display KPIs ---
+col1, col2, col3, col4 = st.columns(4)
+col1.metric("Strategy Total Return", f"{strat_metrics.total_return:.2%}", delta_color="normal")
+col2.metric("Buy & Hold Return", f"{bh_metrics.total_return:.2%}", delta_color="normal")
+col3.metric("Strategy Max Drawdown", f"{strat_metrics.drawdown.max_drawdown:.2%}")
+col4.metric("Strategy Sharpe", f"{strat_metrics.sharpe_ratio:.2f}")
+
+st.divider()
+
+# --- Interactive Charts ---
+st.subheader("Performance Comparison (NAV)")
+# NOTE(review): the two NAV columns are combined by index label; this presumes
+# both nav_history frames share the same index — confirm against Portfolio.
+chart_data = pd.DataFrame({
+    'Date': strat_nav['date'],
+    'Active Strategy': strat_nav['nav'],
+    'Buy & Hold': bh_nav['nav']
+}).set_index('Date')
+
+st.line_chart(chart_data)
+
+st.subheader("Drawdown Comparison")
+# Drawdown = fractional drop of the cumulative growth curve from its running peak.
+cum_s = (1 + strat_ret).cumprod()
+cum_b = (1 + bh_ret).cumprod()
+dd_s = (cum_s - cum_s.cummax()) / cum_s.cummax()
+dd_b = (cum_b - cum_b.cummax()) / cum_b.cummax()
+
+# NOTE(review): 'Date' has one entry per NAV snapshot while dd_s/dd_b have one
+# entry per return. This constructor needs those lengths to match — confirm
+# that Portfolio.get_returns() yields one return per recorded snapshot.
+dd_data = pd.DataFrame({
+    'Date': strat_nav['date'],
+    'Strategy Drawdown': dd_s.values,
+    'B&H Drawdown': dd_b.values
+}).set_index('Date')
+
+st.line_chart(dd_data)
+
+# --- Risk Table ---
+st.subheader("Detailed Risk Metrics")
+# One pre-formatted string per metric; the two value lists follow the order of "Metric".
+metrics_df = pd.DataFrame({
+    "Metric": ["Annualised Return", "Annualised Volatility", "Sharpe Ratio", "Sortino Ratio", "VaR 95%", "Max Drawdown"],
+    "Active Strategy": [
+        f"{strat_metrics.ann_return:.2%}", f"{strat_metrics.ann_volatility:.2%}", 
+        f"{strat_metrics.sharpe_ratio:.2f}", f"{strat_metrics.sortino_ratio:.2f}", 
+        f"{strat_metrics.var_95.historical:.2%}", f"{strat_metrics.drawdown.max_drawdown:.2%}"
+    ],
+    "Buy & Hold": [
+        f"{bh_metrics.ann_return:.2%}", f"{bh_metrics.ann_volatility:.2%}", 
+        f"{bh_metrics.sharpe_ratio:.2f}", f"{bh_metrics.sortino_ratio:.2f}", 
+        f"{bh_metrics.var_95.historical:.2%}", f"{bh_metrics.drawdown.max_drawdown:.2%}"
+    ]
+})
+st.table(metrics_df)
+
+# --- Trade Logs ---
+st.subheader("Trade Audit Log (Last 100 Trades)")
+trades = strat_port.trade_log
+if not trades.empty:
+    # Last 100 rows of the log, displayed newest-first by date.
+    st.dataframe(trades.tail(100).sort_values(by="date", ascending=False), use_container_width=True)
+else:
+    st.write("No trades executed.")
diff --git a/evaluate_ml.py b/evaluate_ml.py
new file mode 100644
index 00000000..183ed29b
--- /dev/null
+++ b/evaluate_ml.py
@@ -0,0 +1,97 @@
+"""
+evaluate_ml.py
+--------------
+Trains the Machine Learning model and backtests the ML Signal Engine.
+
+Usage:
+    venv\\Scripts\\python evaluate_ml.py
+"""
+
+import sys
+from pathlib import Path
+import warnings
+
+warnings.filterwarnings("ignore")
+
+ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT / "src"))
+
+from ingestion import load_master
+from preprocessing import preprocess
+from ml_model import train_model
+from portfolio import Portfolio
+from signals import MLSignalEngine
+from risk import compute_all_risk_metrics
+
+def evaluate():
+    """Train the ML model, backtest the ML signal engine, and print the results.
+
+    Steps:
+      1. Load and preprocess the master dataset.
+      2. Train the model via ``train_model`` on the preprocessed frame.
+      3. Walk the frame row by row, recording a portfolio snapshot for every
+         row and rebalancing to the engine's target weights on a 21-step
+         schedule or whenever any weight is more than 0.10 away from target.
+      4. Print the risk metrics (benchmarked against the trailing equity
+         returns) and the first ten trades.
+
+    Rows for which ``engine.generate_signal`` raises are skipped (their
+    snapshot is still recorded). The number of skipped rows and the first
+    error are reported at the end rather than silently discarded.
+    """
+    # Local import: ``pd.Series`` is needed for the benchmark series below and
+    # this script has no module-level pandas import (the call previously
+    # raised NameError).
+    import pandas as pd
+
+    print("============================================================")
+    print("  LOADING & PREPROCESSING")
+    print("============================================================")
+    raw = load_master()
+    df, _ = preprocess(raw)
+
+    print("\n============================================================")
+    print("  TRAINING ML MODEL")
+    print("============================================================")
+    # Train and save the model
+    train_model(df)
+
+    print("\n============================================================")
+    print("  BACKTESTING ML STRATEGY")
+    print("============================================================")
+
+    engine = MLSignalEngine()
+    port = Portfolio(initial_capital=100_000, max_position_pct=0.95)
+    last_rebalance_idx = -999  # sentinel so the schedule test passes on the first row
+    skipped_rows = 0
+    first_error = None
+
+    for i, row in df.iterrows():
+        date = row["Date"]
+        prices = {
+            "Equity": row["Equity_Price"],
+            "Gold": row["MA_Gold_Price"],
+            "Oil": row["Oil_Price"],
+        }
+
+        port.record_snapshot(date, prices)
+
+        try:
+            signal = engine.generate_signal(row)
+        except Exception as exc:
+            # Best-effort: a row the engine cannot score is skipped, but it is
+            # counted so the gap shows up in the final report.
+            skipped_rows += 1
+            if first_error is None:
+                first_error = exc
+            continue
+
+        nav = port.compute_nav(prices)
+        current_weights = {asset: (port.positions.get(asset, 0) * prices.get(asset, 0)) / nav if nav > 0 else 0
+                           for asset in prices}
+
+        # Largest absolute gap between a target weight and the weight currently held.
+        max_deviation = 0
+        for asset, target_w in signal.target_weights.items():
+            curr_w = current_weights.get(asset, 0.0)
+            max_deviation = max(max_deviation, abs(target_w - curr_w))
+
+        # Rebalance every 21 days OR if deviation > 10% (more lenient than rule-based to avoid overtrading)
+        scheduled = i - last_rebalance_idx >= 21
+        if scheduled or max_deviation > 0.10:
+            port.rebalance(signal.target_weights, prices, date, signal.reason)
+            # Only scheduled rebalances restart the 21-step clock.
+            if scheduled:
+                last_rebalance_idx = i
+
+    strat_ret = port.get_returns()
+    # Benchmark: the trailing len(strat_ret) equity returns, or the whole
+    # column when the strategy produced no returns.
+    equity_col = df["Equity_Returns_clean"].values
+    equity_ret = equity_col[-len(strat_ret):] if len(strat_ret) > 0 else equity_col
+
+    metrics = compute_all_risk_metrics(
+        strat_ret.reset_index(drop=True),
+        benchmark_returns=pd.Series(equity_ret),
+        risk_free_annual=0.02,
+    )
+
+    print("\n  [ML Strategy Portfolio]")
+    print(metrics.summary())
+
+    print(f"\n  Total trades executed: {len(port.trade_log)}")
+    print("  First 10 Trades:")
+    if not port.trade_log.empty:
+        print(port.trade_log.head(10)[["date", "asset", "action", "quantity", "reason"]].to_string())
+
+    if skipped_rows:
+        print(f"\n  WARNING: {skipped_rows} row(s) skipped because signal generation failed "
+              f"(first error: {first_error!r})")
+
+if __name__ == "__main__":
+    evaluate()
diff --git a/evaluate_phase1.py b/evaluate_phase1.py
new file mode 100644
index 00000000..40976aa3
--- /dev/null
+++ b/evaluate_phase1.py
@@ -0,0 +1,334 @@
+"""
+evaluate_phase1.py
+------------------
+Phase 1 evaluation script.
+
+Runs the full ingestion + preprocessing pipeline and prints a
+comprehensive data quality & feature report to the console.
+
+Usage:
+    python evaluate_phase1.py
+"""
+
+import sys
+import warnings
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")          # non-interactive backend (no display needed)
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+
+warnings.filterwarnings("ignore")
+
+# ── ensure src/ is on path ────────────────────────────────────────────────────
+ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT / "src"))
+
+from ingestion    import load_master
+from preprocessing import preprocess
+
+# Output directory for the charts saved by this script: `reports/` next to this
+# file. Created at module load if it does not already exist.
+REPORT_DIR = ROOT / "reports"
+REPORT_DIR.mkdir(exist_ok=True)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def section(title: str):
+    """Print ``title`` as a banner framed by two 60-character ``=`` rules."""
+    rule = "=" * 60
+    for line in (f"\n{rule}", f"  {title}", rule):
+        print(line)
+
+
+def subsection(title: str):
+    """Print a blank line followed by a minor heading of the form ``--- title ---``."""
+    print("", f"--- {title} ---", sep="\n")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Phase 1  Evaluation
+# ─────────────────────────────────────────────────────────────────────────────
+
+def evaluate():
+    """Run the Phase 1 pipeline and print a data-quality and feature report.
+
+    Loads the master dataset, preprocesses it, prints equity and multi-asset
+    return statistics, engineered-feature summaries and a correlation table,
+    then saves the charts via ``_save_charts``.
+    """
+    periods = 252  # annualisation factor applied to daily means / volatilities
+
+    def annualised(returns):
+        # (annualised mean, annualised volatility, mean/vol ratio or 0 when vol is zero)
+        mean_ann = returns.mean() * periods
+        vol_ann = returns.std() * np.sqrt(periods)
+        return mean_ann, vol_ann, (mean_ann / vol_ann if vol_ann else 0)
+
+    # -- Step 1: load ---------------------------------------------------------
+    section("STEP 1 — Data Ingestion")
+    raw = load_master()
+    dates = raw["Date"]
+
+    subsection("Raw dataset overview")
+    print(f"  Rows            : {len(raw):,}")
+    print(f"  Columns         : {len(raw.columns)}")
+    print(f"  Date range      : {dates.min().date()} -> {dates.max().date()}")
+    print(f"  Years covered   : {dates.dt.year.nunique()}")
+    print("  NaN totals      :")
+    for column, missing in raw.isnull().sum().items():
+        if not missing:
+            continue  # only columns that actually contain NaNs are listed
+        print(f"    {column:<30} : {missing}")
+
+    # -- Step 2: preprocess ---------------------------------------------------
+    section("STEP 2 — Preprocessing")
+    df, _ = preprocess(raw)
+
+    subsection("Post-processing overview")
+    print(f"  Rows after clean : {len(df):,}")
+    print(f"  Total NaNs       : {df.isnull().sum().sum()}")
+    print(f"  Columns          : {len(df.columns)}")
+
+    # -- Step 3: equity return statistics -------------------------------------
+    section("STEP 3 — Equity Return Statistics")
+    ret = df["Equity_Returns_clean"]
+    ann_ret, ann_vol, sharpe = annualised(ret)
+
+    # Maximum drawdown of the compounded return curve relative to its running peak.
+    growth = (1 + ret).cumprod()
+    peak = growth.cummax()
+    max_dd = ((growth - peak) / peak).min()
+
+    print(f"\n  Annualised Return : {ann_ret:>10.4%}")
+    print(f"  Annualised Vol    : {ann_vol:>10.4%}")
+    print(f"  Sharpe Ratio      : {sharpe:>10.4f}")
+    print(f"  Skewness          : {ret.skew():>10.4f}")
+    print(f"  Excess Kurtosis   : {ret.kurtosis():>10.4f}")
+    print(f"  VaR 95%  (daily)  : {ret.quantile(0.05):>10.4%}")
+    print(f"  VaR 99%  (daily)  : {ret.quantile(0.01):>10.4%}")
+    print(f"  Max Drawdown      : {max_dd:>10.4%}")
+    print(f"  Total +ve days    : {(ret > 0).sum():>10,} / {len(ret):,}")
+
+    # -- Step 4: multi-asset summary ------------------------------------------
+    section("STEP 4 — Multi-Asset Summary")
+    asset_columns = (
+        ("Equity", "Equity_Returns_clean"),
+        ("Oil", "Oil_Returns_clean"),
+        ("Gold", "MA_Gold_Returns_clean"),
+    )
+
+    print(f"\n  {'Asset':<10} {'Ann.Ret':>10} {'Ann.Vol':>10} {'Sharpe':>10} {'VaR95%':>10}")
+    print(f"  {'-'*52}")
+    for name, column in asset_columns:
+        series = df[column]
+        ar, av, sh = annualised(series)
+        v95 = series.quantile(0.05)
+        print(f"  {name:<10} {ar:>10.4%} {av:>10.4%} {sh:>10.4f} {v95:>10.4%}")
+
+    # -- Step 5: engineered features ------------------------------------------
+    section("STEP 5 — Engineered Features")
+    markers = ("Vol", "Momentum", "RoC", "Corr", "norm")
+    feat_cols = [c for c in df.columns if any(m in c for m in markers)]
+
+    print(f"\n  {'Feature':<35} {'Mean':>10} {'Std':>10} {'NaN':>6}")
+    print(f"  {'-'*65}")
+    for c in feat_cols:
+        feature = df[c]
+        print(f"  {c:<35} {feature.mean():>10.4f} {feature.std():>10.4f} {feature.isna().sum():>6}")
+
+    # -- Step 6: correlation matrix -------------------------------------------
+    section("STEP 6 — Return Correlations")
+    # Keys are the columns to correlate (in display order); values are the short labels.
+    label_map = {
+        "Equity_Returns_clean"  : "Equity",
+        "Oil_Returns_clean"     : "Oil",
+        "MA_Gold_Returns_clean" : "Gold",
+        "Inflation_norm"        : "Inflation",
+        "Interest_Rate_norm"    : "IntRate",
+        "USD_Index_norm"        : "USD",
+        "Sentiment_norm"        : "Sentiment",
+    }
+    corr = df[list(label_map)].corr().round(3)
+    corr = corr.rename(columns=label_map, index=label_map)
+    print(f"\n{corr.to_string()}")
+
+    # -- Step 7: charts -------------------------------------------------------
+    section("STEP 7 — Generating Charts -> reports/")
+    _save_charts(df)
+
+    # -- Done -----------------------------------------------------------------
+    section("PHASE 1 COMPLETE")
+    for line in (
+        "  Ingestion    : OK",
+        "  Preprocessing: OK",
+        "  Features     : OK",
+        "  Charts saved : reports/",
+        "\n  Next -> Phase 2: Risk Model (VaR, Drawdown, Sharpe)",
+    ):
+        print(line)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Chart Generator
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _finish_chart(fig, filename: str):
+    """Tighten ``fig``'s layout, save it under ``REPORT_DIR``, close it and log the save."""
+    fig.tight_layout()
+    fig.savefig(REPORT_DIR / filename, dpi=120, bbox_inches="tight")
+    plt.close(fig)
+    print(f"  Saved: {filename}")
+
+
+def _save_charts(df: pd.DataFrame):
+    """Render the seven Phase 1 charts from ``df`` and save them as PNGs in ``REPORT_DIR``.
+
+    Columns read from ``df``: ``Date``, ``Equity_Price``, ``Equity_SMA10``,
+    ``Equity_Volume``, ``Equity_Returns_clean``, ``Oil_Returns_clean``,
+    ``MA_Gold_Returns_clean``, ``Equity_RollingVol20``, ``Oil_Volatility``,
+    ``Inflation``, ``Interest_Rate``, ``USD_Index`` and ``Sentiment``.
+
+    Side effects: updates the global ``plt.rcParams`` with a dark theme (not
+    restored afterwards), writes ``01_…png`` through ``07_…png`` and prints
+    one "Saved" line per file.
+    """
+    plt.rcParams.update({
+        "figure.facecolor": "#0f1117",
+        "axes.facecolor"  : "#1a1d2e",
+        "axes.edgecolor"  : "#3a3d4e",
+        "text.color"      : "white",
+        "axes.labelcolor" : "white",
+        "xtick.color"     : "white",
+        "ytick.color"     : "white",
+        "grid.color"      : "#2a2d3e",
+        "grid.linestyle"  : "--",
+        "grid.alpha"      : 0.5,
+    })
+
+    dates = df["Date"]
+    equity_ret = df["Equity_Returns_clean"]
+    cum_equity = (1 + equity_ret).cumprod()  # shared by the cumulative-return and drawdown charts
+
+    # -- Chart 1: equity price + SMA, with volume below --
+    fig, (price_ax, volume_ax) = plt.subplots(2, 1, figsize=(16, 8), sharex=True)
+    price_ax.plot(dates, df["Equity_Price"], color="#00d4ff", lw=0.8, label="Price")
+    price_ax.plot(dates, df["Equity_SMA10"], color="#ff6b35", lw=1.0, alpha=0.8, label="SMA10")
+    price_ax.set_title("Equity Price & SMA10", fontsize=13)
+    price_ax.legend()
+    price_ax.grid(True)
+
+    volume_ax.bar(dates, df["Equity_Volume"], color="#7c3aed", alpha=0.5, width=1)
+    volume_ax.set_title("Daily Volume", fontsize=13)
+    volume_ax.grid(True)
+    _finish_chart(fig, "01_equity_price.png")
+
+    # -- Chart 2: cumulative returns --
+    cum_oil = (1 + df["Oil_Returns_clean"]).cumprod()
+    cum_gold = (1 + df["MA_Gold_Returns_clean"]).cumprod()
+
+    fig, ax = plt.subplots(figsize=(16, 6))
+    ax.plot(dates, cum_equity, color="#00d4ff", lw=1.2, label="Equity")
+    ax.plot(dates, cum_oil, color="#f59e0b", lw=1.2, label="Oil")
+    ax.plot(dates, cum_gold, color="#fbbf24", lw=1.2, label="Gold")
+    ax.axhline(1, color="white", lw=0.5, linestyle="--", alpha=0.4)
+    ax.set_title("Cumulative Returns — Equity vs Oil vs Gold", fontsize=13)
+    ax.set_ylabel("Growth of $1")
+    ax.legend()
+    ax.grid(True)
+    _finish_chart(fig, "02_cumulative_returns.png")
+
+    # -- Chart 3: drawdown (fractional drop from the running peak) --
+    peak = cum_equity.cummax()
+    dd = (cum_equity - peak) / peak
+
+    fig, ax = plt.subplots(figsize=(16, 5))
+    ax.fill_between(dates, dd, 0, color="#f87171", alpha=0.7)
+    ax.set_title("Equity Drawdown", fontsize=13)
+    ax.set_ylabel("Drawdown %")
+    ax.grid(True)
+    _finish_chart(fig, "03_drawdown.png")
+
+    # -- Chart 4: rolling volatility --
+    fig, ax = plt.subplots(figsize=(16, 5))
+    ax.plot(dates, df["Equity_RollingVol20"], color="#a78bfa", lw=1, label="Equity Vol (20d)")
+    ax.plot(dates, df["Oil_Volatility"], color="#f59e0b", lw=1, alpha=0.7, label="Oil Vol")
+    ax.set_title("Rolling Volatility", fontsize=13)
+    ax.set_ylabel("Annualised Volatility")
+    ax.legend()
+    ax.grid(True)
+    _finish_chart(fig, "04_rolling_volatility.png")
+
+    # -- Chart 5: return distribution with VaR and mean markers --
+    q05 = equity_ret.quantile(0.05)
+    q01 = equity_ret.quantile(0.01)
+    mean_ret = equity_ret.mean()
+
+    fig, ax = plt.subplots(figsize=(12, 5))
+    ax.hist(equity_ret, bins=120, color="#00d4ff", alpha=0.8, edgecolor="none")
+    ax.axvline(q05, color="red", lw=2, linestyle="--", label=f"VaR 95%: {q05:.3%}")
+    ax.axvline(q01, color="#f87171", lw=2, linestyle="--", label=f"VaR 99%: {q01:.3%}")
+    ax.axvline(mean_ret, color="#10b981", lw=2, label=f"Mean: {mean_ret:.4%}")
+    ax.set_title("Equity Return Distribution", fontsize=13)
+    ax.set_xlabel("Daily Return")
+    ax.legend()
+    ax.grid(True)
+    _finish_chart(fig, "05_return_distribution.png")
+
+    # -- Chart 6: macro signals, each with a 30-row rolling mean overlay --
+    macro_cols = ["Inflation", "Interest_Rate", "USD_Index", "Sentiment"]
+    mac_colors = ["#f87171", "#fb923c", "#a78bfa", "#34d399"]
+
+    fig, axes = plt.subplots(4, 1, figsize=(16, 12), sharex=True)
+    for ax, col, color in zip(axes, macro_cols, mac_colors):
+        ax.plot(dates, df[col], color=color, lw=0.7, alpha=0.8)
+        ax.plot(dates, df[col].rolling(30).mean(),
+                color="white", lw=1.2, linestyle="--", alpha=0.6, label="30d avg")
+        ax.set_title(col, fontsize=11)
+        ax.grid(True)
+        ax.legend(fontsize=8)
+    fig.suptitle("Macro Signals Over Time", fontsize=14, y=1.01)
+    _finish_chart(fig, "06_macro_signals.png")
+
+    # -- Chart 7: correlation heatmap --
+    # Keys are the columns to correlate (in display order); values are the tick labels.
+    label_map = {
+        "Equity_Returns_clean"  : "Equity",
+        "Oil_Returns_clean"     : "Oil",
+        "MA_Gold_Returns_clean" : "Gold",
+        "Inflation"             : "Inflation",
+        "Interest_Rate"         : "IntRate",
+        "USD_Index"             : "USD",
+        "Sentiment"             : "Sentiment",
+    }
+    corr = df[list(label_map)].corr().rename(columns=label_map, index=label_map)
+
+    fig, ax = plt.subplots(figsize=(9, 7))
+    im = ax.imshow(corr, cmap=plt.cm.RdBu_r, vmin=-1, vmax=1)
+    plt.colorbar(im, ax=ax, fraction=0.04)
+    ax.set_xticks(range(len(corr.columns)))
+    ax.set_yticks(range(len(corr.index)))
+    ax.set_xticklabels(corr.columns, rotation=45, ha="right")
+    ax.set_yticklabels(corr.index)
+    for i in range(len(corr)):
+        for j in range(len(corr.columns)):
+            value = corr.iloc[i, j]
+            # White text on strongly coloured cells, black on pale ones.
+            ax.text(j, i, f"{value:.2f}",
+                    ha="center", va="center", fontsize=9,
+                    color="white" if abs(value) > 0.4 else "black")
+    ax.set_title("Correlation Matrix", fontsize=13)
+    _finish_chart(fig, "07_correlation_heatmap.png")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Run the Phase 1 report only when this file is executed as a script.
+if __name__ == "__main__":
+    evaluate()
diff --git a/evaluate_phase2.py b/evaluate_phase2.py
new file mode 100644
index 00000000..a4228ddb
--- /dev/null
+++ b/evaluate_phase2.py
@@ -0,0 +1,311 @@
+"""
+evaluate_phase2.py
+------------------
+Phase 2 evaluation: Risk Model + Portfolio State Manager.
+
+Runs a simple Buy-and-Hold simulation to validate the full
+risk / portfolio pipeline before building the signal engine.
+
+Usage:
+    venv\\Scripts\\python evaluate_phase2.py
+"""
+
+import sys
+import warnings
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+warnings.filterwarnings("ignore")
+
+ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT / "src"))
+
+from ingestion   import load_master
+from preprocessing import preprocess
+from risk        import (
+    compute_all_risk_metrics,
+    rolling_var,
+    rolling_sharpe,
+    rolling_volatility,
+)
+from portfolio   import Portfolio
+
+REPORT_DIR = ROOT / "reports"
+REPORT_DIR.mkdir(exist_ok=True)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Helpers
+# ─────────────────────────────────────────────────────────────────────────────
+
+def section(title: str):
+    bar = "=" * 60
+    print(f"\n{bar}")
+    print(f"  {title}")
+    print(bar)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Simulation: Buy-and-Hold Equity
+# ─────────────────────────────────────────────────────────────────────────────
+
+def run_buy_and_hold(df: pd.DataFrame) -> Portfolio:
+    """
+    Simulate a simple buy-and-hold strategy on Equity.
+    Buys on Day 1 with 90% of capital, holds to the end.
+    This validates the Portfolio class before any signal logic.
+    """
+    port = Portfolio(
+        initial_capital  = 100_000,
+        transaction_cost = 0.001,    # 0.1% commission
+        slippage_pct     = 0.0005,   # 0.05% slippage
+        max_position_pct = 0.95,     # allow up to 95% in one asset
+    )
+
+    first_row  = df.iloc[0]
+    first_price = first_row["Equity_Price"]
+    first_date  = first_row["Date"]
+
+    # Allocate 90% of capital to equity on Day 1
+    budget   = port.initial_capital * 0.90
+    quantity = budget // first_price          # whole shares only
+    port.buy("Equity", quantity, first_price, first_date,
+             reason="Buy-and-hold initialisation")
+
+    # Step through every day
+    for _, row in df.iterrows():
+        port.record_snapshot(
+            date   = row["Date"],
+            prices = {
+                "Equity": row["Equity_Price"],
+                "Gold"  : row["MA_Gold_Price"],
+                "Oil"   : row["Oil_Price"],
+            }
+        )
+
+    return port
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Main Evaluation
+# ─────────────────────────────────────────────────────────────────────────────
+
+def evaluate() -> None:
+    """Load data, simulate buy-and-hold, print risk metrics and the trade log, then save charts."""
+    # ── Load + preprocess ─────────────────────────────────────────────────────
+    section("LOADING & PREPROCESSING")
+    raw    = load_master()
+    df, _  = preprocess(raw)  # second return value is not needed here
+    print(f"  Dataset: {len(df):,} rows | {df['Date'].min().date()} -> {df['Date'].max().date()}")
+
+    # ── Portfolio simulation ───────────────────────────────────────────────────
+    section("STEP 1 — Buy-and-Hold Portfolio Simulation")
+    port = run_buy_and_hold(df)
+
+    final_prices = {
+        "Equity": df.iloc[-1]["Equity_Price"],
+        "Gold"  : df.iloc[-1]["MA_Gold_Price"],
+        "Oil"   : df.iloc[-1]["Oil_Price"],
+    }
+    print(port.summary(prices=final_prices))
+
+    nav_df    = port.nav_history  # snapshot history; its "date" column is passed as the dates below
+    port_ret  = port.get_returns()
+
+    # ── Risk Metrics ──────────────────────────────────────────────────────────
+    section("STEP 2 — Risk Metrics")
+
+    equity_ret   = df["Equity_Returns_clean"]
+    oil_ret      = df["Oil_Returns_clean"]
+    gold_ret     = df["MA_Gold_Returns_clean"]
+
+    # Portfolio risk (using NAV returns)
+    port_metrics = compute_all_risk_metrics(
+        returns           = port_ret,
+        benchmark_returns = equity_ret,   # equity index as benchmark
+        dates             = nav_df["date"],
+        risk_free_annual  = 0.02,
+    )
+    print("\n  [Buy-and-Hold Portfolio]")
+    print(port_metrics.summary())
+
+    # Raw equity risk (as baseline)
+    eq_metrics = compute_all_risk_metrics(
+        returns          = equity_ret,
+        dates            = df["Date"],
+        risk_free_annual = 0.02,
+    )
+    print("\n  [Raw Equity Benchmark]")
+    print(eq_metrics.summary())
+
+    # ── Multi-asset Risk Table ────────────────────────────────────────────────
+    section("STEP 3 — Multi-Asset Risk Comparison")
+
+    assets = {
+        "Portfolio" : port_ret,
+        "Equity"    : equity_ret,
+        "Oil"       : oil_ret,
+        "Gold"      : gold_ret,
+    }
+
+    print(f"\n  {'Asset':<12} {'Ann.Ret':>9} {'Ann.Vol':>9} {'Sharpe':>8} "
+          f"{'Sortino':>9} {'Calmar':>8} {'VaR95%':>8} {'MaxDD':>9}")
+    print(f"  {'-' * 72}")
+
+    for name, ret in assets.items():
+        m = compute_all_risk_metrics(ret, risk_free_annual=0.02)  # no benchmark/dates for the table rows
+        print(
+            f"  {name:<12} {m.ann_return:>9.3%} {m.ann_volatility:>9.3%} "
+            f"{m.sharpe_ratio:>8.4f} {m.sortino_ratio:>9.4f} "
+            f"{m.calmar_ratio:>8.4f} {m.var_95.historical:>8.3%} "
+            f"{m.drawdown.max_drawdown:>9.3%}"
+        )
+
+    # ── Trade Log Audit ───────────────────────────────────────────────────────
+    section("STEP 4 — Trade Log (Audit Trail)")
+    tlog = port.trade_log
+    print(f"\n  Total trades executed: {len(tlog)}")
+    print(tlog.to_string(index=False))
+
+    # ── Charts ────────────────────────────────────────────────────────────────
+    section("STEP 5 — Generating Phase 2 Charts")
+    _save_charts(df, nav_df, port_ret, equity_ret)
+
+    # ── Done ──────────────────────────────────────────────────────────────────
+    section("PHASE 2 COMPLETE")
+    print("  Portfolio Manager : OK")
+    print("  Risk Metrics      : OK")
+    print("  VaR / CVaR        : OK")
+    print("  Drawdown          : OK")
+    print("  Alpha / Beta      : OK")
+    print("  Trade Audit Log   : OK")
+    print("  Charts saved      : reports/")
+    print("\n  Next -> Phase 3: Signal Engine (buy/sell/hold)")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Charts
+# ─────────────────────────────────────────────────────────────────────────────
+
+def _save_charts(df, nav_df, port_ret, equity_ret) -> None:
+    """Render charts 08-11 (NAV, drawdown, rolling risk, return distributions) as PNGs in REPORT_DIR."""
+    plt.rcParams.update({  # dark theme; this changes the global pyplot defaults
+        "figure.facecolor": "#0f1117",
+        "axes.facecolor"  : "#1a1d2e",
+        "axes.edgecolor"  : "#3a3d4e",
+        "text.color"      : "white",
+        "axes.labelcolor" : "white",
+        "xtick.color"     : "white",
+        "ytick.color"     : "white",
+        "grid.color"      : "#2a2d3e",
+        "grid.linestyle"  : "--",
+        "grid.alpha"      : 0.5,
+    })
+    # ── Chart 8: NAV over time ──
+    fig, axes = plt.subplots(2, 1, figsize=(16, 9), sharex=True)
+
+    cum_eq = (1 + equity_ret).cumprod() * 100_000  # equity returns compounded from a 100,000 start
+    axes[0].plot(nav_df["date"], nav_df["nav"],  color="#00d4ff", lw=1.5, label="Portfolio NAV")
+    axes[0].plot(df["Date"],     cum_eq.values,  color="#ff6b35", lw=1.0, alpha=0.7, linestyle="--", label="Equity Buy&Hold ($100K)")
+    axes[0].axhline(100_000, color="white", lw=0.5, linestyle=":", alpha=0.4, label="Initial Capital")
+    axes[0].set_title("Portfolio NAV vs Equity Benchmark", fontsize=13)
+    axes[0].set_ylabel("Portfolio Value ($)")
+    axes[0].legend(fontsize=9)
+    axes[0].grid(True)
+
+    # Daily returns: bars are green for returns >= 0 and red otherwise
+    axes[1].bar(nav_df["date"], nav_df["returns"], color=nav_df["returns"].apply(
+        lambda x: "#10b981" if x >= 0 else "#f87171"), alpha=0.7, width=1)
+    axes[1].axhline(0, color="white", lw=0.5)
+    axes[1].set_title("Daily Portfolio Returns", fontsize=13)
+    axes[1].set_ylabel("Return")
+    axes[1].grid(True)
+
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "08_portfolio_nav.png", dpi=120, bbox_inches="tight")
+    plt.close()
+    print("  Saved: 08_portfolio_nav.png")
+
+    # ── Chart 9: Drawdown comparison ──
+    fig, ax = plt.subplots(figsize=(16, 6))
+
+    # Portfolio drawdown: drop of cumulative growth relative to its running maximum
+    cum_p  = (1 + port_ret).cumprod()
+    dd_p   = (cum_p - cum_p.cummax()) / cum_p.cummax()
+
+    # Equity drawdown, computed the same way
+    cum_e  = (1 + equity_ret).cumprod()
+    dd_e   = (cum_e - cum_e.cummax()) / cum_e.cummax()
+
+    # NOTE(review): assumes port_ret has one value per nav_df row - confirm against Portfolio.get_returns
+    ax.fill_between(nav_df["date"], dd_p.values,           color="#7c3aed", alpha=0.6, label="Portfolio DD")
+    ax.fill_between(df["Date"],     dd_e.values, color="#f87171", alpha=0.4, label="Equity DD")
+    ax.axhline(0, color="white", lw=0.5)
+    ax.set_title("Drawdown: Portfolio vs Equity", fontsize=13)
+    ax.set_ylabel("Drawdown %")
+    ax.legend()
+    ax.grid(True)
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "09_drawdown_comparison.png", dpi=120, bbox_inches="tight")
+    plt.close()
+    print("  Saved: 09_drawdown_comparison.png")
+
+    # ── Chart 10: Rolling Risk Metrics ──
+    fig, axes = plt.subplots(3, 1, figsize=(16, 12), sharex=True)
+
+    roll_vol   = rolling_volatility(equity_ret, window=20)
+    roll_sh    = rolling_sharpe(equity_ret, window=252)
+    roll_var95 = rolling_var(equity_ret, window=252, confidence=0.95)
+
+    axes[0].plot(df["Date"], roll_vol,   color="#a78bfa", lw=1)
+    axes[0].set_title("Rolling 20-day Annualised Volatility", fontsize=11)
+    axes[0].set_ylabel("Volatility")
+    axes[0].grid(True)
+
+    axes[1].plot(df["Date"], roll_sh, color="#34d399", lw=1)
+    axes[1].axhline(0, color="white", lw=0.5, linestyle="--")
+    axes[1].axhline(1, color="#10b981", lw=0.5, linestyle=":", alpha=0.6, label="Sharpe=1 target")
+    axes[1].set_title("Rolling 252-day Sharpe Ratio", fontsize=11)
+    axes[1].set_ylabel("Sharpe")
+    axes[1].legend(fontsize=8)
+    axes[1].grid(True)
+
+    axes[2].fill_between(df["Date"], roll_var95, 0, color="#f87171", alpha=0.7)
+    axes[2].set_title("Rolling 252-day VaR (95%)", fontsize=11)
+    axes[2].set_ylabel("Daily VaR")
+    axes[2].grid(True)
+
+    plt.suptitle("Rolling Risk Metrics — Equity", fontsize=14)
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "10_rolling_risk.png", dpi=120, bbox_inches="tight")
+    plt.close()
+    print("  Saved: 10_rolling_risk.png")
+
+    # ── Chart 11: Return Distribution with VaR lines ──
+    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
+
+    for ax, (label, ret) in zip(axes, [("Portfolio", port_ret), ("Equity", equity_ret)]):
+        ax.hist(ret.dropna(), bins=100, color="#00d4ff", alpha=0.7, edgecolor="none", density=True)
+        v95 = ret.quantile(0.05)  # 5th percentile of the returns, drawn as the "VaR 95%" line
+        v99 = ret.quantile(0.01)  # 1st percentile of the returns, drawn as the "VaR 99%" line
+        ax.axvline(v95, color="orange", lw=2, label=f"VaR 95%: {v95:.3%}")
+        ax.axvline(v99, color="red",    lw=2, label=f"VaR 99%: {v99:.3%}")
+        ax.axvline(ret.mean(), color="#10b981", lw=1.5, linestyle="--", label=f"Mean: {ret.mean():.4%}")
+        ax.set_title(f"{label} Return Distribution", fontsize=12)
+        ax.set_xlabel("Daily Return")
+        ax.legend(fontsize=8)
+        ax.grid(True)
+
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "11_return_distributions.png", dpi=120, bbox_inches="tight")
+    plt.close()
+    print("  Saved: 11_return_distributions.png")
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+if __name__ == "__main__":  # run the Phase 2 evaluation only when executed as a script
+    evaluate()
diff --git a/evaluate_phase3.py b/evaluate_phase3.py
new file mode 100644
index 00000000..100c8ee1
--- /dev/null
+++ b/evaluate_phase3.py
@@ -0,0 +1,184 @@
+"""
+evaluate_phase3.py
+------------------
+Phase 3 evaluation: Signal Engine + Portfolio Simulator.
+
+Runs the Risk-Aware Signal Engine over the dataset and evaluates
+its performance against the Buy-and-Hold benchmark.
+
+Usage:
+    venv\\Scripts\\python evaluate_phase3.py
+"""
+
+import sys
+import warnings
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+
+warnings.filterwarnings("ignore")
+
+ROOT = Path(__file__).resolve().parent
+sys.path.insert(0, str(ROOT / "src"))
+
+from ingestion   import load_master
+from preprocessing import preprocess
+from risk        import compute_all_risk_metrics
+from portfolio   import Portfolio
+from signals     import RiskAwareSignalEngine
+
+REPORT_DIR = ROOT / "reports"
+REPORT_DIR.mkdir(exist_ok=True)
+
+def section(title: str):
+    bar = "=" * 60
+    print(f"\n{bar}")
+    print(f"  {title}")
+    print(bar)
+
+def run_strategy(df: pd.DataFrame) -> Portfolio:
+    port = Portfolio(
+        initial_capital  = 100_000,
+        transaction_cost = 0.001,
+        slippage_pct     = 0.0005,
+        max_position_pct = 0.95,
+    )
+    
+    engine = RiskAwareSignalEngine()
+    
+    # We rebalance monthly (every 21 trading days) to save transaction costs,
+    # unless a critical risk threshold is crossed (sentiment drops suddenly).
+    last_rebalance_idx = -999
+    
+    for i, row in df.iterrows():
+        date = row["Date"]
+        prices = {
+            "Equity": row["Equity_Price"],
+            "Gold"  : row["MA_Gold_Price"],
+            "Oil"   : row["Oil_Price"],
+        }
+        
+        # Always record snapshot to track daily NAV
+        port.record_snapshot(date=date, prices=prices)
+        
+        # Generate target weights
+        signal = engine.generate_signal(row)
+        
+        # Current portfolio value and allocations
+        nav = port.compute_nav(prices)
+        current_weights = {asset: (port.positions.get(asset, 0) * prices.get(asset, 0)) / nav if nav > 0 else 0 
+                           for asset in prices}
+        
+        # Rebalance if 21 days have passed OR it's a "Risk-Off" emergency signal
+        is_risk_off = "Risk-Off" in signal.reason or "De-risk" in signal.reason
+        
+        # Calculate max deviation from target weights
+        max_deviation = 0
+        for asset, target_w in signal.target_weights.items():
+            curr_w = current_weights.get(asset, 0.0)
+            max_deviation = max(max_deviation, abs(target_w - curr_w))
+        
+        # Only rebalance if it's been 21 days OR (it's risk-off AND we are off-target by > 5%)
+        # This prevents trading every single day during a prolonged risk-off regime.
+        should_rebalance = (i - last_rebalance_idx >= 21) or (is_risk_off and max_deviation > 0.05)
+        
+        if should_rebalance:
+            port.rebalance(
+                target_weights=signal.target_weights,
+                prices=prices,
+                date=date,
+                reason=signal.reason
+            )
+            # update last rebalance index, but if we emergency traded, we still wait 21 days
+            # before regular rebalancing to avoid whipsawing.
+            if i - last_rebalance_idx >= 21:
+                last_rebalance_idx = i
+
+    return port
+
+def run_buy_and_hold(df: pd.DataFrame) -> Portfolio:
+    """Run pure Buy and Hold for benchmark comparison"""
+    port = Portfolio(initial_capital=100_000)
+    first_price = df.iloc[0]["Equity_Price"]
+    quantity = (100_000 * 0.90) // first_price
+    port.buy("Equity", quantity, first_price, df.iloc[0]["Date"], "Buy and Hold")
+    
+    for _, row in df.iterrows():
+        port.record_snapshot(
+            date=row["Date"],
+            prices={"Equity": row["Equity_Price"], "Gold": row["MA_Gold_Price"], "Oil": row["Oil_Price"]}
+        )
+    return port
+
+def evaluate() -> None:
+    """Run the strategy and the buy-and-hold benchmark, print their risk metrics and trades, save charts."""
+    section("LOADING & PREPROCESSING")
+    raw = load_master()
+    df, _ = preprocess(raw)
+    print(f"  Dataset: {len(df):,} rows")
+
+    section("STEP 1 — Run Strategy")
+    strat_port = run_strategy(df)
+    strat_ret = strat_port.get_returns()
+    # Benchmark portfolio simulated on the same rows
+    bh_port = run_buy_and_hold(df)
+    bh_ret = bh_port.get_returns()
+
+    section("STEP 2 — Risk Metrics Comparison")
+    # Both portfolios are measured against the cleaned equity return series
+    equity_ret = df["Equity_Returns_clean"]
+    
+    strat_metrics = compute_all_risk_metrics(strat_ret, benchmark_returns=equity_ret, risk_free_annual=0.02)
+    bh_metrics = compute_all_risk_metrics(bh_ret, benchmark_returns=equity_ret, risk_free_annual=0.02)
+    print("\n  [Active Strategy Portfolio]")
+    print(strat_metrics.summary())
+    
+    print("\n  [Buy & Hold Portfolio]")
+    print(bh_metrics.summary())
+    
+    section("STEP 3 — Extracting Trade Logs")
+    tlog = strat_port.trade_log
+    print(f"\n  Total trades executed by Strategy: {len(tlog)}")
+    print("  Showing first 10 trades:")
+    if len(tlog) > 0:
+        print(tlog.head(10).to_string(index=False, columns=["date", "asset", "action", "quantity", "price", "reason"]))
+
+    section("STEP 4 — Generating Phase 3 Charts")
+    _save_charts(df, strat_port.nav_history, bh_port.nav_history, strat_ret, bh_ret)
+    
+    section("PHASE 3 COMPLETE")
+
+def _save_charts(df, strat_nav, bh_nav, strat_ret, bh_ret) -> None:
+    """Save the NAV comparison (12) and drawdown comparison (13) PNGs to REPORT_DIR; ``df`` is not used here."""
+    plt.rcParams.update({"figure.facecolor": "#0f1117", "axes.facecolor": "#1a1d2e", "text.color": "white", "axes.labelcolor": "white", "xtick.color": "white", "ytick.color": "white", "grid.color": "#2a2d3e"})
+    fig, ax = plt.subplots(figsize=(16, 6))  # chart 12: NAV of both portfolios over time
+    ax.plot(strat_nav["date"], strat_nav["nav"], color="#34d399", lw=1.5, label="Active Strategy")
+    ax.plot(bh_nav["date"], bh_nav["nav"], color="#ff6b35", lw=1.0, alpha=0.7, label="Buy & Hold ($100K)")
+    ax.axhline(100_000, color="white", lw=0.5, linestyle=":")
+    ax.set_title("Strategy vs Buy & Hold NAV", fontsize=13)
+    ax.set_ylabel("Portfolio Value ($)")
+    ax.legend()
+    ax.grid(True, linestyle="--", alpha=0.5)
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "12_strategy_vs_bh.png", dpi=120)
+    plt.close()
+    # Chart 13: drawdown = drop of cumulative growth relative to its running maximum
+    fig, ax = plt.subplots(figsize=(16, 5))
+    cum_s = (1 + strat_ret).cumprod()
+    cum_b = (1 + bh_ret).cumprod()
+    ax.fill_between(strat_nav["date"], (cum_s - cum_s.cummax())/cum_s.cummax(), color="#34d399", alpha=0.5, label="Active Strategy DD")
+    ax.fill_between(bh_nav["date"], (cum_b - cum_b.cummax())/cum_b.cummax(), color="#f87171", alpha=0.4, label="Buy & Hold DD")
+    ax.set_title("Drawdown Comparison", fontsize=13)
+    ax.legend()
+    ax.grid(True, linestyle="--", alpha=0.5)
+    plt.tight_layout()
+    plt.savefig(REPORT_DIR / "13_strategy_drawdown.png", dpi=120)
+    plt.close()
+    print("  Saved: 12_strategy_vs_bh.png and 13_strategy_drawdown.png")
+
+if __name__ == "__main__":  # run the Phase 3 evaluation only when executed as a script
+    evaluate()
diff --git a/models/rf_model.joblib b/models/rf_model.joblib
new file mode 100644
index 00000000..b3c5b62d
Binary files /dev/null and b/models/rf_model.joblib differ
diff --git a/notebooks/01_eda.ipynb b/notebooks/01_eda.ipynb
new file mode 100644
index 00000000..41817f7d
--- /dev/null
+++ b/notebooks/01_eda.ipynb
@@ -0,0 +1,359 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "d2b00f42",
+   "metadata": {},
+   "source": [
+    "# 01 — EDA: Exploratory Data Analysis\n",
+    "**Goal:** Understand all 4 datasets, spot patterns, and validate the data before building any models.\n",
+    "\n",
+    "Datasets:\n",
+    "- `equity_dataset.csv` — Equity price, returns, SMA  \n",
+    "- `macro_dataset.csv` — Inflation, Interest rates, USD, Sentiment  \n",
+    "- `multi_asset_dataset.csv` — Oil, Gold, Bonds  \n",
+    "- `oil_dataset.csv` — Oil price + volatility"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c80e1fe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "sys.path.append('../src')  # so we can import from src/\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import matplotlib.dates as mdates\n",
+    "import seaborn as sns\n",
+    "\n",
+    "from ingestion import load_master, validate\n",
+    "\n",
+    "plt.style.use('dark_background')\n",
+    "sns.set_palette('husl')\n",
+    "\n",
+    "# Load everything in one shot\n",
+    "df = load_master()\n",
+    "validate(df)\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "871cacc0",
+   "metadata": {},
+   "source": [
+    "## 1. Dataset Overview"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dadd72a5",
+   "metadata": {
+    "vscode": {
+     "languageId": "markdown"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "print(f\"Total rows       : {len(df):,}\")\n",
+    "print(f\"Date range       : {df['Date'].min().date()} → {df['Date'].max().date()}\")\n",
+    "print(f\"Trading days/yr  : {len(df) / df['Date'].dt.year.nunique():.0f}\")\n",
+    "print(f\"\\nAll columns:\")\n",
+    "for col in df.columns:\n",
+    "    print(f\"  {col:<25} | NaN: {df[col].isna().sum():>5} | dtype: {df[col].dtype}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "192b5ad8",
+   "metadata": {},
+   "source": [
+    "## 2. Equity Price Over Time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a3a224b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axes = plt.subplots(2, 1, figsize=(16, 8), sharex=True)\n",
+    "\n",
+    "# Price\n",
+    "axes[0].plot(df['Date'], df['Equity_Price'], color='#00d4ff', lw=1, label='Price')\n",
+    "axes[0].plot(df['Date'], df['Equity_SMA10'], color='#ff6b35', lw=1, alpha=0.8, label='SMA10')\n",
+    "axes[0].set_title('Equity Price & 10-Day SMA', fontsize=14)\n",
+    "axes[0].set_ylabel('Price ($)')\n",
+    "axes[0].legend()\n",
+    "\n",
+    "# Volume\n",
+    "axes[1].bar(df['Date'], df['Equity_Volume'], color='#7c3aed', alpha=0.6, width=1)\n",
+    "axes[1].set_title('Daily Volume', fontsize=14)\n",
+    "axes[1].set_ylabel('Volume')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9df9c5eb",
+   "metadata": {},
+   "source": [
+    "## 3. Returns Distribution"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56b60c0b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "returns = df['Equity_Returns'].dropna()\n",
+    "\n",
+    "fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n",
+    "\n",
+    "# Histogram\n",
+    "axes[0].hist(returns, bins=100, color='#00d4ff', alpha=0.8, edgecolor='none')\n",
+    "axes[0].axvline(returns.mean(), color='#ff6b35', lw=2, label=f'Mean: {returns.mean():.4f}')\n",
+    "axes[0].axvline(returns.quantile(0.05), color='red', lw=1.5, linestyle='--', label=f'5% VaR: {returns.quantile(0.05):.4f}')\n",
+    "axes[0].set_title('Return Distribution')\n",
+    "axes[0].legend(fontsize=9)\n",
+    "\n",
+    "# Daily returns over time (dates aligned to the rows that survived dropna)\n",
+    "axes[1].plot(df.loc[returns.index, 'Date'], returns, color='#00d4ff', alpha=0.5, lw=0.5)\n",
+    "axes[1].axhline(0, color='white', lw=0.5, linestyle='--')\n",
+    "axes[1].set_title('Daily Returns Over Time')\n",
+    "\n",
+    "# Cumulative return\n",
+    "cumret = (1 + returns).cumprod()\n",
+    "axes[2].plot(df.loc[cumret.index, 'Date'], cumret.values, color='#10b981', lw=1.5)\n",
+    "axes[2].set_title('Cumulative Return')\n",
+    "axes[2].set_ylabel('Portfolio Value (starting at 1)')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "print(f\"Annualized Return : {returns.mean() * 365:.2%}\")\n",
+    "print(f\"Annualized Std    : {returns.std() * np.sqrt(365):.2%}\")\n",
+    "print(f\"Sharpe (0% RFR)   : {(returns.mean() / returns.std()) * np.sqrt(365):.3f}\")\n",
+    "print(f\"Max Daily Return  : {returns.max():.4f}\")\n",
+    "print(f\"Min Daily Return  : {returns.min():.4f}\")\n",
+    "print(f\"Skewness          : {returns.skew():.4f}\")\n",
+    "print(f\"Kurtosis          : {returns.kurtosis():.4f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e4325835",
+   "metadata": {},
+   "source": [
+    "## 4. Multi-Asset Comparison"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aaae1d61",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Normalize prices to 100 at start for fair comparison\n",
+    "assets = {\n",
+    "    'Equity' : 'Equity_Price',\n",
+    "    'Oil'    : 'Oil_Price',\n",
+    "    'Gold'   : 'MA_Gold_Price',\n",
+    "    'Bonds'  : 'MA_Bonds_Price'\n",
+    "}\n",
+    "\n",
+    "colors = ['#00d4ff', '#f59e0b', '#fbbf24', '#10b981']\n",
+    "\n",
+    "fig, ax = plt.subplots(figsize=(16, 6))\n",
+    "for (name, col), color in zip(assets.items(), colors):\n",
+    "    series = df[col].dropna()\n",
+    "    normalized = (series / series.iloc[0]) * 100\n",
+    "    ax.plot(df.loc[series.index, 'Date'], normalized, label=name, lw=1.2, color=color)  # dates of the kept rows\n",
+    "\n",
+    "ax.axhline(100, color='white', lw=0.5, linestyle='--', alpha=0.5)\n",
+    "ax.set_title('Asset Prices Normalized to 100 (Base = Jan 2020)', fontsize=14)\n",
+    "ax.set_ylabel('Normalized Price')\n",
+    "ax.legend()\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed91b070",
+   "metadata": {},
+   "source": [
+    "## 5. Correlation Matrix"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7701719c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "return_cols = [\n",
+    "    'Equity_Returns', 'Oil_Returns', 'MA_Oil_Returns',\n",
+    "    'MA_Gold_Returns', 'Inflation', 'Interest_Rate',\n",
+    "    'USD_Index', 'Sentiment'\n",
+    "]\n",
+    "\n",
+    "corr = df[return_cols].dropna().corr()\n",
+    "\n",
+    "plt.figure(figsize=(10, 8))\n",
+    "mask = np.triu(np.ones_like(corr, dtype=bool))\n",
+    "sns.heatmap(\n",
+    "    corr, mask=mask, annot=True, fmt='.2f',\n",
+    "    cmap='RdBu_r', center=0, vmin=-1, vmax=1,\n",
+    "    linewidths=0.5, square=True\n",
+    ")\n",
+    "plt.title('Correlation Matrix — Returns & Macro Signals', fontsize=13)\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0abcf9f6",
+   "metadata": {},
+   "source": [
+    "## 6. Macro Signals Over Time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec5622a4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "macro_cols = ['Inflation', 'Interest_Rate', 'USD_Index', 'Sentiment']\n",
+    "colors_macro = ['#f87171', '#fb923c', '#a78bfa', '#34d399']\n",
+    "\n",
+    "fig, axes = plt.subplots(4, 1, figsize=(16, 12), sharex=True)\n",
+    "for ax, col, color in zip(axes, macro_cols, colors_macro):\n",
+    "    ax.plot(df['Date'], df[col], color=color, lw=0.8)\n",
+    "    # Rolling 30-day average\n",
+    "    ax.plot(df['Date'], df[col].rolling(30).mean(), color='white', lw=1.5, alpha=0.7, linestyle='--')\n",
+    "    ax.set_ylabel(col, fontsize=10)\n",
+    "    ax.set_title(col, fontsize=11)\n",
+    "\n",
+    "plt.suptitle('Macro Signals Over Time (dashed = 30-day avg)', fontsize=14)\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "34fc1e43",
+   "metadata": {},
+   "source": [
+    "## 7. Oil Volatility"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64a22158",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, axes = plt.subplots(2, 1, figsize=(16, 7), sharex=True)\n",
+    "\n",
+    "axes[0].plot(df['Date'], df['Oil_Price'], color='#f59e0b', lw=1)\n",
+    "axes[0].set_title('Oil Price')\n",
+    "\n",
+    "axes[1].fill_between(df['Date'], df['Oil_Volatility'], color='#f87171', alpha=0.7)\n",
+    "axes[1].set_title('Oil Volatility (Rolling Std)')\n",
+    "\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "print(f\"Mean Oil Volatility : {df['Oil_Volatility'].mean():.5f}\")\n",
+    "print(f\"Peak Oil Volatility : {df['Oil_Volatility'].max():.5f} on {df.loc[df['Oil_Volatility'].idxmax(), 'Date'].date()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed8558c8",
+   "metadata": {},
+   "source": [
+    "## 8. Next Steps\n",
+    "\n",
+    "From this EDA, you should now know:\n",
+    "- ✅ What the overall price trend looks like\n",
+    "- ✅ Which assets are correlated\n",
+    "- ✅ How volatile the data is\n",
+    "- ✅ What macro regimes exist\n",
+    "\n",
+    "**→ Go to `02_preprocessing.ipynb` to clean the data and engineer features.**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9c589ced-c53a-4f41-8b9f-d6eb5cb970f9",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f574d34b-1ca4-4d9c-9e80-ae3a21fe9a37",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cafe6ea5-e559-4e5f-8569-7b0010f4dbc4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "51a8118c-f53d-4c01-b50b-012e6155b12b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 00000000..50075e75
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,10 @@
+pandas>=2.0.0
+numpy>=1.24.0
+matplotlib>=3.7.0
+seaborn>=0.12.0
+scikit-learn>=1.3.0
+jupyter>=1.0.0
+notebook>=7.0.0
+ipykernel>=6.0.0
+streamlit>=1.30.0
+plotly>=5.18.0
diff --git a/src/ingestion.py b/src/ingestion.py
new file mode 100644
index 00000000..e1c7de0c
--- /dev/null
+++ b/src/ingestion.py
@@ -0,0 +1,125 @@
+"""
+ingestion.py
+------------
+Loads, validates, and merges all 4 raw datasets into a single master DataFrame.
+
+Datasets:
+    - equity_dataset.csv     : Daily equity price, volume, returns, SMA_10
+    - macro_dataset.csv      : Inflation, Interest_Rate, USD_Index, Sentiment
+    - multi_asset_dataset.csv: Oil, Gold, Bonds prices + returns
+    - oil_dataset.csv        : Oil price, volume, returns, volatility
+"""
+
+import pandas as pd
+from pathlib import Path
+
+# ── Paths ────────────────────────────────────────────────────────────────────
+DATA_DIR = Path(__file__).resolve().parent.parent / "data" / "raw"  # <two levels above this file>/data/raw
+
+EQUITY_PATH       = DATA_DIR / "equity_dataset.csv"       # read by load_equity()
+MACRO_PATH        = DATA_DIR / "macro_dataset.csv"        # read by load_macro()
+MULTI_ASSET_PATH  = DATA_DIR / "multi_asset_dataset.csv"  # read by load_multi_asset()
+OIL_PATH          = DATA_DIR / "oil_dataset.csv"          # read by load_oil()
+
+
+# ── Loaders ──────────────────────────────────────────────────────────────────
+
+def load_equity() -> pd.DataFrame:
+    """Load equity dataset. Returns: Date, Price, Volume, Returns, SMA_10"""
+    df = pd.read_csv(EQUITY_PATH, parse_dates=["Date"])
+    df = df.rename(columns={
+        "Price":   "Equity_Price",
+        "Volume":  "Equity_Volume",
+        "Returns": "Equity_Returns",
+        "SMA_10":  "Equity_SMA10"
+    })
+    return df.sort_values("Date").reset_index(drop=True)
+
+
+def load_macro() -> pd.DataFrame:
+    """Load macro dataset. Returns: Date, Inflation, Interest_Rate, USD_Index, Sentiment"""
+    df = pd.read_csv(MACRO_PATH, parse_dates=["Date"])
+    return df.sort_values("Date").reset_index(drop=True)
+
+
+def load_multi_asset() -> pd.DataFrame:
+    """Load multi-asset dataset. Returns: Date, Oil, Gold, Bonds, Oil_Returns, Gold_Returns"""
+    df = pd.read_csv(MULTI_ASSET_PATH, parse_dates=["Date"])
+    df = df.rename(columns={
+        "Oil":         "MA_Oil_Price",
+        "Gold":        "MA_Gold_Price",
+        "Bonds":       "MA_Bonds_Price",
+        "Oil_Returns": "MA_Oil_Returns",
+        "Gold_Returns":"MA_Gold_Returns",
+    })
+    return df.sort_values("Date").reset_index(drop=True)
+
+
+def load_oil() -> pd.DataFrame:
+    """Load oil-specific dataset. Returns: Date, Price, Volume, Returns, Volatility"""
+    df = pd.read_csv(OIL_PATH, parse_dates=["Date"])
+    df = df.rename(columns={
+        "Price":      "Oil_Price",
+        "Volume":     "Oil_Volume",
+        "Returns":    "Oil_Returns",
+        "Volatility": "Oil_Volatility"
+    })
+    return df.sort_values("Date").reset_index(drop=True)
+
+
+# ── Master Merge ──────────────────────────────────────────────────────────────
+
+def load_master() -> pd.DataFrame:
+    """
+    Merge all 4 datasets on Date into a single master DataFrame.
+    All datasets share the same daily cadence so a simple inner join is safe.
+
+    Returns
+    -------
+    pd.DataFrame
+        Master DataFrame with all features aligned by Date.
+    """
+    equity      = load_equity()
+    macro       = load_macro()
+    multi_asset = load_multi_asset()
+    oil         = load_oil()
+
+    # Drop redundant oil columns from multi_asset (we have them in oil_dataset)
+    multi_asset_cols = ["Date", "MA_Gold_Price", "MA_Bonds_Price", "MA_Gold_Returns"]
+
+    master = (
+        equity
+        .merge(macro,                  on="Date", how="inner")
+        .merge(multi_asset[multi_asset_cols], on="Date", how="inner")
+        .merge(oil[["Date", "Oil_Price", "Oil_Volume", "Oil_Returns", "Oil_Volatility"]],
+               on="Date", how="inner")
+    )
+
+    print(f"[ingestion] Master shape : {master.shape}")
+    print(f"[ingestion] Date range   : {master['Date'].min().date()} -> {master['Date'].max().date()}")
+    print(f"[ingestion] Columns      : {list(master.columns)}")
+
+    return master
+
+
+# ── Quick Validation ──────────────────────────────────────────────────────────
+
+def validate(df: pd.DataFrame) -> None:
+    """Print a quick health check of the master DataFrame."""
+    print("\n-- NaN counts --")
+    nan_counts = df.isnull().sum()
+    print(nan_counts[nan_counts > 0].to_string() if nan_counts.any() else "No NaNs found OK")
+
+    print("\n-- Duplicate dates --")
+    dupes = df['Date'].duplicated().sum()
+    print(f"{dupes} duplicate date rows" if dupes else "No duplicates OK")
+
+    print("\n-- Basic stats --")
+    print(df.describe().round(4).to_string())
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":  # script mode only; importing this module does not load any data
+    master = load_master()  # reads the four CSVs, merges them and prints shape / date range / columns
+    validate(master)        # prints NaN counts, duplicate dates and summary statistics
diff --git a/src/ml_model.py b/src/ml_model.py
new file mode 100644
index 00000000..8abde86f
--- /dev/null
+++ b/src/ml_model.py
@@ -0,0 +1,113 @@
+"""
+ml_model.py
+-----------
+Machine Learning integration for the trading strategy.
+
+Trains a Random Forest Classifier to predict whether the equity market
+will go UP or DOWN over the next 10 days, using our engineered features.
+"""
+
+import pandas as pd
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
+from sklearn.metrics import classification_report, accuracy_score
+import joblib
+from pathlib import Path
+
+MODEL_DIR = Path(__file__).resolve().parent.parent / "models"  # <two levels above this file>/models
+MODEL_DIR.mkdir(exist_ok=True)  # import-time side effect; no parents=True, so the parent directory must already exist
+MODEL_PATH = MODEL_DIR / "rf_model.joblib"  # where train_model() dumps the tuned estimator
+
+FEATURES = [  # model input columns; the list order is the column order used for training and prediction
+    "Equity_RollingVol20",
+    "Equity_Momentum10",
+    "Equity_Momentum30",
+    "Inflation_RoC",
+    "InterestRate_RoC",
+    "Sentiment_norm",
+    "Corr_Equity_Gold30",
+    "Corr_Equity_Oil30"
+]
+
+def prepare_ml_data(df: pd.DataFrame, forward_window: int = 10):
+    """Creates the target variable for ML training."""
+    future_returns = df["Equity_Price"].shift(-forward_window) / df["Equity_Price"] - 1
+    df["Target"] = (future_returns > 0.0).astype(int)
+    
+    ml_df = df.dropna(subset=FEATURES + ["Target"]).copy()
+    ml_df = ml_df.iloc[:-forward_window]
+    
+    return ml_df
+
+def train_model(df: pd.DataFrame):
+    """Tunes and trains the best model using TimeSeriesSplit."""
+    print("[ml_model] Preparing data for hyperparameter tuning...")
+    ml_df = prepare_ml_data(df)
+    
+    X = ml_df[FEATURES]
+    y = ml_df["Target"]
+    
+    # Chronological split
+    split_idx = int(len(X) * 0.8)
+    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
+    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
+    
+    print(f"[ml_model] Starting Grid Search on {len(X_train)} samples...")
+    
+    # TimeSeriesSplit prevents look-ahead bias during cross-validation
+    tscv = TimeSeriesSplit(n_splits=3)
+    
+    # Define parameter grid
+    param_grid = {
+        'n_estimators': [50, 100, 200],
+        'max_depth': [3, 5, 7],
+        'min_samples_leaf': [20, 50, 100]
+    }
+    
+    rf = RandomForestClassifier(random_state=42, n_jobs=1)
+    
+    # GridSearchCV finds the best combination
+    grid_search = GridSearchCV(
+        estimator=rf,
+        param_grid=param_grid,
+        cv=tscv,
+        scoring='accuracy',
+        n_jobs=1,
+        verbose=1
+    )
+    
+    grid_search.fit(X_train, y_train)
+    
+    best_model = grid_search.best_estimator_
+    print(f"\n[ml_model] Best Parameters Found: {grid_search.best_params_}")
+    
+    # Evaluate best model
+    preds = best_model.predict(X_test)
+    acc = accuracy_score(y_test, preds)
+    print(f"[ml_model] Tuned Test Accuracy: {acc:.2%}")
+    print("[ml_model] Classification Report:")
+    print(classification_report(y_test, preds))
+    
+    # Feature Importance
+    importances = pd.Series(best_model.feature_importances_, index=FEATURES).sort_values(ascending=False)
+    print("\n[ml_model] Feature Importances:")
+    print(importances.to_string())
+    
+    # Save the best model
+    joblib.dump(best_model, MODEL_PATH)
+    print(f"[ml_model] Best model saved to {MODEL_PATH}")
+    return best_model
+
+def predict_signal(row: pd.Series, model: RandomForestClassifier) -> int:
+    """Predicts 1 (UP) or 0 (DOWN) for a single row of data."""
+    # Extract features in the correct order
+    x = [row.get(f, 0.0) for f in FEATURES]
+    pred = model.predict([x])[0]
+    return int(pred)
+
+def get_prediction_probability(row: pd.Series, model: RandomForestClassifier) -> float:
+    """Returns the probability of the UP (1) class."""
+    x = [row.get(f, 0.0) for f in FEATURES]
+    prob = model.predict_proba([x])[0][1]
+    return float(prob)
diff --git a/src/portfolio.py b/src/portfolio.py
new file mode 100644
index 00000000..e6a81840
--- /dev/null
+++ b/src/portfolio.py
@@ -0,0 +1,445 @@
+"""
+portfolio.py
+------------
+Portfolio state manager for the hedge fund backtesting system.
+
+Covers Issue 5 from ISSUES.md:
+    - Track cash, positions (shares held), PnL per asset
+    - Execute buy/sell orders with transaction costs and slippage (Issue 10)
+    - Record a full trade log with timestamps (Issue 14 - audit trail)
+    - Compute daily NAV (Net Asset Value) from positions + cash
+
+Design Principles:
+    - No forward-looking bias: prices are consumed one row at a time
+    - All state mutations go through explicit methods (no direct dict edits)
+    - Trade log is append-only — full audit trail preserved
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+from dataclasses import dataclass, field
+from typing import Optional
+from datetime import date
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Data containers
+# ─────────────────────────────────────────────────────────────────────────────
+
+@dataclass
+class TradeRecord:
+    """Immutable record of a single executed trade."""
+    date        : object       # trade date
+    asset       : str          # asset identifier
+    action      : str          # "BUY" | "SELL"
+    quantity    : float        # number of shares / units
+    price       : float        # execution price (post-slippage)
+    raw_price   : float        # price before slippage
+    slippage    : float        # slippage cost (absolute $)
+    commission  : float        # transaction cost (absolute $)
+    cash_before : float
+    cash_after  : float
+    reason      : str = ""     # signal / rule that triggered the trade
+
+
+@dataclass
+class PortfolioSnapshot:
+    """Point-in-time snapshot of portfolio state, as stored by Portfolio.record_snapshot()."""
+    date        : object        # snapshot date, stored exactly as passed by the caller
+    cash        : float         # cash balance at snapshot time
+    positions   : dict          # {asset: shares}
+    prices      : dict          # {asset: current price}
+    nav         : float         # total portfolio value (cash + priced positions)
+    returns     : float         # fractional change of NAV versus the previous snapshot
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Portfolio Manager
+# ─────────────────────────────────────────────────────────────────────────────
+
+class Portfolio:
+    """
+    Manages cash, multi-asset positions, trade execution, and PnL.
+
+    Parameters
+    ----------
+    initial_capital   : starting cash (default $100,000)
+    transaction_cost  : % of trade value charged as commission (default 0.1%)
+    slippage_pct      : % of price added/subtracted for market impact (default 0.05%)
+    max_position_pct  : max % of NAV in a single asset (default 20%)
+
+    Example
+    -------
+    port = Portfolio(initial_capital=100_000)
+    port.buy("Equity", quantity=10, price=150.0, date="2020-01-05")
+    port.sell("Equity", quantity=5,  price=155.0, date="2020-01-10")
+    print(port.summary())
+    """
+
+    def __init__(
+        self,
+        initial_capital  : float = 100_000.0,
+        transaction_cost : float = 0.001,    # 0.1%
+        slippage_pct     : float = 0.0005,   # 0.05%
+        max_position_pct : float = 0.20,     # 20% per asset
+    ):
+        self.initial_capital   = initial_capital
+        self.cash              = initial_capital
+        self.transaction_cost  = transaction_cost
+        self.slippage_pct      = slippage_pct
+        self.max_position_pct  = max_position_pct
+
+        # {asset_name: shares_held}
+        self._positions : dict[str, float] = {}
+        # {asset_name: average_cost_basis}
+        self._cost_basis: dict[str, float] = {}
+
+        # Audit trail
+        self._trade_log   : list[TradeRecord]       = []
+        self._nav_history : list[PortfolioSnapshot] = []
+
+        self._prev_nav = initial_capital
+
+    # ── Properties ────────────────────────────────────────────────────────────
+
+    @property
+    def positions(self) -> dict[str, float]:
+        return dict(self._positions)
+
+    @property
+    def trade_log(self) -> pd.DataFrame:
+        """Return full trade history as a DataFrame."""
+        if not self._trade_log:
+            return pd.DataFrame()
+        return pd.DataFrame([t.__dict__ for t in self._trade_log])
+
+    @property
+    def nav_history(self) -> pd.DataFrame:
+        """Return NAV history as a DataFrame."""
+        if not self._nav_history:
+            return pd.DataFrame()
+        records = []
+        for s in self._nav_history:
+            records.append({
+                "date"    : s.date,
+                "cash"    : s.cash,
+                "nav"     : s.nav,
+                "returns" : s.returns,
+                **{f"pos_{k}": v for k, v in s.positions.items()},
+            })
+        return pd.DataFrame(records)
+
+    # ── NAV Calculation ───────────────────────────────────────────────────────
+
+    def compute_nav(self, prices: dict[str, float]) -> float:
+        """
+        NAV = cash + sum(shares_held[asset] * price[asset])
+
+        Parameters
+        ----------
+        prices : {asset_name: current_price}  — snapshot for today
+        """
+        position_value = sum(
+            self._positions.get(asset, 0) * price
+            for asset, price in prices.items()
+        )
+        return self.cash + position_value
+
+    def record_snapshot(
+        self,
+        date  : object,
+        prices: dict[str, float],
+    ) -> float:
+        """
+        Record today's NAV snapshot. Call this once per simulation day.
+
+        Returns today's NAV.
+        """
+        nav     = self.compute_nav(prices)
+        ret     = (nav - self._prev_nav) / self._prev_nav if self._prev_nav else 0.0
+        self._prev_nav = nav
+
+        snap = PortfolioSnapshot(
+            date      = date,
+            cash      = self.cash,
+            positions = dict(self._positions),
+            prices    = dict(prices),
+            nav       = nav,
+            returns   = ret,
+        )
+        self._nav_history.append(snap)
+        return nav
+
+    # ── Order Execution ───────────────────────────────────────────────────────
+
+    def _apply_slippage(self, price: float, action: str) -> float:
+        """
+        Add slippage to execution price.
+        BUY  orders pay slightly more  (market moves against you).
+        SELL orders receive slightly less.
+        """
+        direction = 1 if action == "BUY" else -1
+        return price * (1 + direction * self.slippage_pct)
+
+    def _apply_commission(self, trade_value: float) -> float:
+        """Commission = trade_value * transaction_cost_rate"""
+        return abs(trade_value) * self.transaction_cost
+
+    def _check_capital_shortfall(self, required: float, label: str):
+        """Raise an informative error if insufficient cash (Issue 15)."""
+        if self.cash < required:
+            raise ValueError(
+                f"[portfolio] Capital shortfall on {label}: "
+                f"need ${required:,.2f}, have ${self.cash:,.2f}"
+            )
+
+    def _check_position_limit(self, asset: str, cost: float, prices: dict[str, float]):
+        """Raise if new position would exceed max_position_pct of NAV (Issue 9)."""
+        if not prices:
+            return
+        nav = self.compute_nav(prices)
+        existing_val = self._positions.get(asset, 0) * prices.get(asset, 0)
+        if (existing_val + cost) / nav > self.max_position_pct:
+            raise ValueError(
+                f"[portfolio] Position limit breach for {asset}: "
+                f"would exceed {self.max_position_pct:.0%} of NAV=${nav:,.2f}"
+            )
+
+    def buy(
+        self,
+        asset    : str,
+        quantity : float,
+        price    : float,
+        date     : object,
+        reason   : str = "",
+        prices   : Optional[dict] = None,
+    ) -> TradeRecord:
+        """
+        Execute a BUY order.
+
+        Parameters
+        ----------
+        asset    : asset identifier (e.g. "Equity", "Gold")
+        quantity : number of shares / units to buy
+        price    : raw market price
+        date     : trade date (for audit log)
+        reason   : human-readable reason (signal, rule)
+        prices   : full {asset:price} dict for position limit check
+        """
+        exec_price  = self._apply_slippage(price, "BUY")
+        trade_value = exec_price * quantity
+        commission  = self._apply_commission(trade_value)
+        total_cost  = trade_value + commission
+
+        # Guard rails
+        self._check_capital_shortfall(total_cost, f"BUY {quantity} {asset}")
+        if prices:
+            self._check_position_limit(asset, trade_value, prices)
+
+        # Update state
+        cash_before     = self.cash
+        self.cash      -= total_cost
+        self._positions[asset] = self._positions.get(asset, 0) + quantity
+
+        # Update cost basis (weighted average)
+        prev_shares = self._positions.get(asset, 0) - quantity
+        prev_cost   = self._cost_basis.get(asset, exec_price)
+        if prev_shares > 0:
+            self._cost_basis[asset] = (
+                (prev_shares * prev_cost + quantity * exec_price)
+                / self._positions[asset]
+            )
+        else:
+            self._cost_basis[asset] = exec_price
+
+        record = TradeRecord(
+            date        = date,
+            asset       = asset,
+            action      = "BUY",
+            quantity    = quantity,
+            price       = exec_price,
+            raw_price   = price,
+            slippage    = (exec_price - price) * quantity,
+            commission  = commission,
+            cash_before = cash_before,
+            cash_after  = self.cash,
+            reason      = reason,
+        )
+        self._trade_log.append(record)
+        return record
+
+    def sell(
+        self,
+        asset    : str,
+        quantity : float,
+        price    : float,
+        date     : object,
+        reason   : str = "",
+    ) -> TradeRecord:
+        """
+        Execute a SELL order.
+
+        Parameters
+        ----------
+        asset    : asset identifier
+        quantity : number of shares / units to sell
+        price    : raw market price
+        date     : trade date (for audit log)
+        reason   : human-readable reason (signal, rule)
+        """
+        held = self._positions.get(asset, 0)
+        if quantity > held:
+            raise ValueError(
+                f"[portfolio] Cannot sell {quantity} {asset} — only {held} held"
+            )
+
+        exec_price  = self._apply_slippage(price, "SELL")
+        trade_value = exec_price * quantity
+        commission  = self._apply_commission(trade_value)
+        proceeds    = trade_value - commission
+
+        cash_before  = self.cash
+        self.cash   += proceeds
+        self._positions[asset] = held - quantity
+        if self._positions[asset] == 0:
+            del self._positions[asset]
+            del self._cost_basis[asset]
+
+        record = TradeRecord(
+            date        = date,
+            asset       = asset,
+            action      = "SELL",
+            quantity    = quantity,
+            price       = exec_price,
+            raw_price   = price,
+            slippage    = (price - exec_price) * quantity,
+            commission  = commission,
+            cash_before = cash_before,
+            cash_after  = self.cash,
+            reason      = reason,
+        )
+        self._trade_log.append(record)
+        return record
+
+    def rebalance(
+        self,
+        target_weights : dict[str, float],
+        prices         : dict[str, float],
+        date           : object,
+        reason         : str = "rebalance",
+    ):
+        """
+        Rebalance to target weight allocation (Issue 11).
+
+        Parameters
+        ----------
+        target_weights : {asset: target_fraction_of_NAV}  — must sum to <= 1.0
+        prices         : {asset: current_price}
+        date           : rebalance date
+
+        Example
+        -------
+        port.rebalance(
+            target_weights = {"Equity": 0.60, "Gold": 0.20, "Bonds": 0.10},
+            prices         = {"Equity": 150, "Gold": 1800, "Bonds": 100},
+            date           = "2020-06-30"
+        )
+        """
+        assert sum(target_weights.values()) <= 1.001, "Weights must sum to <= 1"
+
+        nav = self.compute_nav(prices)
+        
+        # Calculate target shares and deltas
+        deltas = {}
+        for asset, weight in target_weights.items():
+            target_value  = nav * weight
+            current_price = prices.get(asset)
+            if current_price is None or current_price <= 0:
+                continue
+
+            target_shares  = target_value / current_price
+            current_shares = self._positions.get(asset, 0)
+            deltas[asset] = target_shares - current_shares
+
+        # Sell first to free up cash
+        for asset, delta in deltas.items():
+            if delta < -0.001:
+                self.sell(asset, abs(delta), prices[asset], date, reason)
+                
+        # Then buy with available cash
+        for asset, delta in deltas.items():
+            if delta > 0.001:
+                # Adjust delta if we don't have enough cash (due to slippage/commission buffer)
+                required_cash = delta * prices[asset] * (1 + self.slippage_pct + self.transaction_cost)
+                if required_cash > self.cash:
+                    delta = (self.cash * 0.99) / (prices[asset] * (1 + self.slippage_pct + self.transaction_cost))
+                
+                if delta > 0.001:
+                    self.buy(asset, delta, prices[asset], date, reason, prices)
+
+    # ── Summary ───────────────────────────────────────────────────────────────
+
+    def summary(self, prices: Optional[dict] = None) -> str:
+        nav = self.compute_nav(prices) if prices else None
+        lines = [
+            "=" * 50,
+            "  PORTFOLIO SUMMARY",
+            "=" * 50,
+            f"  Initial Capital : ${self.initial_capital:>12,.2f}",
+            f"  Cash            : ${self.cash:>12,.2f}",
+        ]
+        for asset, shares in self._positions.items():
+            cb = self._cost_basis.get(asset, 0)
+            lines.append(f"  {asset:<14} : {shares:>8.2f} shares @ avg ${cb:.2f}")
+        if nav:
+            pnl = nav - self.initial_capital
+            pnl_pct = pnl / self.initial_capital
+            lines += [
+                f"  NAV             : ${nav:>12,.2f}",
+                f"  PnL             : ${pnl:>12,.2f}  ({pnl_pct:.2%})",
+            ]
+        lines += [
+            f"  Total Trades    : {len(self._trade_log)}",
+            "=" * 50,
+        ]
+        return "\n".join(lines)
+
+    def get_returns(self) -> pd.Series:
+        """Extract daily NAV returns from nav_history."""
+        hist = self.nav_history
+        if hist.empty:
+            return pd.Series(dtype=float)
+        return hist.set_index("date")["returns"]
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Entry point — quick demo
+# ─────────────────────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":  # quick demo, run only when the module is executed as a script
+    port = Portfolio(initial_capital=100_000, transaction_cost=0.001, slippage_pct=0.0005)
+
+    # Day 1: buy 100 shares of Equity at $100
+    port.buy("Equity", 100, 100.0, "2020-01-02", reason="Initial allocation")  # no `prices` passed, so the position-limit check is skipped
+    port.record_snapshot("2020-01-02", {"Equity": 100.0})
+
+    # Day 2: price rises to $105
+    port.record_snapshot("2020-01-03", {"Equity": 105.0})  # no trade; only the mark-to-market changes
+
+    # Day 3: buy 50 more shares
+    port.buy("Equity", 50, 105.0, "2020-01-04", reason="Momentum signal")  # cost basis becomes the weighted average of both buys
+    port.record_snapshot("2020-01-04", {"Equity": 105.0})
+
+    # Day 4: sell 80 shares at $108
+    port.sell("Equity", 80, 108.0, "2020-01-07", reason="Take profit")  # 70 shares remain afterwards
+    port.record_snapshot("2020-01-07", {"Equity": 108.0})
+
+    print(port.summary(prices={"Equity": 108.0}))  # prices supplied, so NAV and PnL lines are included
+    print()
+    print("Trade Log:")
+    print(port.trade_log.to_string())
+    print()
+    print("NAV History:")
+    print(port.nav_history[["date", "cash", "nav", "returns"]].to_string())  # pos_* columns left out
diff --git a/src/preprocessing.py b/src/preprocessing.py
new file mode 100644
index 00000000..847654ae
--- /dev/null
+++ b/src/preprocessing.py
@@ -0,0 +1,178 @@
+"""
+preprocessing.py
+----------------
+Cleans the master DataFrame and engineers features needed for
+risk modelling and signal generation.
+
+Steps:
+    1. Drop / impute warmup-period NaNs
+    2. Winsorize extreme returns  (+/-3 sigma)
+    3. Normalize macro columns (Z-score)
+    4. Engineer rolling features:
+       - Equity rolling volatility (20-day)
+       - Equity momentum          (10-day cumulative return)
+       - Macro rate-of-change
+       - Cross-asset rolling correlation (equity vs gold/oil)
+"""
+
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+
+
+# ── 1. NaN Handling ───────────────────────────────────────────────────────────
+
+def drop_warmup_nans(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Drop the first rows that have NaN due to rolling-window warmup.
+    The largest warmup window is 10 days (SMA_10, Oil_Volatility).
+    """
+    before = len(df)
+    df = df.dropna(subset=["Equity_SMA10", "Oil_Volatility"]).copy()
+    after = len(df)
+    print(f"[preprocess] Dropped {before - after} warmup rows  ({before} -> {after})")
+    return df.reset_index(drop=True)
+
+
+def impute_remaining_nans(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Forward-fill any remaining NaNs (e.g. mid-series gaps).
+    Returns columns filled are zero for returns, ffill for prices.
+    """
+    return_cols  = [c for c in df.columns if "Returns" in c]
+    price_cols   = [c for c in df.columns if "Price" in c or "SMA" in c
+                    or "Bonds" in c or "Index" in c]
+
+    df[return_cols] = df[return_cols].fillna(0)
+    df[price_cols]  = df[price_cols].ffill()
+    df = df.ffill()  # catch anything remaining
+    return df
+
+
+# ── 2. Outlier Handling ───────────────────────────────────────────────────────
+
+def winsorize_returns(df: pd.DataFrame, sigma: float = 3.0) -> pd.DataFrame:
+    """
+    Clip extreme daily returns to +/- `sigma` standard deviations.
+    Adds  _clean  suffix columns so originals are preserved.
+    """
+    return_cols = [c for c in df.columns if "Returns" in c]
+    for col in return_cols:
+        series = df[col].dropna()
+        lo = series.mean() - sigma * series.std()
+        hi = series.mean() + sigma * series.std()
+        df[f"{col}_clean"] = df[col].clip(lo, hi)
+        n_clipped = ((df[col] < lo) | (df[col] > hi)).sum()
+        if n_clipped:
+            print(f"[preprocess] Winsorized {n_clipped:>4} rows in {col}")
+    return df
+
+
+# ── 3. Normalisation ──────────────────────────────────────────────────────────
+
+MACRO_COLS = ["Inflation", "Interest_Rate", "USD_Index", "Sentiment"]
+
+def normalize_macro(df: pd.DataFrame) -> tuple[pd.DataFrame, StandardScaler]:
+    """
+    Z-score normalize macro columns.
+    Returns the modified DataFrame AND the fitted scaler (for inverse-transform later).
+    """
+    scaler = StandardScaler()
+    df[[f"{c}_norm" for c in MACRO_COLS]] = scaler.fit_transform(df[MACRO_COLS])
+    print(f"[preprocess] Normalized {MACRO_COLS}")
+    return df, scaler
+
+
+# ── 4. Feature Engineering ────────────────────────────────────────────────────
+
+def add_rolling_features(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Add derived features on top of the raw data:
+
+    Equity:
+        - Equity_RollingVol20   : 20-day rolling std of returns
+        - Equity_Momentum10     : 10-day cumulative return
+        - Equity_Momentum30     : 30-day cumulative return
+
+    Macro:
+        - Rate_of_Change columns for Inflation & Interest_Rate
+
+    Cross-asset:
+        - Corr_Equity_Gold30    : 30-day rolling correlation
+        - Corr_Equity_Oil30     : 30-day rolling correlation
+    """
+    r = "Equity_Returns_clean"
+
+    # -- Equity volatility --
+    df["Equity_RollingVol20"] = df[r].rolling(20).std() * np.sqrt(252)  # annualised
+
+    # -- Momentum (sum of returns over window) --
+    df["Equity_Momentum10"] = df[r].rolling(10).sum()
+    df["Equity_Momentum30"] = df[r].rolling(30).sum()
+
+    # -- Macro rate of change --
+    df["Inflation_RoC"]      = df["Inflation"].pct_change()
+    df["InterestRate_RoC"]   = df["Interest_Rate"].pct_change()
+
+    # -- Cross-asset correlations --
+    df["Corr_Equity_Gold30"] = (
+        df["Equity_Returns_clean"]
+        .rolling(30)
+        .corr(df["MA_Gold_Returns_clean"])
+    )
+    df["Corr_Equity_Oil30"] = (
+        df["Equity_Returns_clean"]
+        .rolling(30)
+        .corr(df["Oil_Returns_clean"])
+    )
+
+    # Drop the short rolling-window warmup NaNs from new features
+    df = df.dropna(subset=["Equity_RollingVol20", "Equity_Momentum30",
+                            "Corr_Equity_Gold30"]).reset_index(drop=True)
+
+    print(f"[preprocess] Feature engineering done. Shape: {df.shape}")
+    return df
+
+
+# ── Master Preprocess Pipeline ────────────────────────────────────────────────
+
+def preprocess(raw_df: pd.DataFrame) -> tuple[pd.DataFrame, StandardScaler]:
+    """
+    Full pipeline:
+        raw_df  ->  cleaned + normalised + feature-engineered DataFrame
+
+    Returns
+    -------
+    df     : processed DataFrame
+    scaler : fitted StandardScaler (for macro cols)
+    """
+    df = drop_warmup_nans(raw_df)
+    df = impute_remaining_nans(df)
+    df = winsorize_returns(df)
+    df, scaler = normalize_macro(df)
+    df = add_rolling_features(df)
+    return df, scaler
+
+
+# ── Entry point ───────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":  # script mode: run the whole pipeline on the raw CSVs and print a report
+    import sys
+    from pathlib import Path
+    sys.path.insert(0, str(Path(__file__).parent))  # lets `ingestion` be imported as a top-level module from this file's directory
+
+    from ingestion import load_master
+    raw = load_master()
+
+    df, scaler = preprocess(raw)
+
+    print(f"\nFinal dataset shape : {df.shape}")
+    print(f"Date range          : {df['Date'].min().date()} -> {df['Date'].max().date()}")
+    print(f"\nNew feature columns :")
+    new_cols = [c for c in df.columns if any(x in c for x in  # substring match on column names
+                ["Vol", "Momentum", "RoC", "Corr", "clean", "norm"])]  # NOTE(review): "Vol" also matches raw *_Volume / Oil_Volatility columns
+    for c in new_cols:
+        print(f"  {c}")
+
+    remaining_nans = df.isnull().sum().sum()  # total NaN cells across all columns
+    print(f"\nRemaining NaNs: {remaining_nans}")
diff --git a/src/risk.py b/src/risk.py
new file mode 100644
index 00000000..2bbbf56a
--- /dev/null
+++ b/src/risk.py
@@ -0,0 +1,494 @@
+"""
+risk.py
+-------
+Risk metrics for the hedge fund system.
+
+Covers Issues 6, 7, 12, 13 from ISSUES.md:
+    - Issue  6 : Value at Risk (VaR)  -- Historical + Parametric + CVaR
+    - Issue  7 : Max Drawdown & Portfolio Volatility
+    - Issue 12 : Sharpe Ratio (risk-adjusted return)
+    - Issue 13 : Alpha & Beta vs a benchmark
+
+All functions operate on a pandas Series of daily returns
+(or a DataFrame for multi-asset calculations).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import pandas as pd
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Data containers
+# ─────────────────────────────────────────────────────────────────────────────
+
+@dataclass
+class VaRResult:
+    """
+    Result of one Value-at-Risk calculation at a single confidence level.
+
+    Bundles the historical and parametric VaR estimates with the
+    Conditional VaR and the number of observations they were computed from.
+    """
+    confidence   : float          # confidence level as a fraction, e.g. 0.95
+    historical   : float          # VaR from historical simulation
+    parametric   : float          # VaR from the Gaussian parametric method
+    cvar         : float          # Conditional VaR (Expected Shortfall)
+    period_days  : int            # count of daily return observations used
+
+    def __str__(self) -> str:
+        """Format the result as a four-line, human-readable report."""
+        report = [
+            f"VaR ({self.confidence:.0%} conf, {self.period_days}d)",
+            f"  Historical : {self.historical:.4%}",
+            f"  Parametric : {self.parametric:.4%}",
+            f"  CVaR (ES)  : {self.cvar:.4%}",
+        ]
+        return "\n".join(report)
+
+
+@dataclass
+class DrawdownResult:
+    """Summary of the drawdown behaviour of a returns series."""
+    max_drawdown       : float   # deepest peak-to-trough decline (zero or negative)
+    max_dd_start       : object  # label of the peak that precedes the worst drawdown
+    max_dd_end         : object  # label of the trough of the worst drawdown
+    avg_drawdown       : float   # mean of the drawdown values that are below zero
+    recovery_days      : int     # observations from trough to recovery as counted by
+                                 # calculate_drawdown; -1 means no recovery in the data
+    drawdown_series    : pd.Series = field(repr=False)   # drawdown per observation, left out of repr
+
+    def __str__(self) -> str:
+        """Format the statistics as a six-line, human-readable report."""
+        report = [
+            f"Drawdown Analysis",
+            f"  Max Drawdown  : {self.max_drawdown:.4%}",
+            f"  DD Start      : {self.max_dd_start}",
+            f"  DD Trough     : {self.max_dd_end}",
+            f"  Recovery Days : {self.recovery_days}",
+            f"  Avg Drawdown  : {self.avg_drawdown:.4%}",
+        ]
+        return "\n".join(report)
+
+
+@dataclass
+class RiskMetrics:
+    """
+    Full risk/return profile of a returns series.
+
+    The benchmark-relative fields (alpha, beta, r_squared) default to None
+    and are only printed by summary() when alpha is set.
+    """
+    # Return metrics
+    total_return       : float
+    ann_return         : float
+    ann_volatility     : float
+
+    # Risk-adjusted ratios
+    sharpe_ratio       : float
+    sortino_ratio      : float
+    calmar_ratio       : float
+
+    # Tail risk and drawdown
+    var_95             : VaRResult
+    var_99             : VaRResult
+    drawdown           : DrawdownResult
+
+    # Alpha / Beta vs benchmark (optional)
+    alpha              : Optional[float] = None
+    beta               : Optional[float] = None
+    r_squared          : Optional[float] = None
+
+    def summary(self) -> str:
+        """
+        Build a multi-line text report framed by 50-character rules.
+
+        The alpha / beta / R-squared rows are appended only when alpha is
+        not None.
+        """
+        rule = "=" * 50
+        report = [rule, "  RISK METRICS SUMMARY", rule]
+
+        report.append(f"  Total Return     : {self.total_return:.4%}")
+        report.append(f"  Annualised Ret   : {self.ann_return:.4%}")
+        report.append(f"  Annualised Vol   : {self.ann_volatility:.4%}")
+        report.append(f"  Sharpe Ratio     : {self.sharpe_ratio:.4f}")
+        report.append(f"  Sortino Ratio    : {self.sortino_ratio:.4f}")
+        report.append(f"  Calmar Ratio     : {self.calmar_ratio:.4f}")
+        report.append(f"  VaR 95% (hist)   : {self.var_95.historical:.4%}")
+        report.append(f"  VaR 99% (hist)   : {self.var_99.historical:.4%}")
+        report.append(f"  CVaR 95%         : {self.var_95.cvar:.4%}")
+        report.append(f"  Max Drawdown     : {self.drawdown.max_drawdown:.4%}")
+
+        if self.alpha is not None:
+            report.append(f"  Alpha            : {self.alpha:.6f}")
+            report.append(f"  Beta             : {self.beta:.4f}")
+            report.append(f"  R-squared        : {self.r_squared:.4f}")
+
+        report.append(rule)
+        return "\n".join(report)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Issue 6: Value at Risk
+# ─────────────────────────────────────────────────────────────────────────────
+
+def calculate_var(
+    returns  : pd.Series,
+    confidence: float = 0.95,
+    dates    : Optional[pd.Series] = None,
+) -> VaRResult:
+    """
+    Estimate VaR two ways and the matching Conditional VaR.
+
+    Historical Simulation
+        The (1 - confidence) quantile of the observed returns.
+        Makes no distributional assumption.
+
+    Parametric (Gaussian)
+        mean + norm.ppf(1 - confidence) * std, i.e. the same quantile of a
+        normal distribution fitted to the sample mean and std.
+
+    CVaR / Expected Shortfall
+        Mean of the returns that are at or below the historical VaR.
+
+    Parameters
+    ----------
+    returns    : daily returns Series; NaN entries are dropped
+    confidence : e.g. 0.95 or 0.99
+    dates      : accepted for signature symmetry with the other metrics;
+                 not used by this calculation
+
+    Returns
+    -------
+    VaRResult dataclass; period_days is the number of non-NaN returns
+    """
+    clean = returns.dropna()
+    tail_prob = 1.0 - confidence            # probability mass in the left tail
+
+    # Historical simulation: empirical left-tail quantile.
+    historical_var = float(clean.quantile(tail_prob))
+
+    # Gaussian: norm.ppf(tail_prob) is negative for tail_prob < 0.5, so the
+    # result sits below the mean.
+    from scipy.stats import norm
+    mean_ret = clean.mean()
+    std_ret = clean.std()
+    gaussian_var = float(mean_ret + norm.ppf(tail_prob) * std_ret)
+
+    # Expected Shortfall: average of the observations in the breached tail.
+    tail_losses = clean[clean <= historical_var]
+    expected_shortfall = float(tail_losses.mean())
+
+    return VaRResult(
+        confidence  = confidence,
+        historical  = historical_var,
+        parametric  = gaussian_var,
+        cvar        = expected_shortfall,
+        period_days = len(clean),
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Issue 7: Drawdown & Volatility
+# ─────────────────────────────────────────────────────────────────────────────
+
+def calculate_drawdown(
+    returns : pd.Series,
+    dates   : Optional[pd.Series] = None,
+) -> DrawdownResult:
+    """
+    Compute the drawdown series and extract key drawdown statistics.
+
+    NAV is compounded from a starting value of 1.0 and the running peak
+    includes that starting value, so losses on the first observations are
+    counted as a drawdown rather than silently becoming the first "peak":
+        DD(t) = (NAV(t) - peak NAV up to t) / peak NAV up to t
+
+    Parameters
+    ----------
+    returns : daily returns Series; NaN entries are dropped
+    dates   : optional Date Series for labelling start/end.  If it has the
+              same length as ``returns``, the rows whose return is NaN are
+              dropped from it too.  Otherwise, if its index contains the
+              labels of the kept returns, it is aligned by label.  Failing
+              both, it is matched by position.
+
+    Returns
+    -------
+    DrawdownResult dataclass
+        max_dd_start / max_dd_end : date labels when ``dates`` is given,
+                                    integer positions otherwise
+        recovery_days             : observations from the trough to the first
+                                    NAV at or above the prior peak; -1 when
+                                    the series ends still in drawdown
+    An input with no non-NaN returns yields a zeroed result with ``None``
+    labels and an empty drawdown series.
+    """
+    keep = returns.notna().to_numpy()
+    r = returns[keep].reset_index(drop=True)
+
+    # Date labels have to line up one-to-one with the returns that survive
+    # the NaN filter, otherwise every label after a dropped row is shifted.
+    labels = None
+    if dates is not None:
+        kept_index = returns.index[keep]
+        if len(dates) == len(returns):
+            labels = dates[keep].reset_index(drop=True)
+        elif dates.index.is_unique and kept_index.isin(dates.index).all():
+            labels = dates.loc[kept_index].reset_index(drop=True)
+        else:
+            labels = dates.reset_index(drop=True)
+
+    if r.empty:
+        return DrawdownResult(
+            max_drawdown    = 0.0,
+            max_dd_start    = None,
+            max_dd_end      = None,
+            avg_drawdown    = 0.0,
+            recovery_days   = 0,
+            drawdown_series = pd.Series(dtype=float),
+        )
+
+    # Net Asset Value (NAV) from $1; the peak is floored at that $1 start.
+    nav    = (1 + r).cumprod()
+    peak   = nav.cummax().clip(lower=1.0)
+    dd_ser = (nav - peak) / peak
+
+    trough     = int(dd_ser.idxmin())
+    max_dd_val = float(dd_ser.iloc[trough])
+    peak_val   = float(peak.iloc[trough])
+
+    # Start of the worst drawdown: the LAST observation up to the trough at
+    # which NAV still sat on the peak.  When the peak is the initial capital
+    # and was never touched again, the drawdown began at the first observation.
+    at_peak = nav.iloc[: trough + 1] >= peak_val
+    start = int(at_peak[at_peak].index[-1]) if at_peak.any() else 0
+
+    # Recovery: first observation from the trough on with NAV back at the peak.
+    recovered = nav.iloc[trough:] >= peak_val
+    rec_days = int(recovered.idxmax() - trough) if recovered.any() else -1
+
+    # Average drawdown over the observations that are below the peak.
+    underwater = dd_ser[dd_ser < 0]
+    avg_dd = float(underwater.mean()) if not underwater.empty else 0.0
+
+    return DrawdownResult(
+        max_drawdown    = max_dd_val,
+        max_dd_start    = labels.iloc[start]  if labels is not None else start,
+        max_dd_end      = labels.iloc[trough] if labels is not None else trough,
+        avg_drawdown    = avg_dd,
+        recovery_days   = rec_days,
+        drawdown_series = dd_ser,
+    )
+
+
+def portfolio_volatility(returns: pd.Series, ann_factor: int = 252) -> float:
+    """
+    Annualise the sample standard deviation of a returns series.
+
+    NaN entries are dropped, then the std is scaled by sqrt(ann_factor).
+    The default ann_factor of 252 corresponds to daily returns.
+    """
+    daily_std = returns.dropna().std()
+    scale = np.sqrt(ann_factor)
+    return float(daily_std * scale)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Issue 12: Sharpe & Sortino & Calmar
+# ─────────────────────────────────────────────────────────────────────────────
+
+def sharpe_ratio(
+    returns    : pd.Series,
+    risk_free  : float = 0.0,
+    ann_factor : int   = 252,
+) -> float:
+    """
+    Annualised Sharpe Ratio.
+
+    Sharpe = (E[R] - Rf) / sigma(R)  * sqrt(ann_factor)
+
+    Parameters
+    ----------
+    returns    : daily returns; NaN entries are dropped
+    risk_free  : daily risk-free rate (default 0 %)
+    ann_factor : trading days per year
+
+    Returns
+    -------
+    The annualised ratio, or 0.0 when it is undefined: the excess returns
+    have (numerically) zero volatility, or there are fewer than two
+    observations so the sample std is NaN.
+    """
+    zero_vol_tol = 1e-12
+
+    r       = returns.dropna()
+    excess  = r - risk_free
+    vol     = excess.std()
+    # An exact `vol == 0` test is not enough: floating-point rounding can give
+    # a constant series a std of ~1e-17 instead of 0, which would blow the
+    # ratio up to ~1e16.  A NaN std (fewer than two points) is handled too.
+    if not np.isfinite(vol) or vol < zero_vol_tol:
+        return 0.0
+    return float((excess.mean() / vol) * np.sqrt(ann_factor))
+
+
+def sortino_ratio(
+    returns    : pd.Series,
+    risk_free  : float = 0.0,
+    ann_factor : int   = 252,
+) -> float:
+    """
+    Annualised Sortino Ratio.
+
+    Like Sharpe but only penalises DOWNSIDE volatility — better for
+    strategies with skewed return distributions.
+
+    Sortino = (E[R] - Rf) / downside_std  * sqrt(ann_factor)
+
+    Here downside_std is the sample standard deviation of the negative
+    excess returns only.
+
+    Parameters
+    ----------
+    returns    : daily returns; NaN entries are dropped
+    risk_free  : daily risk-free rate (default 0 %)
+    ann_factor : trading days per year
+
+    Returns
+    -------
+    The annualised ratio.  downside_std is floored at 1e-9, so a series with
+    fewer than two negative excess returns, or with identical negative excess
+    returns, yields a very large finite value instead of dividing by zero.
+    """
+    min_down_vol = 1e-9
+
+    r           = returns.dropna()
+    excess      = r - risk_free
+    downside    = excess[excess < 0]
+    down_vol    = downside.std() if len(downside) > 1 else min_down_vol
+    # Two or more identical losses give a std of exactly 0; apply the same
+    # floor as the short-sample case rather than dividing by zero.
+    if not np.isfinite(down_vol) or down_vol < min_down_vol:
+        down_vol = min_down_vol
+    return float((excess.mean() / down_vol) * np.sqrt(ann_factor))
+
+
+def calmar_ratio(
+    returns    : pd.Series,
+    ann_factor : int = 252,
+) -> float:
+    """
+    Calmar Ratio = Annualised Return / |Max Drawdown|
+
+    The annualised return is the mean of the non-NaN returns multiplied by
+    ann_factor; the maximum drawdown comes from calculate_drawdown.
+    Returns 0.0 when the maximum drawdown is exactly zero.
+    """
+    annualised = returns.dropna().mean() * ann_factor
+    worst = calculate_drawdown(returns).max_drawdown
+    # No drawdown at all: the ratio is undefined, reported as 0.0.
+    return 0.0 if worst == 0 else float(annualised / abs(worst))
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Issue 13: Alpha & Beta
+# ─────────────────────────────────────────────────────────────────────────────
+
+def alpha_beta(
+    portfolio_returns  : pd.Series,
+    benchmark_returns  : pd.Series,
+    risk_free          : float = 0.0,
+    ann_factor         : int   = 252,
+) -> tuple[float, float, float]:
+    """
+    Estimate CAPM Alpha, Beta and R-squared against a benchmark.
+
+    Model:
+        R_p - Rf = alpha + beta * (R_b - Rf) + epsilon
+
+    The two series are joined on their index and rows with a NaN in either
+    one are dropped before fitting.
+
+    Parameters
+    ----------
+    portfolio_returns : daily portfolio returns
+    benchmark_returns : daily benchmark returns (e.g. equity index)
+    risk_free         : daily risk-free rate
+    ann_factor        : multiplier used to annualise the daily alpha
+
+    Returns
+    -------
+    (alpha, beta, r_squared)
+        alpha     : daily intercept multiplied by ann_factor
+        beta      : cov(excess_p, excess_b) / var(excess_b); 0.0 when that
+                    variance is exactly zero
+        r_squared : squared correlation of the two excess-return series
+
+    Interpretation
+    --------------
+    beta  > 1  : portfolio moves more than the market
+    beta  < 1  : portfolio is less volatile than the market
+    beta  < 0  : portfolio moves inversely to the market
+    alpha > 0  : positive excess return above market risk-adjusted expectation
+    """
+    aligned = pd.DataFrame({"p": portfolio_returns, "b": benchmark_returns}).dropna()
+    excess_p = aligned["p"] - risk_free
+    excess_b = aligned["b"] - risk_free
+
+    # Slope of the regression excess_p ~ excess_b.
+    cov = np.cov(excess_p, excess_b)
+    bench_var = cov[1, 1]
+    if bench_var != 0:
+        slope = float(cov[0, 1] / bench_var)
+    else:
+        slope = 0.0
+
+    # Intercept on a daily basis, then scaled to a yearly figure.
+    intercept_daily = float(excess_p.mean() - slope * excess_b.mean())
+    intercept_ann = intercept_daily * ann_factor
+
+    r_squared = float(excess_p.corr(excess_b)) ** 2
+
+    return intercept_ann, slope, r_squared
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Master: compute all risk metrics in one call
+# ─────────────────────────────────────────────────────────────────────────────
+
+def compute_all_risk_metrics(
+    returns           : pd.Series,
+    benchmark_returns : Optional[pd.Series] = None,
+    dates             : Optional[pd.Series] = None,
+    risk_free_annual  : float = 0.02,
+    ann_factor        : int   = 252,
+) -> RiskMetrics:
+    """
+    Convenience wrapper: computes every risk metric in one call.
+
+    Parameters
+    ----------
+    returns           : daily portfolio returns (winsorized / cleaned);
+                        NaN entries are dropped before any metric is computed
+    benchmark_returns : daily benchmark returns for alpha/beta (optional)
+    dates             : Date series aligned with returns.  When it has the
+                        same length as ``returns`` it is filtered with the
+                        same NaN mask, so drawdown labels stay on the right
+                        rows.
+    risk_free_annual  : annual risk-free rate (default 2%); converted to a
+                        daily rate by dividing by ann_factor
+    ann_factor        : trading days per year (252)
+
+    Returns
+    -------
+    RiskMetrics dataclass with .summary() method.  alpha, beta and r_squared
+    stay None when no benchmark is supplied.
+    """
+    rf_daily = risk_free_annual / ann_factor
+
+    valid = returns.notna()
+    r = returns[valid]
+
+    # Drop the dates of the discarded rows as well; otherwise the drawdown
+    # start/trough labels drift by one row per NaN that precedes them.
+    if dates is not None and len(dates) == len(returns):
+        dates = dates[valid.to_numpy()]
+
+    # Returns
+    total_ret = float((1 + r).prod() - 1)
+    ann_ret   = float(r.mean() * ann_factor)
+    ann_vol   = portfolio_volatility(r, ann_factor)
+
+    # Risk-adjusted
+    sh  = sharpe_ratio(r, rf_daily, ann_factor)
+    so  = sortino_ratio(r, rf_daily, ann_factor)
+    cal = calmar_ratio(r, ann_factor)
+
+    # VaR
+    v95 = calculate_var(r, 0.95, dates)
+    v99 = calculate_var(r, 0.99, dates)
+
+    # Drawdown
+    dd  = calculate_drawdown(r, dates)
+
+    # Alpha / Beta (only with a benchmark)
+    a, b, r2 = None, None, None
+    if benchmark_returns is not None:
+        a, b, r2 = alpha_beta(r, benchmark_returns, rf_daily, ann_factor)
+
+    return RiskMetrics(
+        total_return   = total_ret,
+        ann_return     = ann_ret,
+        ann_volatility = ann_vol,
+        sharpe_ratio   = sh,
+        sortino_ratio  = so,
+        calmar_ratio   = cal,
+        var_95         = v95,
+        var_99         = v99,
+        drawdown       = dd,
+        alpha          = a,
+        beta           = b,
+        r_squared      = r2,
+    )
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Rolling risk  (for dashboard / live monitoring)
+# ─────────────────────────────────────────────────────────────────────────────
+
+def rolling_var(
+    returns    : pd.Series,
+    window     : int   = 252,
+    confidence : float = 0.95,
+) -> pd.Series:
+    """
+    Rolling Historical VaR.
+
+    For each position, the (1 - confidence) quantile of the trailing
+    `window` observations.
+    """
+    tail_prob = 1.0 - confidence
+    windows = returns.rolling(window)
+    return windows.quantile(tail_prob)
+
+
+def rolling_sharpe(
+    returns    : pd.Series,
+    window     : int   = 252,
+    risk_free  : float = 0.0,
+    ann_factor : int   = 252,
+) -> pd.Series:
+    """
+    Rolling annualised Sharpe Ratio.
+
+    Rolling mean of the excess returns divided by their rolling std over
+    the trailing `window` observations, scaled by sqrt(ann_factor).
+    """
+    windows = (returns - risk_free).rolling(window)
+    ratio = windows.mean() / windows.std()
+    return ratio * np.sqrt(ann_factor)
+
+
+def rolling_volatility(
+    returns    : pd.Series,
+    window     : int = 20,
+    ann_factor : int = 252,
+) -> pd.Series:
+    """
+    Rolling annualised volatility.
+
+    Rolling std over the trailing `window` observations, scaled by
+    sqrt(ann_factor).
+    """
+    window_std = returns.rolling(window).std()
+    return window_std * np.sqrt(ann_factor)
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Entry point — quick self-test
+# ─────────────────────────────────────────────────────────────────────────────
+
+if __name__ == "__main__":
+    # Self-test: run the whole metric suite on the preprocessed master data.
+    import sys
+    from pathlib import Path
+
+    # Put this file's directory on sys.path so the sibling modules import.
+    sys.path.insert(0, str(Path(__file__).parent))
+
+    from ingestion     import load_master
+    from preprocessing import preprocess
+
+    frame, _ = preprocess(load_master())
+
+    metrics = compute_all_risk_metrics(
+        returns           = frame["Equity_Returns_clean"],
+        benchmark_returns = frame["Oil_Returns_clean"],   # oil stands in as the benchmark
+        dates             = frame["Date"],
+        risk_free_annual  = 0.02,
+    )
+
+    print(metrics.summary())
+    # Each detail section is preceded by one blank line.
+    for section in (metrics.var_95, metrics.drawdown):
+        print()
+        print(section)
diff --git a/src/signals.py b/src/signals.py
new file mode 100644
index 00000000..51dc184f
--- /dev/null
+++ b/src/signals.py
@@ -0,0 +1,174 @@
+"""
+signals.py
+----------
+Signal Generation Engine for the semi-automated trading system.
+
+Implements a Risk-Aware Trend Following strategy (Issue 8 & 16) that:
+    1. Detects market momentum (trend)
+    2. Overlays macro sentiment & volatility for risk mitigation
+    3. Outputs target portfolio weights for rebalancing
+
+This fulfills the requirement to build an "explainable" strategy
+that aims to minimize drawdowns while capturing upside.
+"""
+
+import pandas as pd
+from dataclasses import dataclass
+from typing import Dict, Tuple
+
+@dataclass
+class SignalResult:
+    """
+    One day's decision from a signal engine.
+
+    Carries the target allocation together with the text explaining which
+    rule produced it.
+    """
+    date: object                       # the "Date" value of the evaluated row
+    target_weights: Dict[str, float]   # asset name -> target portfolio weight
+    reason: str                        # human-readable explanation of the decision
+
+
+class RiskAwareSignalEngine:
+    """
+    Rule-based signal generator whose every decision has a one-line reason.
+
+    Rules are checked in priority order and the first match wins:
+        1. Sentiment below `sentiment_threshold`
+               -> 50% Gold, no Equity.
+        2. Rolling volatility above `vol_threshold`
+               -> 40% Equity, 40% Gold.
+        3. Otherwise follow the 30-day Equity momentum:
+               positive     -> `max_equity_weight` in Equity,
+               zero or less -> no Equity, no Gold.
+
+    Weight not assigned to Equity or Gold is treated as cash.
+    """
+
+    def __init__(
+        self,
+        vol_threshold: float = 0.20,       # 20% annualized vol limit
+        sentiment_threshold: float = -0.5, # negative sentiment threshold (normalized)
+        max_equity_weight: float = 0.90,   # Never go 100% equity
+    ):
+        """Store the three thresholds used by generate_signal."""
+        # NOTE(review): vol_threshold is compared directly with the
+        # "Equity_RollingVol20" feature; assumes that feature is annualised.
+        self.vol_threshold = vol_threshold
+        self.sentiment_threshold = sentiment_threshold
+        self.max_equity_weight = max_equity_weight
+
+    def generate_signal(self, row: pd.Series) -> SignalResult:
+        """
+        Turn one day of features into target portfolio weights.
+
+        Parameters
+        ----------
+        row : pd.Series representing one row of the preprocessed DataFrame.
+              "Date" is required; "Equity_Momentum30", "Equity_RollingVol20"
+              and "Sentiment_norm" fall back to 0.0 when absent.
+
+        Returns
+        -------
+        SignalResult with "Equity" and "Gold" weights and the rule that fired
+        """
+        momentum = row.get("Equity_Momentum30", 0.0)
+        vol      = row.get("Equity_RollingVol20", 0.0)
+        sent     = row.get("Sentiment_norm", 0.0)
+        when     = row["Date"]
+
+        # Starting point: everything in cash, with a generic explanation.
+        equity_w, gold_w = 0.0, 0.0
+        why = "No clear signal, holding cash."
+
+        if sent < self.sentiment_threshold:
+            # Rule 1 - extreme macro fear: half the book in Gold.
+            gold_w = 0.50
+            why = f"Risk-Off: Sentiment ({sent:.2f}) < {self.sentiment_threshold}"
+        elif vol > self.vol_threshold:
+            # Rule 2 - high volatility: trim Equity and hedge with Gold.
+            equity_w, gold_w = 0.40, 0.40
+            why = f"De-risk: Volatility ({vol:.1%}) > {self.vol_threshold:.1%}"
+        elif momentum > 0:
+            # Rule 3a - up-trend: maximum Equity allocation.
+            equity_w = self.max_equity_weight
+            why = f"Trend-On: Momentum ({momentum:.2%}) > 0"
+        else:
+            # Rule 3b - down-trend: stay in cash.
+            why = f"Trend-Off: Momentum ({momentum:.2%}) <= 0"
+
+        return SignalResult(
+            date=when,
+            target_weights={"Equity": equity_w, "Gold": gold_w},
+            reason=why,
+        )
+
+class MLSignalEngine:
+    """
+    ML-driven signal generator backed by a model loaded from
+    models/rf_model.joblib.
+
+    Rules are checked in priority order and the first match wins:
+        1. Rolling volatility above `vol_threshold` (overrides the model)
+               -> 20% Equity, 50% Gold.
+        2. Up-probability at or above `prob_threshold`
+               -> `max_equity_weight` in Equity.
+        3. Up-probability at or below 1 - `prob_threshold`
+               -> 50% Gold, no Equity.
+        4. Anything in between (low conviction)
+               -> 40% Equity, 20% Gold.
+    """
+
+    def __init__(
+        self,
+        vol_threshold: float = 0.25,
+        prob_threshold: float = 0.55,
+        max_equity_weight: float = 0.90,
+    ):
+        """
+        Store the thresholds and load the trained model from disk.
+
+        Raises
+        ------
+        FileNotFoundError : when the model file does not exist
+        """
+        import joblib
+        from pathlib import Path
+
+        self.vol_threshold = vol_threshold
+        self.prob_threshold = prob_threshold
+        self.max_equity_weight = max_equity_weight
+
+        # The model lives in the "models" folder two levels above this file.
+        models_dir = Path(__file__).resolve().parent.parent / "models"
+        model_path = models_dir / "rf_model.joblib"
+        if not model_path.exists():
+            raise FileNotFoundError(f"ML Model not found at {model_path}. Please run evaluate_ml.py first to train it.")
+
+        # NOTE: joblib.load unpickles the file, so only load trusted artifacts.
+        self.model = joblib.load(model_path)
+
+    def generate_signal(self, row: pd.Series) -> SignalResult:
+        """
+        Turn one day of features into target portfolio weights.
+
+        Parameters
+        ----------
+        row : pd.Series for one day.  "Date" is required;
+              "Equity_RollingVol20" falls back to 0.0 when absent.  The whole
+              row is also handed to get_prediction_probability.
+
+        Returns
+        -------
+        SignalResult with "Equity" and "Gold" weights and the rule that fired
+        """
+        from ml_model import get_prediction_probability
+
+        when = row["Date"]
+        vol = row.get("Equity_RollingVol20", 0.0)
+
+        # Model's probability that the next move is UP (class 1).
+        up_prob = get_prediction_probability(row, self.model)
+
+        if vol > self.vol_threshold:
+            # Volatility override takes precedence over the model.
+            allocation = {"Equity": 0.20, "Gold": 0.50}
+            why = f"De-risk Override: Volatility ({vol:.1%}) > {self.vol_threshold:.1%}"
+        elif up_prob >= self.prob_threshold:
+            # Strong conviction UP.
+            allocation = {"Equity": self.max_equity_weight, "Gold": 0.0}
+            why = f"ML Long: Up probability {up_prob:.1%} >= {self.prob_threshold:.1%}"
+        elif up_prob <= (1 - self.prob_threshold):
+            # Strong conviction DOWN.
+            allocation = {"Equity": 0.0, "Gold": 0.50}
+            why = f"ML Short: Down probability {(1-up_prob):.1%} >= {self.prob_threshold:.1%}"
+        else:
+            # Low conviction: balanced holding.
+            allocation = {"Equity": 0.40, "Gold": 0.20}
+            why = f"ML Neutral: Prob {up_prob:.1%} (Low conviction)"
+
+        return SignalResult(
+            date=when,
+            target_weights=allocation,
+            reason=why,
+        )
+
+