diff --git a/.gitignore b/.gitignore index 3858e46..2e4027e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,13 +12,15 @@ user_data/* !user_data/models !user_data/freqaimodels !user_data/data/ -!user_data/run_custom.py # Downloaded data (from Google Drive) and synthetic test data — not tracked in git user_data/data/usstock/ user_data/data/polymarket/ user_data/data/portfoliobench/ # Generated ML artifacts (regenerable via scripts/prepare_event_model.py) user_data/data/polymarket_ml/ +user_data/data/polymarket_ml_real/ +# Report / thesis drafts +report/ # Local development data, model checkpoints, and notebooks mycode/ # Virtual environment @@ -130,6 +132,3 @@ target/ !config_examples/config_freqai.example.json docker-compose-*.yml - -# Workflow results (regenerable via make e2e-*) -results/ diff --git a/docs/polymarket/README.md b/docs/polymarket/README.md index 0f16858..3565f93 100644 --- a/docs/polymarket/README.md +++ b/docs/polymarket/README.md @@ -127,10 +127,11 @@ Run the preparation script to rebuild everything from the raw BTC price series: python scripts/prepare_event_model.py ``` -This executes three steps automatically: +This executes four steps automatically: | Step | What it does | Output | |---|---|---| +| 0 — Build feather files | Generates OHLCV feather files per contract (synthetic by default; see [Using Real Data](#using-real-polymarket-data)) | `*.feather` files in `--output-dir` | | 1 — Build training data | Constructs synthetic weekly BTC binary events from `data_1h.csv` | `event_model_training.parquet` | | 2 — Train model | Fits and calibrates a logistic regression on the training data | `event_model.pkl` | | 3 — Generate predictions | Runs the model at every hourly bar before each contract's expiry | Per-contract `*-event_probs.csv` files | @@ -151,13 +152,63 @@ python scripts/prepare_event_model.py \ | Flag | Default | Description | |---|---|---| | `--btc-csv` | `mycode/data/data_1h.csv` | Path to hourly BTC OHLCV CSV | -| `--contracts` | `user_data/data/polymarket_contracts/jan20.jsonl` | Contract metadata JSONL | +| `--contracts` | `user_data/data/polymarket_contracts/jan20.jsonl` | Contract metadata JSONL (ignored when `--use-real-data` is set) | | `--output-dir` | `user_data/data/polymarket_ml` | Where all artefacts are written | | `--start-date` | `2018-01-01` | Earliest settlement date for training samples | | `--end-date` | `2025-06-01` | Latest settlement date (exclusive) | | `--val-cutoff` | `2024-01-01` | Events before this date → training; after → validation | | `--model-type` | `logistic` | `logistic` (default) or `xgboost` | +| `--skip-feathers` | off | Skip step 0 if feather files already exist in `--output-dir` | | `--skip-training-data` | off | Skip step 1 if the Parquet already exists | +| `--use-real-data` | off | Step 0: build feathers from real Polymarket trade data instead of synthetic prices | +| `--parquet-path` | `mycode/data/combined_filtered_data.paquet` | Path to the Polymarket trade-history parquet (only used with `--use-real-data`) | + +### Using Real Polymarket Data + +By default, step 0 generates **synthetic** OHLCV price series for each contract. If you +have a Polymarket trade-history parquet file, you can use real market prices instead: + +```bash +python scripts/prepare_event_model.py \ + --use-real-data \ + --parquet-path mycode/data/combined_filtered_data.paquet \ + --output-dir user_data/data/polymarket_ml_real +``` + +This runs `polymarket/real_data_builder.py` which: + +1. 
Loads the parquet and filters to BTC/Bitcoin YES-side rows. +2. Parses each market's strike (`$88K`, `$90,000`, etc.) and expiry date from the + `question` text. +3. Checks that the last 7-day window has at least 60% hourly coverage (≥ 101 of 168 + candles). +4. Forward-fills gaps of up to 6 consecutive hours; excludes contracts with longer gaps. +5. Writes a freqtrade-compatible `*.feather` file for each qualifying contract. +6. Serialises all parsed contracts to `real_contracts.jsonl` in the output directory. + +The event model and downstream prediction steps (steps 1–3) are unchanged — they work +with the same feather files regardless of whether they were generated synthetically or +from real data. + +**Parquet format requirements:** + +| Column | Type | Description | +|---|---|---| +| `timestamp` | datetime (UTC) | Candle open time | +| `condition_id` | string | Market identifier | +| `side` | string | `"Yes"` or `"No"` — only YES-side rows are used | +| `open`, `high`, `low`, `close` | string-encoded float | OHLC prices (0–1 range) | +| `volume` | int64 | Traded volume | +| `question` | string | Human-readable market question | + +**Coverage thresholds:** + +| Gap length | Treatment | +|---|---| +| 1–6 hours | Forward-filled (price unchanged) | +| 7–23 hours | Forward-filled, but a `WARNING` is logged | +| ≥ 24 hours consecutively | Contract excluded (price too stale) | +| < 60% of window rows present | Contract excluded | For the full training reference — feature engineering, label construction, temporal splits, calibration, and inference — see [training-guide.md](training-guide.md). @@ -194,6 +245,37 @@ At each hourly candle, the strategy: To override parameters without modifying the strategy file, subclass `DualModelPolymarketPortfolio` and set the class attributes. +### Configurable data paths (via config JSON) + +Two file paths used by the strategy can be overridden through the freqtrade config JSON +without touching the strategy source: + +| Config key | Default | Description | +|---|---|---| +| `contracts_jsonl` | `user_data/data/polymarket_contracts/jan20.jsonl` | Path to the contract metadata JSONL file; relative paths are resolved against `user_data/` | +| `predictions_dir` | `user_data/data/polymarket_ml` | Directory containing per-contract `*-event_probs.csv` files; relative paths are resolved against `user_data/` | + +Example config for a real-data backtest: + +```json +{ + "contracts_jsonl": "data/polymarket_ml_real/real_contracts.jsonl", + "predictions_dir": "data/polymarket_ml_real", + "exchange": { + "name": "polymarket", + "pair_whitelist": [ + "BTCABOVE108K-SEP5-YES/USDT", + "BTCABOVE110K-SEP5-YES/USDT" + ] + } +} +``` + +Contracts loaded from `real_contracts.jsonl` (produced by `--use-real-data`) may +contain question patterns that the minimal regex in the synthetic pipeline did not +support (e.g. "reach $108K"). The loader automatically skips unparseable lines with a +warning, so a mixed JSONL file will not crash the strategy. + ### Position sizing (Kelly formula) For a contract priced at `p_market` and model probability `p_model`: @@ -358,8 +440,21 @@ are: ``` The `load_contracts()` function extracts the strike and direction from the `question` -field using a regex match for `"above $X,XXX"` or `"below $X,XXX"`. Settlement is -determined from `outcomePrices`. +field. Supported question patterns include: + +| Pattern | Direction | Example | +|---|---|---| +| `above $X,XXX` / `above $XXK` | above | "Bitcoin above $90,000 on January 20?" 
| +| `reach/hit/exceed/surpass $X,XXX` | above | "Will Bitcoin reach $120,000 by December?" | +| `below $X,XXX` / `less than $X,XXX` | below | "Bitcoin below $80,000 on March 1?" | +| `dips to $X,XXX` | below | "Will Bitcoin dip to $70K in October?" | + +Strikes can be written with or without a `K`/`k` suffix (e.g. `$88K` = `$88,000`). +Settlement is determined from `outcomePrices`. + +Lines whose question cannot be parsed are skipped with a warning (see `skip_unparseable` +in `load_contracts`) rather than raising an error, so a JSONL file with mixed contract +types will not crash the strategy. Place the file at `user_data/data/polymarket_contracts/.jsonl` and update `--contracts` in the prepare script. @@ -384,13 +479,17 @@ Timestamp,Open,High,Low,Close,Volume - Optional on-chain columns: `mvrv`, `hash-rate`, `difficulty` (omit if unavailable; the corresponding features will be NaN and effectively ignored by the model). -### 3 — Feather files (synthetic OHLCV) +### 3 — Feather files (synthetic or real OHLCV) + +The backtester requires an OHLCV feather file for each pair. Two sources are supported: -The backtester requires a synthetic OHLCV feather file for each pair. These are -generated by the existing `build_all_feathers()` function. They are **not** real -Polymarket quotes — they are synthetic price series used solely to satisfy freqtrade's -data loading requirements. The strategy ignores OHLCV values other than `close` for -market price. +- **Synthetic (default):** generated by `build_all_feathers()` from `data_builder.py`. + These are modelled price series used solely to satisfy freqtrade's data loading + requirements. The strategy ignores OHLCV values other than `close` for market price. +- **Real Polymarket data:** generated by `build_all_feathers_from_parquet()` from + `real_data_builder.py` (requires a trade-history parquet). These contain actual + historical Polymarket prices from the YES-side order book. Use `--use-real-data` in + `prepare_event_model.py` to build these instead of synthetic files. ### 4 — Per-contract predictions diff --git a/docs/polymarket/training-guide.md b/docs/polymarket/training-guide.md index 80edb5f..8362704 100644 --- a/docs/polymarket/training-guide.md +++ b/docs/polymarket/training-guide.md @@ -25,6 +25,21 @@ start. ## 1. Architecture Overview ``` + ┌─────────────────────────────────────────────┐ + │ Step 0: OHLCV feather files per contract │ + │ │ + │ Option A (default — synthetic): │ + │ build_all_feathers() (data_builder.py) │ + │ • Log-normal price simulation │ + │ │ + │ Option B (--use-real-data): │ + │ build_all_feathers_from_parquet() │ + │ (real_data_builder.py) │ + │ • Real Polymarket YES-side prices │ + │ • Forward-fill gaps ≤ 6h │ + │ • Writes real_contracts.jsonl │ + └─────────────────────────────────────────────┘ + data_1h.csv (BTC hourly OHLCV, 2018–present) │ ▼ @@ -119,11 +134,24 @@ One JSON object per line. Required fields per contract: ``` **Field notes:** -- `question`: must contain `"above $X,XXX"` or `"below $X,XXX"` for strike extraction. +- `question`: used for strike and direction extraction. Supported patterns: + - `"above $X,XXX"` / `"above $XXK"` — YES if BTC > K at expiry + - `"reach/hit/exceed/surpass $X,XXX"` — treated as "above" + - `"below $X,XXX"` / `"less than $X,XXX"` / `"dips to $X,XXX"` — YES if BTC < K + - K-suffix supported: `$88K` is parsed as `$88,000` + - Lines whose `question` cannot be parsed are skipped with a warning when + `skip_unparseable=True`; they raise `ValueError` by default. 
- `outcomePrices`: JSON-encoded array `["YES_price", "NO_price"]`. `"1"` = that outcome won. Used to determine `settlement` (1.0 or 0.0). - `endDate`: ISO-8601 UTC, determines the settlement timestamp `T`. +**Alternative: auto-generated JSONL from real data** + +When using `--use-real-data` in `prepare_event_model.py`, the JSONL file is generated +automatically by `real_data_builder.write_contracts_jsonl()` from the parquet. The +output `real_contracts.jsonl` uses the same schema and can be loaded by the strategy +via the `contracts_jsonl` config key. + --- ## 3. Feature Engineering @@ -482,12 +510,21 @@ used during fitting. It does not limit when the model can be deployed. After running `prepare_event_model.py`, the following files are written to `user_data/data/polymarket_ml/`: +**Synthetic mode (default):** + | File | Format | Description | |---|---|---| | `event_model_training.parquet` | Parquet | 363K+ training samples; all 15 features + label, K, T | | `event_model.pkl` | joblib pickle | Model package dict (see below) | | `{pair}-event_probs.csv` | CSV | Per-contract fair-value predictions; columns: `dt_utc`, `fair_value` | -| `{pair}.feather` | Feather | Synthetic OHLCV for backtester (generated by `build_all_feathers`) | +| `{pair}-1h.feather` | Feather | Synthetic OHLCV for backtester (generated by `build_all_feathers`) | + +**Real-data mode (`--use-real-data`):** Same artefacts, plus: + +| File | Format | Description | +|---|---|---| +| `{pair}-1h.feather` | Feather | Real Polymarket YES-side OHLCV, 7-day window before expiry | +| `real_contracts.jsonl` | JSONL | Auto-generated contract metadata; compatible with `load_contracts()` | ### Model package structure diff --git a/freqtrade b/freqtrade index ab093ff..5fb0011 160000 --- a/freqtrade +++ b/freqtrade @@ -1 +1 @@ -Subproject commit ab093ff0e1af445f0b8491ea1168c46e1a51b2c0 +Subproject commit 5fb00116889f8d1d67c817f8aa1eaeecfbbfbfbe diff --git a/polymarket/contracts.py b/polymarket/contracts.py index 6ad6bec..5a6a940 100644 --- a/polymarket/contracts.py +++ b/polymarket/contracts.py @@ -19,10 +19,14 @@ from __future__ import annotations import json +import logging import re from dataclasses import dataclass, field from pathlib import Path +logger = logging.getLogger(__name__) + + # --------------------------------------------------------------------------- # Pair naming helpers # --------------------------------------------------------------------------- @@ -100,26 +104,44 @@ class ContractMetadata: # Strike / direction extraction # --------------------------------------------------------------------------- -# Matches "above $90,000", "above $90k", "less than $84,000", etc. -_ABOVE_RE = re.compile(r"above\s+\$([0-9,]+(?:\.[0-9]+)?)", re.IGNORECASE) -_BELOW_RE = re.compile(r"(?:less than|below)\s+\$([0-9,]+(?:\.[0-9]+)?)", re.IGNORECASE) +# Matches "above $90,000", "above $90K", "reach $120,000", "less than $84K", etc. +# Group 1: numeric part; Group 2: optional K/k suffix. +_ABOVE_RE = re.compile( + r"\b(?:above|reach(?:es)?|hit(?:s)?|exceed(?:s)?|surpass(?:es)?)\s+\$([0-9,]+(?:\.[0-9]+)?)\s*([kK])?(?:\b|$)", + re.IGNORECASE, +) +_BELOW_RE = re.compile( + r"\b(?:less\s+than|below|dip(?:s)?\s+to)\s+\$([0-9,]+(?:\.[0-9]+)?)\s*([kK])?(?:\b|$)", + re.IGNORECASE, +) def _parse_strike_direction(question: str) -> tuple[float, str]: """Extract (strike, direction) from a Polymarket question string. 
+ Handles: + - "above $90,000" / "above $90K" + - "reach/hit/exceed/surpass $120,000" (treated as 'above') + - "less than/below/dips to $84,000" (treated as 'below') + Raises: ValueError: If neither 'above' nor 'below/less than' is found. """ m = _ABOVE_RE.search(question) if m: - strike = float(m.group(1).replace(",", "")) - return strike, "above" + raw = m.group(1).replace(",", "") + has_k = m.group(2) is not None + strike = float(raw) * (1_000 if has_k else 1) + if strike >= 1_000: + return strike, "above" m = _BELOW_RE.search(question) if m: - strike = float(m.group(1).replace(",", "")) - return strike, "below" + raw = m.group(1).replace(",", "") + has_k = m.group(2) is not None + strike = float(raw) * (1_000 if has_k else 1) + if strike >= 1_000: + return strike, "below" raise ValueError(f"Cannot parse strike/direction from question: {question!r}") @@ -149,24 +171,34 @@ def _settlement_from_outcome_prices(outcome_prices_json: str) -> float: # Public loader # --------------------------------------------------------------------------- -def load_contracts(jsonl_path: str | Path) -> list[ContractMetadata]: +def load_contracts( + jsonl_path: str | Path, + *, + skip_unparseable: bool = False, +) -> list[ContractMetadata]: """Parse a Polymarket JSONL snapshot into a list of :class:`ContractMetadata`. Args: - jsonl_path: Path to the ``.jsonl`` file (one JSON object per line). + jsonl_path: Path to the ``.jsonl`` file (one JSON object per line). + skip_unparseable: When ``True``, log a warning and skip lines whose + question text cannot be parsed rather than raising. + Useful when loading ``real_contracts.jsonl`` which may + contain exotic question patterns. Returns: List of :class:`ContractMetadata`, sorted by strike ascending. Raises: FileNotFoundError: If ``jsonl_path`` does not exist. - ValueError: If a line cannot be parsed. + ValueError: If a line cannot be parsed and ``skip_unparseable`` is + ``False``. """ path = Path(jsonl_path) if not path.exists(): raise FileNotFoundError(f"Contract metadata file not found: {path}") contracts: list[ContractMetadata] = [] + skipped = 0 with path.open() as fh: for lineno, line in enumerate(fh, start=1): @@ -182,6 +214,10 @@ def load_contracts(jsonl_path: str | Path) -> list[ContractMetadata]: try: strike, direction = _parse_strike_direction(d["question"]) except ValueError as exc: + if skip_unparseable: + logger.debug("Skipping line %d (%s): %s", lineno, d.get("slug", "?"), exc) + skipped += 1 + continue raise ValueError(f"Line {lineno} ({d.get('slug', '?')}): {exc}") from exc settlement = _settlement_from_outcome_prices(d["outcomePrices"]) @@ -208,5 +244,8 @@ def load_contracts(jsonl_path: str | Path) -> list[ContractMetadata]: ) ) + if skipped: + logger.info("load_contracts: skipped %d unparseable lines in %s", skipped, path.name) + contracts.sort(key=lambda c: c.strike) return contracts diff --git a/polymarket/real_data_builder.py b/polymarket/real_data_builder.py new file mode 100644 index 0000000..8b66cfc --- /dev/null +++ b/polymarket/real_data_builder.py @@ -0,0 +1,467 @@ +"""Build backtesting feather files from real Polymarket trade data. + +This module replaces the synthetic log-normal price generator +(:mod:`polymarket.synthetic_prices`) with real hourly OHLCV data extracted +from a Polymarket trade-history parquet file. + +The parquet file is expected to have the following columns: + + timestamp – datetime (UTC-aware) + condition_id – market identifier string + side – outcome label, e.g. 
``"Yes"`` / ``"No"`` + open – string-encoded float price + high – string-encoded float price + low – string-encoded float price + close – string-encoded float price + volume – int64 traded volume + question – human-readable market question + +Main entry points +----------------- +parse_btc_contracts(df) + Scan all rows for BTC binary markets and return a list of parsed + :class:`~polymarket.contracts.ContractMetadata` objects. + +build_feather_from_real_data(df, contract, output_dir) + Extract the 7-day window before expiry for one contract, forward-fill + gaps, and write a freqtrade-compatible feather file. + +build_all_feathers_from_parquet(parquet_path, output_dir, ...) + End-to-end convenience wrapper: load the parquet, parse contracts, + build feathers for all qualifying markets. +""" + +from __future__ import annotations + +import logging +import re +from pathlib import Path +from typing import Sequence + +import pandas as pd +import pyarrow.feather as feather + +from polymarket.contracts import ContractMetadata, _make_pair + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +# Backtesting window: last N hours before expiry +WINDOW_HOURS: int = 168 # 7 days + +# Polymarket resolves BTC contracts at noon Eastern = 17:00 UTC +RESOLUTION_HOUR_UTC: int = 17 + +# Forward-fill limit: gaps longer than this are left as NaN and the contract +# is flagged as "sparse" (but still usable if overall coverage ≥ MIN_COVERAGE) +MAX_FFILL_HOURS: int = 6 + +# Minimum fraction of hourly candles required in the window +MIN_COVERAGE: float = 0.60 # 60% = at least 101 of 168 hours + +# --------------------------------------------------------------------------- +# Question parsing +# --------------------------------------------------------------------------- + +# "above $88,000" / "above $88K" / "above $88k" +# Group 1: digits, Group 2: optional K/k suffix +_ABOVE_RE = re.compile(r"\babove\s+\$([0-9,]+(?:\.[0-9]+)?)\s*([kK])?(?:\b|$)", re.I) +# "below $84,000" / "less than $84K" / "dip to $84,000" +_BELOW_RE = re.compile( + r"\b(?:below|less than|dip(?:s)?\s+to)\s+\$([0-9,]+(?:\.[0-9]+)?)\s*([kK])?(?:\b|$)", re.I +) +# "reach $150,000" / "reach $100k" / "hit $100k" / "exceed $90,000" +_REACH_RE = re.compile( + r"\b(?:reach|hit|exceed|surpass)\s+\$([0-9,]+(?:\.[0-9]+)?)\s*([kK])?(?:\b|$)", re.I +) + +# Full month names and 3-letter abbreviations +_MONTH_MAP: dict[str, int] = { + "january": 1, "february": 2, "march": 3, "april": 4, + "may": 5, "june": 6, "july": 7, "august": 8, + "september": 9, "october": 10, "november": 11, "december": 12, + "jan": 1, "feb": 2, "mar": 3, "apr": 4, + "jun": 6, "jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12, +} + +_DATE_RE = re.compile( + r"\b(january|february|march|april|may|june|july|august|" + r"september|october|november|december|" + r"jan|feb|mar|apr|jun|jul|aug|sep|oct|nov|dec)" + r"\s+(\d{1,2})(?:st|nd|rd|th)?(?:[,\s]+(\d{4}))?", + re.I, +) + +_YEAR_RE = re.compile(r"\b(20\d{2})\b") + + +def _parse_strike_direction(question: str) -> tuple[float, str] | None: + """Return (strike_usd, direction) or None if unparseable.""" + for pattern, direction in ( + (_ABOVE_RE, "above"), + (_REACH_RE, "above"), # "reach $X" treated as above (YES if BTC > X) + (_BELOW_RE, "below"), + ): + m = pattern.search(question) + if m: + raw = m.group(1).replace(",", "") + has_k = m.group(2) is not None # True when K/k 
suffix was captured + strike = float(raw) * (1_000 if has_k else 1) + # Skip implausible BTC prices (< $1,000 without K suffix) + if strike < 1_000: + return None + return strike, direction + return None + + +def _parse_expiry(question: str, last_timestamp: pd.Timestamp) -> str | None: + """Return ISO-8601 UTC expiry string or None if unparseable. + + Resolution time is always 17:00 UTC (Polymarket noon ET rule). + If the year is absent from the question we infer it from last_timestamp. + """ + m = _DATE_RE.search(question) + if not m: + return None + + month_str, day_str, year_str = m.group(1), m.group(2), m.group(3) + month = _MONTH_MAP.get(month_str.lower()) + if month is None: + return None + day = int(day_str) + + if year_str: + year = int(year_str) + try: + expiry = pd.Timestamp(year=year, month=month, day=day, + hour=RESOLUTION_HOUR_UTC, tz="UTC") + except ValueError: + return None + return expiry.strftime("%Y-%m-%dT%H:%M:%SZ") + + # No year in question — try years from last_timestamp.year down to -2. + # Pick the latest year whose parsed date is ≤ last_timestamp + 3 days + # (small slack for contracts whose last candle is just after expiry). + slack = pd.Timedelta(days=3) + expiry = None + for y in range(last_timestamp.year, last_timestamp.year - 3, -1): + try: + candidate = pd.Timestamp(year=y, month=month, day=day, + hour=RESOLUTION_HOUR_UTC, tz="UTC") + except ValueError: + continue + if candidate <= last_timestamp + slack: + expiry = candidate + break + + if expiry is None: + return None + + return expiry.strftime("%Y-%m-%dT%H:%M:%SZ") + + +# --------------------------------------------------------------------------- +# Parquet scanning +# --------------------------------------------------------------------------- + +def parse_btc_contracts( + df: pd.DataFrame, + min_rows_last7d: int = int(WINDOW_HOURS * MIN_COVERAGE), +) -> list[ContractMetadata]: + """Scan the parquet DataFrame and return parseable BTC binary contracts. + + Args: + df: Full parquet DataFrame (all columns). + min_rows_last7d: Minimum rows in the last 7-day window to include. + + Returns: + List of :class:`~polymarket.contracts.ContractMetadata` objects, one + per qualifying (condition_id, YES-side) market. 
+ """ + # Keep only BTC/Bitcoin YES-side rows + btc_mask = ( + df["question"].str.contains("Bitcoin|BTC", case=False, na=False) + & (df["side"].str.strip().str.lower() == "yes") + ) + btc = df[btc_mask].copy() + btc["timestamp"] = pd.to_datetime(btc["timestamp"], utc=True) + + contracts: list[ContractMetadata] = [] + skipped = {"no_strike": 0, "no_date": 0, "sparse": 0} + + for cid, grp in btc.groupby("condition_id"): + question = grp["question"].iloc[0] + last_ts = grp["timestamp"].max() + + # Parse strike and direction + result = _parse_strike_direction(question) + if result is None: + skipped["no_strike"] += 1 + continue + strike, direction = result + + # Parse expiry + end_date_utc = _parse_expiry(question, last_ts) + if end_date_utc is None: + skipped["no_date"] += 1 + continue + + # Check coverage in the last 7-day window + window_start = last_ts - pd.Timedelta(hours=WINDOW_HOURS) + rows_in_window = (grp["timestamp"] >= window_start).sum() + if rows_in_window < min_rows_last7d: + skipped["sparse"] += 1 + continue + + # Determine settlement: final close ≥ 0.5 → YES won + final_close = pd.to_numeric(grp["close"], errors="coerce").dropna() + if final_close.empty: + skipped["sparse"] += 1 + continue + # Use the last candle's close as settlement proxy + settlement = 1.0 if float(final_close.iloc[-1]) >= 0.5 else 0.0 + + pair_yes = _make_pair(strike, direction, end_date_utc, "YES") + pair_no = _make_pair(strike, direction, end_date_utc, "NO") + + contracts.append( + ContractMetadata( + id=str(cid), + question=question, + slug=str(cid), + strike=strike, + direction=direction, + end_date_utc=end_date_utc, + start_date_utc=( + grp["timestamp"].min().strftime("%Y-%m-%dT%H:%M:%SZ") + ), + settlement=settlement, + volume_usd=float(grp["volume"].sum()), + pair_yes=pair_yes, + pair_no=pair_no, + raw={"condition_id": cid, "rows_in_window": int(rows_in_window)}, + ) + ) + + logger.info( + "parse_btc_contracts: %d contracts parsed, skipped %d (no_strike=%d, " + "no_date=%d, sparse=%d)", + len(contracts), sum(skipped.values()), + skipped["no_strike"], skipped["no_date"], skipped["sparse"], + ) + return contracts + + +# --------------------------------------------------------------------------- +# Feather builder +# --------------------------------------------------------------------------- + +def build_feather_from_real_data( + df: pd.DataFrame, + contract: ContractMetadata, + output_dir: str | Path, + *, + max_ffill_hours: int = MAX_FFILL_HOURS, +) -> Path: + """Extract real OHLCV for one contract and write a freqtrade feather file. + + The function: + + 1. Filters the parquet to the contract's ``condition_id`` and ``side="Yes"``. + 2. Selects the last :data:`WINDOW_HOURS` hours before expiry. + 3. Converts string prices to float and timestamps to millisecond integers. + 4. Reindexes to a full hourly grid and forward-fills gaps up to + ``max_ffill_hours`` consecutive missing candles. + 5. Writes ``{pair_yes}-1h.feather`` to ``output_dir``. + + Args: + df: Full parquet DataFrame. + contract: Parsed contract metadata. + output_dir: Directory to write the feather file. + max_ffill_hours: Maximum consecutive hours to forward-fill. + + Returns: + Path to the written feather file. 
+ """ + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + cid = contract.raw.get("condition_id", contract.id) + + # Filter to this contract's YES side + mask = ( + (df["condition_id"] == cid) + & (df["side"].str.strip().str.lower() == "yes") + ) + grp = df[mask].copy() + if grp.empty: + raise ValueError(f"No rows found for condition_id={cid!r}") + + grp["timestamp"] = pd.to_datetime(grp["timestamp"], utc=True) + + # 7-day window before expiry + expiry_ts = pd.Timestamp(contract.end_date_utc.replace("Z", "+00:00")) + window_start = expiry_ts - pd.Timedelta(hours=WINDOW_HOURS) + grp = grp[(grp["timestamp"] >= window_start) & (grp["timestamp"] < expiry_ts)] + + if grp.empty: + raise ValueError( + f"No rows in 7-day window for {contract.pair_yes} " + f"({window_start} – {expiry_ts})" + ) + + # Convert price strings to float + for col in ("open", "high", "low", "close"): + grp[col] = pd.to_numeric(grp[col], errors="coerce") + + grp = grp.set_index("timestamp").sort_index() + + # Reindex to full hourly grid + full_index = pd.date_range(start=window_start, end=expiry_ts - pd.Timedelta(hours=1), + freq="h", tz="UTC") + ohlcv = grp[["open", "high", "low", "close", "volume"]].reindex(full_index) + + # Forward-fill gaps up to max_ffill_hours + ohlcv = ohlcv.ffill(limit=max_ffill_hours) + + # Log coverage + n_filled = ohlcv["close"].notna().sum() + coverage = n_filled / len(ohlcv) + if coverage < MIN_COVERAGE: + logger.warning( + "%s: coverage %.0f%% is below %.0f%% threshold — feather written but " + "results may be unreliable", + contract.pair_yes, coverage * 100, MIN_COVERAGE * 100, + ) + else: + logger.info("%s: %.0f%% hourly coverage (%d/%d candles)", + contract.pair_yes, coverage * 100, n_filled, len(ohlcv)) + + # Fill any remaining NaN with 0 for volume, and carry-forward for prices + ohlcv["volume"] = ohlcv["volume"].fillna(0.0) + ohlcv[["open", "high", "low", "close"]] = ( + ohlcv[["open", "high", "low", "close"]].ffill().bfill() + ) + + # Convert to freqtrade format: date as ms int64 + ohlcv = ohlcv.reset_index().rename(columns={"index": "date"}) + ohlcv["date"] = ohlcv["date"].astype("int64") // 1_000_000 + + # Write feather (pair name → safe filename) + pair_safe = contract.pair_yes.replace("/", "_") + out_path = output_dir / f"{pair_safe}-1h.feather" + feather.write_feather(ohlcv[["date", "open", "high", "low", "close", "volume"]], str(out_path)) + logger.info("Wrote %s (%d rows)", out_path, len(ohlcv)) + return out_path + + +# --------------------------------------------------------------------------- +# End-to-end convenience wrapper +# --------------------------------------------------------------------------- + +def build_all_feathers_from_parquet( + parquet_path: str | Path, + output_dir: str | Path, + *, + min_rows_last7d: int = int(WINDOW_HOURS * MIN_COVERAGE), + max_ffill_hours: int = MAX_FFILL_HOURS, + filter_condition_ids: Sequence[str] | None = None, + write_jsonl: bool = True, +) -> list[ContractMetadata]: + """Load the parquet, parse BTC contracts, and build feather files. + + Args: + parquet_path: Path to the Polymarket parquet file. + output_dir: Directory to write feather files. + min_rows_last7d: Minimum rows in last 7d window to include. + max_ffill_hours: Maximum gap fill length (hours). + filter_condition_ids: If given, only process these condition IDs. + + Returns: + List of :class:`~polymarket.contracts.ContractMetadata` for all + contracts successfully written. 
+ """ + logger.info("Loading parquet from %s …", parquet_path) + df = pd.read_parquet( + str(parquet_path), + columns=["timestamp", "condition_id", "side", + "open", "high", "low", "close", "volume", "question"], + ) + + if filter_condition_ids is not None: + df = df[df["condition_id"].isin(filter_condition_ids)] + + contracts = parse_btc_contracts(df, min_rows_last7d=min_rows_last7d) + logger.info("Building feathers for %d contracts …", len(contracts)) + + written: list[ContractMetadata] = [] + for contract in contracts: + try: + build_feather_from_real_data( + df, contract, output_dir, max_ffill_hours=max_ffill_hours + ) + written.append(contract) + except Exception as exc: + logger.warning("Skipping %s: %s", contract.pair_yes, exc) + + if write_jsonl and written: + jsonl_path = Path(output_dir) / "real_contracts.jsonl" + write_contracts_jsonl(written, jsonl_path) + + logger.info( + "Done. Wrote feathers for %d/%d contracts into %s", + len(written), len(contracts), output_dir, + ) + return written + + +# --------------------------------------------------------------------------- +# JSONL serialisation (for strategy contract registry) +# --------------------------------------------------------------------------- + +def write_contracts_jsonl( + contracts: list[ContractMetadata], + output_path: str | Path, +) -> Path: + """Serialise a list of :class:`~polymarket.contracts.ContractMetadata` to JSONL. + + The output format mirrors the Polymarket REST API schema expected by + :func:`polymarket.contracts.load_contracts`, so the strategy can load + real-data contracts through the same code path. + + Args: + contracts: Parsed contracts (e.g. from :func:`parse_btc_contracts`). + output_path: Path to write the ``.jsonl`` file. + + Returns: + Path to the written file. + """ + import json as _json + + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with output_path.open("w") as fh: + for c in contracts: + # Encode settlement as Polymarket outcomePrices JSON string + if c.settlement >= 0.5: + outcome_prices = '["1.0", "0.0"]' + else: + outcome_prices = '["0.0", "1.0"]' + + record = { + "id": c.id, + "question": c.question, + "slug": c.slug, + "endDate": c.end_date_utc, + "startDate": c.start_date_utc, + "outcomePrices": outcome_prices, + "volume": str(c.volume_usd), + } + fh.write(_json.dumps(record) + "\n") + + logger.info("Wrote %d contracts to %s", len(contracts), output_path) + return output_path diff --git a/scripts/prepare_event_model.py b/scripts/prepare_event_model.py index 264188d..de02d78 100644 --- a/scripts/prepare_event_model.py +++ b/scripts/prepare_event_model.py @@ -3,8 +3,8 @@ Run this script once before backtesting with DualModelPolymarketPortfolio. It performs four steps: - 0. Build feather files — synthetic OHLCV price series per contract (required by - the freqtrade backtester). + 0. Build feather files — OHLCV price series per contract (synthetic by default; + use ``--use-real-data`` for real Polymarket prices). 1. Build training data — synthetic weekly BTC events from data_1h.csv. 2. Train event model — calibrated logistic regression, saved to pkl. 3. Generate predictions — per-contract fair_value CSVs for the backtester. 
@@ -13,8 +13,15 @@ ----- From the repo root:: + # Default: synthetic OHLCV from a JSONL contracts file python scripts/prepare_event_model.py + # Real Polymarket prices from a trade-history parquet + python scripts/prepare_event_model.py \\ + --use-real-data \\ + --parquet-path mycode/data/combined_filtered_data.paquet \\ + --output-dir user_data/data/polymarket_ml_real + All paths default to the standard repo layout. Override via CLI flags:: python scripts/prepare_event_model.py \\ @@ -61,6 +68,13 @@ def parse_args() -> argparse.Namespace: help="Skip step 0 if feather files already exist in output-dir.") p.add_argument("--skip-training-data", action="store_true", help="Skip step 1 if training_data.parquet already exists.") + # Real-data mode + p.add_argument("--use-real-data", action="store_true", + help="Step 0: build feathers from real Polymarket trade data " + "instead of synthetic prices. Requires --parquet-path.") + p.add_argument("--parquet-path", default="mycode/data/combined_filtered_data.paquet", + help="Path to the Polymarket trade-history parquet file " + "(used only with --use-real-data).") return p.parse_args() @@ -77,10 +91,23 @@ def main() -> None: output_dir.mkdir(parents=True, exist_ok=True) # ------------------------------------------------------------------ - # Step 0: Build synthetic feather files (required by backtester) + # Step 0: Build feather files (synthetic or real) # ------------------------------------------------------------------ if args.skip_feathers: logger.info("Step 0 skipped — assuming feather files already exist in %s", output_dir) + elif args.use_real_data: + logger.info("Step 0/3 — Building real-data OHLCV feather files from parquet") + parquet_path = REPO_ROOT / args.parquet_path + from polymarket.real_data_builder import build_all_feathers_from_parquet + written = build_all_feathers_from_parquet( + parquet_path=parquet_path, + output_dir=output_dir, + ) + logger.info("Step 0 complete: %d feather files written", len(written)) + # In real-data mode, the contracts come from the parquet itself; skip + # the JSONL-based contracts file for step 3 (we'll derive them below). 
+ contracts_path = None # signal to step 3 to use the parsed list + _real_contracts = written else: logger.info("Step 0/3 — Building synthetic OHLCV feather files") from polymarket.data_builder import build_all_feathers @@ -122,11 +149,22 @@ def main() -> None: # Step 3: Generate per-contract predictions # ------------------------------------------------------------------ logger.info("Step 3/3 — Generating per-contract predictions") - from polymarket.contracts import load_contracts from polymarket.data_builder import build_event_predictions - contracts = load_contracts(contracts_path) - logger.info(" Loaded %d contracts from %s", len(contracts), contracts_path) + if args.use_real_data and not args.skip_feathers: + # Contracts were parsed from the parquet in step 0; JSONL also written + contracts = _real_contracts + logger.info(" Using %d contracts parsed from parquet", len(contracts)) + elif args.use_real_data and args.skip_feathers: + # Feathers already built — reload contracts from the written JSONL + from polymarket.contracts import load_contracts as _load + real_jsonl = output_dir / "real_contracts.jsonl" + contracts = _load(real_jsonl) + logger.info(" Loaded %d contracts from %s", len(contracts), real_jsonl) + else: + from polymarket.contracts import load_contracts + contracts = load_contracts(contracts_path) + logger.info(" Loaded %d contracts from %s", len(contracts), contracts_path) build_event_predictions( btc_csv_path=btc_csv, diff --git a/user_data/strategies/DualModelPolymarketPortfolio.py b/user_data/strategies/DualModelPolymarketPortfolio.py index 8b323f4..2e508de 100644 --- a/user_data/strategies/DualModelPolymarketPortfolio.py +++ b/user_data/strategies/DualModelPolymarketPortfolio.py @@ -39,11 +39,12 @@ import json import logging -from datetime import UTC, datetime +from datetime import datetime, timezone from pathlib import Path from typing import Optional import pandas as pd + from freqtrade.persistence import Trade from freqtrade.strategy import IStrategy @@ -56,7 +57,7 @@ SETTLE_YES = 0.999 SETTLE_NO = 0.001 -UTC = UTC +UTC = timezone.utc class DualModelPolymarketPortfolio(IStrategy): @@ -101,10 +102,19 @@ def _resolve_data_root(self) -> Path: return Path(__file__).resolve().parents[1] def _load_contracts_registry(self) -> dict[str, ContractMetadata]: - """Load contract metadata and return a dict keyed by pair_yes.""" + """Load contract metadata and return a dict keyed by pair_yes. + + The JSONL path can be overridden via ``config["contracts_jsonl"]``. + Defaults to ``user_data/data/polymarket_contracts/jan20.jsonl``. + """ data_root = self._resolve_data_root() - jsonl_path = data_root / "data" / "polymarket_contracts" / "jan20.jsonl" - contracts = load_contracts(jsonl_path) + if hasattr(self, "config") and "contracts_jsonl" in self.config: + jsonl_path = Path(self.config["contracts_jsonl"]) + if not jsonl_path.is_absolute(): + jsonl_path = data_root / jsonl_path + else: + jsonl_path = data_root / "data" / "polymarket_contracts" / "jan20.jsonl" + contracts = load_contracts(jsonl_path, skip_unparseable=True) return {c.pair_yes: c for c in contracts} def _get_registry(self) -> dict[str, ContractMetadata]: @@ -112,15 +122,27 @@ def _get_registry(self) -> dict[str, ContractMetadata]: self._contract_registry = self._load_contracts_registry() return self._contract_registry + def _get_predictions_dir(self) -> Path: + """Return the directory containing event_probs CSVs. + + Defaults to ``user_data/data/polymarket_ml``. 
+ Override via ``config["predictions_dir"]`` (relative paths are resolved + against ``user_data/``). + """ + data_root = self._resolve_data_root() + if hasattr(self, "config") and "predictions_dir" in self.config: + p = Path(self.config["predictions_dir"]) + return p if p.is_absolute() else data_root / p + return data_root / "data" / "polymarket_ml" + def _load_event_probs(self, pair: str) -> pd.DataFrame | None: """Load per-contract event probability CSV produced by build_event_predictions. Returns a DataFrame indexed by UTC timestamps with a ``fair_value`` column, or ``None`` if the file does not exist. """ - data_root = self._resolve_data_root() filename = pair.replace("/", "_") + "-event_probs.csv" - csv_path = data_root / "data" / "polymarket_ml" / filename + csv_path = self._get_predictions_dir() / filename if not csv_path.exists(): return None
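
---

For reference, the real-data path added in this change can also be driven directly from Python rather than through `scripts/prepare_event_model.py`. The following is a minimal sketch, not part of the patch: it assumes the repo root is on `PYTHONPATH` and uses the documented default paths (adjust both to the local layout).

```python
# Sketch of the real-data step-0 workflow introduced in this change, driven
# from Python instead of the CLI. Paths below are the documented defaults
# from the README/flags table; adjust as needed.
import logging

from polymarket.contracts import load_contracts
from polymarket.real_data_builder import build_all_feathers_from_parquet

logging.basicConfig(level=logging.INFO)

# Step 0 (real-data mode): parse BTC contracts from the trade-history parquet,
# write one {pair}-1h.feather per qualifying market plus real_contracts.jsonl.
written = build_all_feathers_from_parquet(
    parquet_path="mycode/data/combined_filtered_data.paquet",
    output_dir="user_data/data/polymarket_ml_real",
)

# The generated JSONL round-trips through the same loader the strategy uses;
# skip_unparseable=True mirrors the strategy's behaviour for exotic questions.
contracts = load_contracts(
    "user_data/data/polymarket_ml_real/real_contracts.jsonl",
    skip_unparseable=True,
)

print(f"{len(written)} feathers written; {len(contracts)} contracts loaded")
for c in contracts[:3]:
    print(c.pair_yes, c.strike, c.direction, c.end_date_utc, c.settlement)
```

Pointing the strategy at the resulting directory is then just the `contracts_jsonl` and `predictions_dir` config keys documented in the README changes above.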