From 3cc68788b25550baa7c6ee8aab5b1c6331ce4ed2 Mon Sep 17 00:00:00 2001 From: NeuroQuantAI Date: Sun, 14 Jun 2026 16:53:13 -0700 Subject: [PATCH] Add quant risk and sizing research features --- README.md | 49 +++- data/samples/README.md | 28 +++ data/samples/sample_prices.csv | 421 +++++++++++++++++++++++++++++++++ src/neuroquant/__init__.py | 35 ++- src/neuroquant/app_helpers.py | 136 ++++++++++- src/neuroquant/backtest.py | 105 ++++++-- src/neuroquant/costs.py | 61 +++++ src/neuroquant/data.py | 45 +++- src/neuroquant/metrics.py | 49 ++++ src/neuroquant/research.py | 71 ++++++ src/neuroquant/risk.py | 105 ++++++++ src/neuroquant/sizing.py | 94 ++++++++ streamlit_app.py | 141 ++++++++++- tests/test_app_helpers.py | 54 +++++ tests/test_backtest.py | 45 ++++ tests/test_costs.py | 34 +++ tests/test_data.py | 38 ++- tests/test_metrics.py | 24 ++ tests/test_research.py | 27 +++ tests/test_risk.py | 62 +++++ tests/test_sizing.py | 67 ++++++ 21 files changed, 1648 insertions(+), 43 deletions(-) create mode 100644 data/samples/README.md create mode 100644 data/samples/sample_prices.csv create mode 100644 src/neuroquant/costs.py create mode 100644 src/neuroquant/risk.py create mode 100644 src/neuroquant/sizing.py create mode 100644 tests/test_costs.py create mode 100644 tests/test_risk.py create mode 100644 tests/test_sizing.py diff --git a/README.md b/README.md index 581d7e2..7353003 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,42 @@ exported to `sample_outputs/feature_sample.csv`. --- +## Quant v2 capabilities + +A more quant-serious research engine layered on top of the lab — all offline, +honest, and conservative by default. Every v2 feature is additive: the default +configuration reproduces the original behaviour exactly. + +- **Offline real-data CSV support** — bring your own local CSV (or use the + bundled **synthetic** sample in [`data/samples/`](data/samples/)). 
The loader + accepts `date`/`timestamp`, `close`, optional `open/high/low/volume`, and an + optional `benchmark_close`, with extra data-quality checks (sorted, + de-duplicated timestamps; suspicious zero-return and short-series warnings). + No data is downloaded at runtime and no network/API is ever called. +- **Position sizing** ([`sizing.py`](src/neuroquant/sizing.py)) — `fixed_unit` + (original 0/1), `fixed_fraction`, `capped_exposure`, and `volatility_target` + (trailing-vol targeting). Causal by construction; **no leverage and no + shorting by default** (max exposure 1.0). +- **Risk controls** ([`risk.py`](src/neuroquant/risk.py)) — optional volatility + cap and a simple drawdown guard, plus rolling volatility / Sharpe / drawdown + diagnostics. Off by default; controls only ever *reduce* exposure. +- **Structured cost model** ([`costs.py`](src/neuroquant/costs.py)) — explicit + **fee + spread + slippage** charged on turnover, with the legacy single-scalar + path preserved. Execution profiles pass structured assumptions through. +- **Gross vs net** — the engine reports gross and net returns, exposure, a cost + breakdown, and **cost drag**, so fee impact is explicit. +- **Benchmark comparison** — when a `benchmark_close` column is present, the lab + reports benchmark return and excess-vs-benchmark. +- **Validation upgrades** — an **overfit flag** (strong in-sample but weak + out-of-sample) and a transparent **0–100 robustness score** combining + out-of-sample Sharpe, walk-forward stability, drawdown, and cost resilience. + +These are research diagnostics on synthetic (or user-supplied) data — **not** +forecasts, **not** trading advice, with **no** live trading, broker/API +connections, or performance guarantees. + +--- + ## Visual overview | Visual | What it answers | @@ -179,7 +215,9 @@ frame = load_csv_series("my_series.csv") # needs a date/timestamp + close colum ``` The loader validates sorted, de-duplicated timestamps and a clean, positive -`close` column. 
`open`, `high`, `low`, `volume` are kept if present. +`close` column. `open`, `high`, `low`, `volume` and `benchmark_close` are kept +if present (a `benchmark_close` column enables benchmark comparison). A bundled, +fully-synthetic example lives in [`data/samples/`](data/samples/). --- @@ -260,12 +298,13 @@ Reports in `sample_outputs/`: - Synthetic data by default; it has no real-world structure and results do not generalise to any market. -- A small set of deliberately simple, explainable signal families; long-or-flat - positions only (no shorting, no leverage, no position sizing). +- A small set of deliberately simple, explainable signal families. Position + sizing and risk controls are conservative research options — **no leverage and + no shorting by default**. - Composite is a transparent score of simple signals — **not** a machine-learning model. -- No live trading, no order routing, and no execution modelling beyond a - simplified flat cost / slippage assumption. +- No live trading, no order routing, and no execution modelling beyond simplified + fee / spread / slippage assumptions charged on turnover. - Regime labels for *attribution* use full-series volatility quantiles (descriptive); the tradeable volatility *filter* uses a trailing threshold. - Not investment advice and not production trading infrastructure. diff --git a/data/samples/README.md b/data/samples/README.md new file mode 100644 index 0000000..f0c1cc3 --- /dev/null +++ b/data/samples/README.md @@ -0,0 +1,28 @@ +# Sample data + +`sample_prices.csv` is **fully synthetic** data generated by this project's own +seeded generator (`neuroquant.data.generate_synthetic_series`). It is included +purely to demonstrate the expected CSV schema for the optional real-data / +upload mode — it is **not** real market data and carries no third-party +licensing. + +- **Source:** generated locally by NeuroQuantAI (seeds 2024 and 99). +- **Date range:** 2021-01-01 → 2022-08-11 (420 business days). 
+- **Columns:** `date`, `close`, `benchmark_close`. + +## Bring your own CSV + +To run the lab on your own **local** historical data, provide a CSV with the +same shape (no network access is performed): + +``` +date,close,benchmark_close +2021-01-01,100.0,100.0 +2021-01-04,100.8,100.2 +... +``` + +Required: a `date`/`timestamp` column and a `close` column. Optional: +`open`, `high`, `low`, `volume`, and `benchmark_close` (enables benchmark +comparison). The loader validates sorted, de-duplicated timestamps and a clean, +positive `close` column. diff --git a/data/samples/sample_prices.csv b/data/samples/sample_prices.csv new file mode 100644 index 0000000..e89364a --- /dev/null +++ b/data/samples/sample_prices.csv @@ -0,0 +1,421 @@ +date,close,benchmark_close +2021-01-01,101.3769,100.1025 +2021-01-04,103.5951,99.6587 +2021-01-05,105.1826,99.729 +2021-01-06,103.8914,100.4358 +2021-01-07,102.0579,98.7065 +2021-01-08,102.1777,100.4033 +2021-01-11,103.3593,99.9646 +2021-01-12,104.0769,99.3901 +2021-01-13,106.5873,98.3746 +2021-01-14,107.6651,99.3154 +2021-01-15,108.5968,100.008 +2021-01-18,107.6015,101.2806 +2021-01-19,106.095,102.2096 +2021-01-20,108.1946,102.4993 +2021-01-21,108.2959,102.8571 +2021-01-22,109.4773,103.8444 +2021-01-25,107.5681,102.9576 +2021-01-26,106.9917,102.9309 +2021-01-27,105.2425,103.3454 +2021-01-28,104.2178,102.8994 +2021-01-29,105.4802,103.6654 +2021-02-01,103.5004,103.3216 +2021-02-02,102.8151,104.0399 +2021-02-03,103.0652,104.2071 +2021-02-04,102.204,104.7112 +2021-02-05,101.9,103.1547 +2021-02-08,101.637,102.2915 +2021-02-09,102.2216,103.6969 +2021-02-10,101.6806,103.9025 +2021-02-11,102.0718,103.8388 +2021-02-12,102.1778,104.8653 +2021-02-15,102.7742,105.6769 +2021-02-16,103.1061,105.6486 +2021-02-17,105.3837,104.9924 +2021-02-18,104.5098,107.0929 +2021-02-19,106.1836,107.8568 +2021-02-22,105.661,106.1957 +2021-02-23,104.3847,107.1123 +2021-02-24,106.0731,107.9602 +2021-02-25,105.5004,108.8643 +2021-02-26,105.0016,108.4472 
+2021-03-01,103.154,110.0766 +2021-03-02,100.4084,109.2782 +2021-03-03,101.2702,110.6318 +2021-03-04,99.7776,110.9782 +2021-03-05,100.8225,112.9005 +2021-03-08,103.3052,112.4851 +2021-03-09,103.1821,113.2939 +2021-03-10,101.7124,114.278 +2021-03-11,102.2656,113.9313 +2021-03-12,103.3143,113.967 +2021-03-15,102.9942,113.5181 +2021-03-16,103.0485,114.0851 +2021-03-17,104.8844,114.8963 +2021-03-18,106.6561,114.5847 +2021-03-19,107.6773,115.0047 +2021-03-22,106.5033,114.3603 +2021-03-23,106.4609,113.7887 +2021-03-24,107.3308,111.6443 +2021-03-25,107.0677,110.3632 +2021-03-26,106.2539,109.6439 +2021-03-29,105.2335,109.7845 +2021-03-30,104.4037,111.4887 +2021-03-31,103.5272,111.8114 +2021-04-01,102.9527,111.936 +2021-04-02,104.5289,112.3485 +2021-04-05,103.4776,110.8071 +2021-04-06,104.709,110.8829 +2021-04-07,105.3106,109.9465 +2021-04-08,105.5337,109.3394 +2021-04-09,104.436,111.611 +2021-04-12,103.8489,113.7148 +2021-04-13,106.5797,114.8543 +2021-04-14,106.7491,115.5028 +2021-04-15,107.5309,116.1092 +2021-04-16,108.4943,115.017 +2021-04-19,110.0264,113.6047 +2021-04-20,109.7202,114.0084 +2021-04-21,108.8859,113.5224 +2021-04-22,108.8342,112.6666 +2021-04-23,108.4983,112.5987 +2021-04-26,109.6522,112.5396 +2021-04-27,109.9558,112.7753 +2021-04-28,110.3315,111.9667 +2021-04-29,110.5728,112.0085 +2021-04-30,112.3864,110.651 +2021-05-03,111.6299,112.0393 +2021-05-04,110.9711,113.7772 +2021-05-05,112.2891,112.55 +2021-05-06,112.1675,113.0546 +2021-05-07,112.7288,112.8276 +2021-05-10,111.699,112.9786 +2021-05-11,111.7665,113.0722 +2021-05-12,112.4294,114.4523 +2021-05-13,110.539,115.1103 +2021-05-14,109.5778,114.1377 +2021-05-17,110.2151,114.2521 +2021-05-18,113.5188,114.2548 +2021-05-19,114.2374,113.3691 +2021-05-20,114.1842,113.6237 +2021-05-21,112.9703,113.6206 +2021-05-24,113.581,115.1986 +2021-05-25,109.9799,115.0054 +2021-05-26,109.9421,115.5367 +2021-05-27,109.5041,112.9912 +2021-05-28,108.7998,111.1337 +2021-05-31,112.1654,111.5202 +2021-06-01,108.6485,111.6866 
+2021-06-02,108.6497,112.7288 +2021-06-03,108.7798,111.4596 +2021-06-04,109.4755,111.1648 +2021-06-07,107.2517,108.4501 +2021-06-08,106.6351,107.5681 +2021-06-09,104.6134,107.1244 +2021-06-10,104.4713,107.0421 +2021-06-11,104.7692,106.4712 +2021-06-14,105.0249,107.5134 +2021-06-15,104.7864,105.9892 +2021-06-16,105.0715,104.6589 +2021-06-17,105.3456,105.1628 +2021-06-18,105.9336,104.7858 +2021-06-21,106.0001,106.217 +2021-06-22,103.6027,106.9729 +2021-06-23,102.5419,107.3508 +2021-06-24,103.0346,105.5475 +2021-06-25,101.853,104.9186 +2021-06-28,100.8315,105.2212 +2021-06-29,101.0105,104.5668 +2021-06-30,100.981,104.8833 +2021-07-01,102.1918,105.2187 +2021-07-02,102.905,104.3685 +2021-07-05,102.3552,103.2333 +2021-07-06,102.5381,104.2577 +2021-07-07,98.8269,104.329 +2021-07-08,97.837,104.4072 +2021-07-09,97.679,105.437 +2021-07-12,94.7226,107.3018 +2021-07-13,94.3547,104.3501 +2021-07-14,94.6922,105.9003 +2021-07-15,96.0037,106.984 +2021-07-16,96.5368,106.6894 +2021-07-19,98.9604,107.977 +2021-07-20,100.976,107.9085 +2021-07-21,98.8829,109.6677 +2021-07-22,98.6223,110.9464 +2021-07-23,98.4518,111.4273 +2021-07-26,98.5988,110.7579 +2021-07-27,97.8968,110.8769 +2021-07-28,98.7063,109.9324 +2021-07-29,99.6969,109.1852 +2021-07-30,97.7701,109.4036 +2021-08-02,99.0088,109.2854 +2021-08-03,98.2083,108.5803 +2021-08-04,99.5961,110.4078 +2021-08-05,100.36,109.8206 +2021-08-06,100.2199,109.8107 +2021-08-09,102.875,109.8299 +2021-08-10,104.1037,107.4716 +2021-08-11,104.1787,110.3426 +2021-08-12,104.5478,109.7307 +2021-08-13,107.9147,110.5661 +2021-08-16,109.9542,108.2354 +2021-08-17,111.3535,108.0756 +2021-08-18,111.699,106.3401 +2021-08-19,112.553,105.8228 +2021-08-20,112.8043,107.1919 +2021-08-23,110.622,108.0541 +2021-08-24,111.9366,108.7478 +2021-08-25,112.5756,109.3156 +2021-08-26,110.6494,107.4352 +2021-08-27,109.7622,106.2392 +2021-08-30,109.4418,106.8422 +2021-08-31,109.9392,108.3263 +2021-09-01,112.4718,108.8417 +2021-09-02,112.5315,109.4848 
+2021-09-03,109.7601,110.1052 +2021-09-06,110.7244,110.8992 +2021-09-07,110.5159,111.4561 +2021-09-08,108.0726,110.253 +2021-09-09,104.9455,111.353 +2021-09-10,103.5345,110.3754 +2021-09-13,104.0761,108.7875 +2021-09-14,103.086,108.9495 +2021-09-15,103.924,107.5273 +2021-09-16,103.576,106.3918 +2021-09-17,103.8548,106.5214 +2021-09-20,104.8396,105.6701 +2021-09-21,105.6633,104.7551 +2021-09-22,104.2587,105.2549 +2021-09-23,106.937,106.4514 +2021-09-24,104.2545,107.2539 +2021-09-27,104.0313,108.2434 +2021-09-28,102.6896,106.6711 +2021-09-29,104.3221,107.3926 +2021-09-30,102.5906,107.4001 +2021-10-01,101.251,106.6543 +2021-10-04,99.7927,106.3542 +2021-10-05,98.05,105.9693 +2021-10-06,97.3518,104.8845 +2021-10-07,97.6094,102.8798 +2021-10-08,96.4157,103.2823 +2021-10-11,94.3439,103.5676 +2021-10-12,94.0279,104.9749 +2021-10-13,94.0003,106.1992 +2021-10-14,94.884,106.417 +2021-10-15,93.9004,107.3479 +2021-10-18,93.6818,109.0344 +2021-10-19,94.8032,108.3011 +2021-10-20,93.5975,108.0388 +2021-10-21,93.4908,108.3524 +2021-10-22,93.0646,106.912 +2021-10-25,91.3476,105.7804 +2021-10-26,91.2205,106.5035 +2021-10-27,92.5732,106.0668 +2021-10-28,95.3259,106.5699 +2021-10-29,93.5653,106.4691 +2021-11-01,94.719,107.8128 +2021-11-02,96.1155,108.1308 +2021-11-03,97.664,108.2965 +2021-11-04,97.1297,108.0186 +2021-11-05,97.5463,106.9392 +2021-11-08,96.7927,105.3178 +2021-11-09,97.5193,108.1864 +2021-11-10,99.0697,108.5496 +2021-11-11,98.77,109.6952 +2021-11-12,99.0737,108.7513 +2021-11-15,100.2062,109.2857 +2021-11-16,101.1649,109.7257 +2021-11-17,100.318,108.8034 +2021-11-18,102.1412,111.0436 +2021-11-19,102.807,112.7358 +2021-11-22,103.034,112.5775 +2021-11-23,103.1089,113.8654 +2021-11-24,104.0712,115.2472 +2021-11-25,105.4955,115.9536 +2021-11-26,103.7937,116.6062 +2021-11-29,102.6527,116.7997 +2021-11-30,100.4007,116.8358 +2021-12-01,101.0067,115.195 +2021-12-02,101.5404,114.7294 +2021-12-03,101.1075,115.2882 +2021-12-06,99.7034,115.0265 +2021-12-07,101.4432,116.3288 
+2021-12-08,103.6042,116.4693 +2021-12-09,105.7823,115.524 +2021-12-10,105.8812,114.1132 +2021-12-13,106.1051,113.0851 +2021-12-14,106.3263,113.0684 +2021-12-15,106.1703,112.85 +2021-12-16,104.5197,109.8444 +2021-12-17,103.7297,110.464 +2021-12-20,104.8664,112.2601 +2021-12-21,104.8938,111.5061 +2021-12-22,104.2769,112.0483 +2021-12-23,103.9671,111.209 +2021-12-24,106.3906,112.6892 +2021-12-27,108.4997,111.8161 +2021-12-28,110.0985,111.6385 +2021-12-29,110.3845,113.2798 +2021-12-30,108.6942,114.3682 +2021-12-31,109.0014,114.0309 +2022-01-03,108.5778,115.4725 +2022-01-04,109.4888,116.2375 +2022-01-05,110.9885,117.505 +2022-01-06,110.0412,119.3482 +2022-01-07,111.9768,121.3382 +2022-01-10,112.0946,120.2894 +2022-01-11,112.2595,119.3157 +2022-01-12,111.4812,119.973 +2022-01-13,111.2452,119.6386 +2022-01-14,111.414,115.9533 +2022-01-17,111.1584,117.1139 +2022-01-18,110.7349,115.6651 +2022-01-19,110.2018,115.3469 +2022-01-20,113.1939,114.5845 +2022-01-21,112.9257,114.2074 +2022-01-24,114.0749,114.9463 +2022-01-25,114.224,112.9077 +2022-01-26,110.0011,112.5757 +2022-01-27,114.5912,110.8815 +2022-01-28,114.6277,110.533 +2022-01-31,113.8428,110.8474 +2022-02-01,115.8407,111.9499 +2022-02-02,118.1338,111.7014 +2022-02-03,117.2964,114.5309 +2022-02-04,116.0693,115.6255 +2022-02-07,114.7738,117.0869 +2022-02-08,115.3315,117.9792 +2022-02-09,115.6113,117.699 +2022-02-10,116.8307,117.1625 +2022-02-11,116.6929,116.12 +2022-02-14,115.9528,116.1802 +2022-02-15,116.1675,112.7202 +2022-02-16,115.3424,112.1955 +2022-02-17,116.3712,112.182 +2022-02-18,117.0386,110.6483 +2022-02-21,117.9233,109.4896 +2022-02-22,118.6543,109.955 +2022-02-23,118.0447,110.6453 +2022-02-24,115.4645,111.0122 +2022-02-25,117.4177,108.5376 +2022-02-28,119.1212,110.5947 +2022-03-01,118.0104,110.3588 +2022-03-02,119.4339,113.756 +2022-03-03,119.7109,113.7287 +2022-03-04,119.6327,114.3653 +2022-03-07,119.6583,113.8759 +2022-03-08,118.4085,114.6243 +2022-03-09,118.2427,116.6333 +2022-03-10,118.3583,117.2268 
+2022-03-11,118.7067,118.4438 +2022-03-14,117.7795,119.2623 +2022-03-15,118.4027,117.3239 +2022-03-16,117.5446,118.0952 +2022-03-17,116.8109,117.3106 +2022-03-18,116.9665,117.9183 +2022-03-21,116.299,116.7859 +2022-03-22,114.7805,118.3896 +2022-03-23,114.0263,117.7642 +2022-03-24,113.0226,120.2878 +2022-03-25,113.2424,119.8908 +2022-03-28,114.27,120.9655 +2022-03-29,115.1699,119.4177 +2022-03-30,114.1962,119.6664 +2022-03-31,113.6915,118.8087 +2022-04-01,114.1125,119.5419 +2022-04-04,111.7579,121.335 +2022-04-05,113.0311,122.7366 +2022-04-06,114.8614,122.0156 +2022-04-07,115.9194,120.601 +2022-04-08,112.6975,120.1913 +2022-04-11,113.1087,118.953 +2022-04-12,110.4353,120.2195 +2022-04-13,111.7898,120.6591 +2022-04-14,108.6883,121.3955 +2022-04-15,106.7171,119.6749 +2022-04-18,107.2304,118.536 +2022-04-19,105.2696,117.4175 +2022-04-20,101.5707,117.82 +2022-04-21,100.1545,118.2648 +2022-04-22,100.7407,115.6986 +2022-04-25,99.2086,117.1955 +2022-04-26,97.8086,119.5199 +2022-04-27,98.0374,117.4027 +2022-04-28,98.0412,116.3864 +2022-04-29,99.6473,117.2873 +2022-05-02,100.7708,116.4082 +2022-05-03,100.3361,115.9996 +2022-05-04,99.4176,115.7791 +2022-05-05,100.8276,113.7062 +2022-05-06,99.9453,113.0788 +2022-05-09,98.534,111.7053 +2022-05-10,99.1395,111.3005 +2022-05-11,98.3675,111.3233 +2022-05-12,96.4054,111.637 +2022-05-13,97.1891,111.745 +2022-05-16,98.3468,110.9343 +2022-05-17,96.8281,111.1796 +2022-05-18,96.3752,113.4744 +2022-05-19,95.79,112.5104 +2022-05-20,95.5981,109.0327 +2022-05-23,95.0571,107.8613 +2022-05-24,93.2222,106.5539 +2022-05-25,93.2791,104.6843 +2022-05-26,92.8911,104.088 +2022-05-27,93.0713,103.1922 +2022-05-30,93.8984,103.121 +2022-05-31,94.9218,103.6284 +2022-06-01,95.4724,106.1884 +2022-06-02,92.2764,106.8845 +2022-06-03,91.0605,106.8122 +2022-06-06,91.1875,104.6883 +2022-06-07,92.1927,103.0999 +2022-06-08,90.9849,101.8664 +2022-06-09,90.0818,101.5222 +2022-06-10,92.1565,99.5188 +2022-06-13,89.1629,99.7027 +2022-06-14,88.7957,101.061 
+2022-06-15,88.036,101.5829 +2022-06-16,87.5848,102.9624 +2022-06-17,87.1935,102.6939 +2022-06-20,87.9482,103.2353 +2022-06-21,89.0716,103.3337 +2022-06-22,92.75,104.0568 +2022-06-23,94.9727,105.0439 +2022-06-24,94.9306,103.8048 +2022-06-27,95.2774,102.3025 +2022-06-28,96.6303,101.2351 +2022-06-29,100.0961,102.3015 +2022-06-30,99.8961,102.8514 +2022-07-01,100.4634,103.9223 +2022-07-04,100.2687,103.0229 +2022-07-05,101.8832,103.1357 +2022-07-06,100.1341,103.6692 +2022-07-07,100.3692,101.7264 +2022-07-08,98.6902,101.3287 +2022-07-11,99.0896,101.6811 +2022-07-12,100.1774,101.9419 +2022-07-13,99.7014,101.1855 +2022-07-14,99.4451,101.0002 +2022-07-15,100.9787,101.7374 +2022-07-18,100.8852,102.2957 +2022-07-19,101.6779,103.0805 +2022-07-20,101.4068,103.5261 +2022-07-21,100.2274,103.454 +2022-07-22,99.7306,104.0224 +2022-07-25,102.5494,103.8496 +2022-07-26,102.2203,102.054 +2022-07-27,102.3468,102.3527 +2022-07-28,104.0162,100.9319 +2022-07-29,104.5779,99.7773 +2022-08-01,105.3735,99.2915 +2022-08-02,108.6376,100.0973 +2022-08-03,110.0163,99.9426 +2022-08-04,108.9264,102.1992 +2022-08-05,109.9646,103.0167 +2022-08-08,109.1786,103.1546 +2022-08-09,108.7467,101.1507 +2022-08-10,110.3399,101.992 +2022-08-11,110.4991,102.5756 diff --git a/src/neuroquant/__init__.py b/src/neuroquant/__init__.py index dec7cf9..026c60c 100644 --- a/src/neuroquant/__init__.py +++ b/src/neuroquant/__init__.py @@ -13,7 +13,12 @@ performance guarantees. 
""" -from .data import generate_synthetic_series, load_csv_series, load_csv_frame +from .data import ( + generate_synthetic_series, + load_csv_series, + load_csv_frame, + data_quality_report, +) from .validation import ( validate_price_frame, validate_window_config, @@ -21,6 +26,15 @@ ) from .features import build_feature_frame, add_regime_labels from .signals import build_signal_frame, SIGNAL_FAMILIES +from .costs import CostRates, resolve_cost_rates +from .sizing import compute_exposure, SIZING_METHODS +from .risk import ( + apply_risk_controls, + apply_drawdown_guard, + rolling_volatility, + rolling_sharpe, + rolling_drawdown, +) from .backtest import ( run_backtest, run_parameter_sweep, @@ -28,16 +42,18 @@ build_candidate_configs, BacktestConfig, ) -from .metrics import compute_kpis +from .metrics import compute_kpis, compute_extended_kpis from .research import ( split_train_test, walk_forward_validation, monte_carlo_bootstrap, + overfit_gap, + robustness_score, ) from .regime import summarize_by_regime from .stress import cost_sensitivity_analysis, stress_test_summary -__version__ = "1.2.0" +__version__ = "2.0.0" # Note: `pipeline` is intentionally NOT imported here. 
Eagerly importing it # would load the module during package init, which triggers a RuntimeWarning @@ -48,6 +64,7 @@ "generate_synthetic_series", "load_csv_series", "load_csv_frame", + "data_quality_report", "validate_price_frame", "validate_window_config", "ValidationError", @@ -55,15 +72,27 @@ "add_regime_labels", "build_signal_frame", "SIGNAL_FAMILIES", + "CostRates", + "resolve_cost_rates", + "compute_exposure", + "SIZING_METHODS", + "apply_risk_controls", + "apply_drawdown_guard", + "rolling_volatility", + "rolling_sharpe", + "rolling_drawdown", "run_backtest", "run_parameter_sweep", "run_config_sweep", "build_candidate_configs", "BacktestConfig", "compute_kpis", + "compute_extended_kpis", "split_train_test", "walk_forward_validation", "monte_carlo_bootstrap", + "overfit_gap", + "robustness_score", "summarize_by_regime", "cost_sensitivity_analysis", "stress_test_summary", diff --git a/src/neuroquant/app_helpers.py b/src/neuroquant/app_helpers.py index a7d4c5d..70d960c 100644 --- a/src/neuroquant/app_helpers.py +++ b/src/neuroquant/app_helpers.py @@ -9,6 +9,7 @@ from __future__ import annotations from dataclasses import dataclass +from pathlib import Path import pandas as pd @@ -18,10 +19,16 @@ config_label, run_backtest, ) -from .data import generate_synthetic_series, load_csv_frame -from .metrics import compute_kpis +from .data import data_quality_report, generate_synthetic_series, load_csv_frame +from .metrics import compute_extended_kpis, compute_kpis from .regime import summarize_by_regime -from .research import monte_carlo_bootstrap, split_train_test, walk_forward_validation +from .research import ( + monte_carlo_bootstrap, + overfit_gap, + robustness_score, + split_train_test, + walk_forward_validation, +) from .signals import required_warmup from .stress import cost_sensitivity_analysis, stress_test_summary from .validation import validate_price_frame @@ -171,6 +178,22 @@ class ResearchParams: use_volatility_filter: bool = False mc_simulations: int = 500 
+ # v2: position sizing & risk (conservative defaults reproduce v1 behaviour). + sizing_method: str = "fixed_unit" + fixed_fraction: float = 1.0 + target_volatility: float = 0.15 + max_exposure: float = 1.0 + allow_short: bool = False + use_risk_controls: bool = False + vol_cap: float = 0.30 + use_drawdown_guard: bool = False + drawdown_guard_level: float = 0.20 + + # v2: structured cost components (None keeps the legacy scalar path). + fee: float | None = None + spread: float = 0.0 + slippage: float = 0.0 + def to_config(self) -> BacktestConfig: """Build the :class:`BacktestConfig` described by these settings.""" return BacktestConfig( @@ -182,6 +205,18 @@ def to_config(self) -> BacktestConfig: zscore_entry=self.zscore_entry, cost_per_trade=self.cost_per_trade, use_volatility_filter=self.use_volatility_filter, + sizing_method=self.sizing_method, + fixed_fraction=self.fixed_fraction, + target_volatility=self.target_volatility, + max_exposure=self.max_exposure, + allow_short=self.allow_short, + use_risk_controls=self.use_risk_controls, + vol_cap=self.vol_cap, + use_drawdown_guard=self.use_drawdown_guard, + drawdown_guard_level=self.drawdown_guard_level, + fee=self.fee, + spread=self.spread, + slippage=self.slippage, ) @@ -202,6 +237,22 @@ def make_synthetic( ) +SAMPLE_CSV_PATH = ( + Path(__file__).resolve().parents[2] / "data" / "samples" / "sample_prices.csv" +) + + +def load_sample_csv() -> pd.DataFrame: + """Load the bundled, fully-synthetic sample CSV (offline; no network). + + The sample is generated by this project and ships with a ``benchmark_close`` + column so benchmark comparison can be demonstrated out of the box. + """ + from .data import load_csv_series + + return load_csv_series(SAMPLE_CSV_PATH) + + def load_uploaded_csv(file_like) -> pd.DataFrame: """Validate an uploaded CSV (in-memory) using the shared data-quality gates. 
@@ -296,6 +347,11 @@ def run_research(frame: pd.DataFrame, params: ResearchParams) -> dict: cost_sensitivity = cost_sensitivity_analysis(frame, config) stress_summary = stress_test_summary(frame, config) + extended_kpis = compute_extended_kpis(full_signals) + overfit = overfit_gap(in_sample_kpis, out_of_sample_kpis) + robustness = robustness_score(out_of_sample_kpis, walk_forward, cost_sensitivity) + has_benchmark = "benchmark_equity" in full_signals.columns + return { "config": config, "label": config_label(config), @@ -304,6 +360,7 @@ def run_research(frame: pd.DataFrame, params: ResearchParams) -> dict: "test_size": n - split_at, "full_signals": full_signals, "full_kpis": full_kpis, + "extended_kpis": extended_kpis, "in_sample_kpis": in_sample_kpis, "out_of_sample_kpis": out_of_sample_kpis, "walk_forward": walk_forward, @@ -312,6 +369,10 @@ def run_research(frame: pd.DataFrame, params: ResearchParams) -> dict: "regime_summary": regime_summary, "cost_sensitivity": cost_sensitivity, "stress_summary": stress_summary, + "overfit": overfit, + "robustness": robustness, + "has_benchmark": has_benchmark, + "data_warnings": data_quality_report(frame), "takeaway": build_takeaway( config_label(config), out_of_sample_kpis, monte_carlo, walk_forward ), @@ -401,6 +462,18 @@ def analyst_warnings(result: dict, turnover_warn: float = 0.50) -> list[dict]: elif result.get("walk_forward_message"): notes.append({"level": "info", "text": result["walk_forward_message"]}) + overfit = result.get("overfit") + if overfit and overfit.get("overfit_flag"): + notes.append({ + "level": "warning", + "text": "Possible overfitting: in-sample Sharpe is strong " + f"({overfit['in_sample_sharpe']:.2f}) but out-of-sample is " + f"weak ({overfit['out_of_sample_sharpe']:.2f}).", + }) + + for message in result.get("data_warnings", []): + notes.append({"level": "info", "text": message}) + if not notes: notes.append({ "level": "success", @@ -451,3 +524,60 @@ def drawdown_frame(signals: pd.DataFrame) -> 
pd.DataFrame: equity = signals["strategy_equity"] drawdown = (equity / equity.cummax() - 1.0) * 100 return pd.DataFrame({"Drawdown": drawdown}, index=signals.index) + + +def gross_net_frame(signals: pd.DataFrame) -> pd.DataFrame: + """Gross vs net cumulative return (%) — the gap is the cost drag.""" + net = (signals["strategy_equity"] - 1.0) * 100 + if "gross_equity" in signals.columns: + gross = (signals["gross_equity"] - 1.0) * 100 + else: + gross = net + return pd.DataFrame({"Gross": gross, "Net": net}, index=signals.index) + + +def exposure_frame(signals: pd.DataFrame) -> pd.DataFrame: + """Exposure (position weight) over time, for charts.""" + return pd.DataFrame({"Exposure": signals["position"]}, index=signals.index) + + +def cost_breakdown(signals: pd.DataFrame) -> pd.DataFrame: + """Total fee / spread / slippage drag over the run (as % of notional).""" + parts = {} + for col, label in ( + ("fee_cost", "Fee"), + ("spread_cost", "Spread"), + ("slippage_cost", "Slippage"), + ): + if col in signals.columns: + parts[label] = float(signals[col].sum() * 100) + if not parts: + parts = {"Total": float(signals.get("cost", pd.Series(dtype=float)).sum() * 100)} + return pd.DataFrame({"Cost drag (%)": parts}) + + +def rolling_risk_frame(signals: pd.DataFrame, window: int = 63) -> pd.DataFrame: + """Rolling annualised volatility (%) and rolling Sharpe of net returns.""" + from .risk import rolling_sharpe, rolling_volatility + + net = signals["strategy_return"] + return pd.DataFrame( + { + "Rolling vol (%)": rolling_volatility(net, 20) * 100, + "Rolling Sharpe": rolling_sharpe(net, window), + }, + index=signals.index, + ) + + +def benchmark_frame(signals: pd.DataFrame) -> pd.DataFrame | None: + """Strategy vs benchmark cumulative return (%), or ``None`` if no benchmark.""" + if "benchmark_equity" not in signals.columns: + return None + return pd.DataFrame( + { + "Strategy": (signals["strategy_equity"] - 1.0) * 100, + "Benchmark": (signals["benchmark_equity"] - 1.0) * 100, 
+ }, + index=signals.index, + ) diff --git a/src/neuroquant/backtest.py b/src/neuroquant/backtest.py index 8e34ed0..d54caab 100644 --- a/src/neuroquant/backtest.py +++ b/src/neuroquant/backtest.py @@ -15,12 +15,15 @@ import pandas as pd +from .costs import decompose_costs, resolve_cost_rates from .metrics import compute_kpis +from .risk import apply_drawdown_guard, apply_risk_controls from .signals import ( SIGNAL_FAMILIES, build_signal_frame, required_warmup, ) +from .sizing import SIZING_METHODS, compute_exposure from .validation import ValidationError, validate_price_frame, validate_window_config @@ -60,6 +63,26 @@ class BacktestConfig: vol_filter_window: int = 60 vol_filter_quantile: float = 0.8 + # --- v2: position sizing ------------------------------------------------- + # ``fixed_unit`` reproduces the original 0/1 long-or-flat behaviour. + sizing_method: str = "fixed_unit" + fixed_fraction: float = 1.0 + target_volatility: float = 0.15 + vol_lookback: int = 20 + max_exposure: float = 1.0 + allow_short: bool = False + + # --- v2: optional risk controls (off by default) ------------------------ + use_risk_controls: bool = False + vol_cap: float = 0.30 + use_drawdown_guard: bool = False + drawdown_guard_level: float = 0.20 + + # --- v2: structured costs (None keeps the legacy cost_per_trade path) ---- + fee: float | None = None + spread: float = 0.0 + slippage: float = 0.0 + def config_label(config: BacktestConfig) -> str: """Short, human-readable label for a configuration.""" @@ -93,22 +116,37 @@ def _validate_config(config: BacktestConfig) -> None: raise ValidationError("zscore_window must be positive.") if config.zscore_entry <= 0: raise ValidationError("zscore_entry must be positive.") + if config.sizing_method not in SIZING_METHODS: + raise ValidationError( + f"Unknown sizing_method '{config.sizing_method}'. " + f"Choose one of: {', '.join(SIZING_METHODS)}." 
+ ) + if config.max_exposure <= 0: + raise ValidationError("max_exposure must be positive.") + if config.sizing_method == "volatility_target" and config.target_volatility <= 0: + raise ValidationError("target_volatility must be positive.") def generate_signals(frame: pd.DataFrame, config: BacktestConfig) -> pd.DataFrame: - """Build positions and returns for one configuration. + """Build exposures and returns for one configuration. Steps: - * build the family's unshifted target position (see - :func:`neuroquant.signals.build_signal_frame`), - * SHIFT the target by one bar so today's decision uses only data - available up to yesterday — this avoids look-ahead bias, - * apply a transaction cost whenever the position changes, - * compute strategy and baseline returns plus cumulative equity. - - Returns a DataFrame with the family's component columns plus - ``position``, ``market_return``, ``strategy_return``, ``baseline_return``, - ``cost``, ``strategy_equity`` and ``baseline_equity``. + * build the family's unshifted target position + (:func:`neuroquant.signals.build_signal_frame`), + * size it into a target *exposure* (:mod:`neuroquant.sizing`) and apply + optional risk controls (:mod:`neuroquant.risk`) — all causal, + * SHIFT the exposure by one bar so today's decision uses only data + available up to yesterday (avoids look-ahead bias), + * optionally apply a drawdown guard, + * charge structured fee / spread / slippage costs on turnover + (:mod:`neuroquant.costs`), + * compute gross and net strategy returns, a buy-and-hold baseline, and an + optional benchmark (if a ``benchmark_close`` column is present). + + With the default config (``fixed_unit`` sizing, scalar cost, no risk + controls, no benchmark) this reproduces the original behaviour exactly; the + extra ``exposure`` / ``gross_return`` / ``net_return`` / cost-component / + benchmark columns are simply added alongside the originals. 
""" _validate_config(config) validate_price_frame(frame, min_rows=required_warmup(config) + 2) @@ -120,22 +158,47 @@ def generate_signals(frame: pd.DataFrame, config: BacktestConfig) -> pd.DataFram if col != "target_position": data[col] = signal_frame[col] - # Shift by one bar: act only on information from the previous bar. - data["position"] = signal_frame["target_position"].shift(1).fillna(0.0) - - data["market_return"] = data["close"].pct_change().fillna(0.0) - - position_change = data["position"].diff().abs().fillna(0.0) - data["cost"] = position_change * config.cost_per_trade + market_return = data["close"].pct_change().fillna(0.0) + data["market_return"] = market_return - data["strategy_return"] = ( - data["position"] * data["market_return"] - data["cost"] + # Size the raw signal into a target exposure, then apply optional risk + # controls — both use trailing data only. + raw_exposure = compute_exposure( + signal_frame["target_position"], market_return, config ) - data["baseline_return"] = data["market_return"] + raw_exposure = apply_risk_controls(raw_exposure, market_return, config) + + # Shift by one bar: act only on information from the previous bar. + position = raw_exposure.shift(1).fillna(0.0) + if config.use_drawdown_guard: + position = apply_drawdown_guard( + position, market_return, config.drawdown_guard_level + ) + data["position"] = position + data["exposure"] = position + + # Structured costs on turnover (legacy scalar path preserved via costs.py). 
+ turnover = position.diff().abs().fillna(0.0) + cost_parts = decompose_costs(turnover, resolve_cost_rates(config)) + data["fee_cost"] = cost_parts["fee_cost"] + data["spread_cost"] = cost_parts["spread_cost"] + data["slippage_cost"] = cost_parts["slippage_cost"] + data["cost"] = cost_parts["total_cost"] + + data["gross_return"] = position * market_return + data["strategy_return"] = data["gross_return"] - data["cost"] + data["net_return"] = data["strategy_return"] + data["baseline_return"] = market_return data["strategy_equity"] = (1.0 + data["strategy_return"]).cumprod() + data["gross_equity"] = (1.0 + data["gross_return"]).cumprod() data["baseline_equity"] = (1.0 + data["baseline_return"]).cumprod() + if "benchmark_close" in frame.columns: + benchmark_return = frame["benchmark_close"].pct_change().fillna(0.0) + data["benchmark_return"] = benchmark_return + data["benchmark_equity"] = (1.0 + benchmark_return).cumprod() + return data diff --git a/src/neuroquant/costs.py b/src/neuroquant/costs.py new file mode 100644 index 0000000..a76adaa --- /dev/null +++ b/src/neuroquant/costs.py @@ -0,0 +1,61 @@ +"""Structured transaction-cost model: fee + spread + slippage. + +The backtest historically used a single ``cost_per_trade`` scalar. This module +generalises that into explicit fee, spread and slippage components while +preserving the old behaviour: when no structured components are supplied, the +legacy scalar is treated as an all-in fee so results are unchanged. + +Costs are charged on *turnover* — the absolute change in exposure from one bar +to the next — so partial sizing and continuous exposures are handled naturally. 
@dataclass(frozen=True)
class CostRates:
    """Per-turnover cost rates, each expressed as a fraction of traded notional.

    Every rate is charged once per unit of turnover (one side of a trade), so
    a full round trip -- entering and later exiting -- pays ``total`` twice.
    """

    fee: float = 0.0
    spread: float = 0.0
    slippage: float = 0.0

    @property
    def total(self) -> float:
        """Combined one-way cost rate per unit of turnover."""
        return self.fee + self.spread + self.slippage


def _rate(config, name: str) -> float:
    """Read cost attribute ``name`` from ``config``; missing or ``None`` means 0."""
    value = getattr(config, name, None)
    return 0.0 if value is None else float(value)


def resolve_cost_rates(config) -> CostRates:
    """Resolve cost rates from a config, preserving legacy scalar behaviour.

    ``config`` is duck-typed (attribute access only).

    * If ``config.fee`` is ``None`` or absent, the legacy ``cost_per_trade``
      scalar is used as an all-in fee with zero spread and slippage, so
      existing results are reproduced exactly.
    * Otherwise the explicit ``fee`` / ``spread`` / ``slippage`` attributes
      are used.

    Any component that is missing or ``None`` is treated as ``0.0`` rather
    than raising, so callers do not have to pre-sanitise optional fields.
    """
    fee = getattr(config, "fee", None)
    if fee is None:
        return CostRates(fee=_rate(config, "cost_per_trade"))
    return CostRates(
        fee=float(fee),
        spread=_rate(config, "spread"),
        slippage=_rate(config, "slippage"),
    )


def decompose_costs(turnover: pd.Series, rates: CostRates) -> pd.DataFrame:
    """Break per-bar turnover into fee / spread / slippage / total cost series.

    Returns a frame indexed like ``turnover`` with the columns ``fee_cost``,
    ``spread_cost``, ``slippage_cost`` and ``total_cost`` (each one is
    ``turnover`` multiplied by the matching rate).
    """
    return pd.DataFrame(
        {
            "fee_cost": turnover * rates.fee,
            "spread_cost": turnover * rates.spread,
            "slippage_cost": turnover * rates.slippage,
            "total_cost": turnover * rates.total,
        },
        index=turnover.index,
    )
def data_quality_report(
    frame: pd.DataFrame,
    short_rows_warning: int = 300,
    zero_return_fraction_warning: float = 0.20,
) -> list[str]:
    """Collect advisory (non-fatal) data-quality notes for a price frame.

    Nothing is raised here; each finding is appended as a human-readable
    string, in this order:

    1. fewer than ``short_rows_warning`` rows,
    2. more than ``zero_return_fraction_warning`` of the ``close`` returns
       being exactly zero (only checked when a ``close`` column exists and
       the frame has at least two rows),
    3. no ``benchmark_close`` column.

    An empty list means none of the three checks fired.
    """
    notes: list[str] = []
    row_count = len(frame)
    columns = frame.columns

    if row_count < short_rows_warning:
        notes.append(
            f"Short series: only {row_count} rows. Walk-forward and robustness "
            "diagnostics need a longer history to be meaningful."
        )

    # Share of exactly-zero returns; stays None when it cannot be computed.
    zero_share = None
    if row_count > 1 and "close" in columns:
        changes = frame["close"].pct_change().dropna()
        if not changes.empty:
            zero_share = float(changes.eq(0).mean())

    if zero_share is not None and zero_share > zero_return_fraction_warning:
        notes.append(
            f"{zero_share:.0%} of returns are exactly zero — the "
            "series may be stale, padded, or low-resolution."
        )

    if "benchmark_close" not in columns:
        notes.append(
            "No 'benchmark_close' column — benchmark comparison is unavailable "
            "for this series."
        )

    return notes
+ + Adds exposure, gross-vs-net, cost-drag, rolling-Sharpe and (when a + benchmark is present) benchmark-excess metrics. Used by the interactive lab; + the core pipeline keeps using :func:`compute_kpis` so its outputs are + unchanged. Metrics that are not meaningful for a given run are returned as + ``NaN`` rather than fabricated. + """ + out = dict(compute_kpis(signals)) + + net = signals["strategy_return"] + position = signals["position"] + + out["cagr"] = out["annualized_return"] + out["exposure_avg"] = float(position.mean()) + out["exposure_max"] = float(position.max()) + out["best_day"] = float(net.max()) + out["worst_day"] = float(net.min()) + out["win_rate"] = float((net > 0).mean()) + + if "gross_equity" in signals.columns: + gross_total = float(signals["gross_equity"].iloc[-1] - 1.0) + elif "gross_return" in signals.columns: + gross_total = float((1.0 + signals["gross_return"]).prod() - 1.0) + else: + gross_total = out["total_return"] + out["gross_return"] = gross_total + out["cost_drag"] = gross_total - out["total_return"] + + # Lazy import avoids a circular dependency at module load. 
def overfit_gap(in_sample_kpis: dict, out_of_sample_kpis: dict) -> dict:
    """Contrast in-sample and out-of-sample Sharpe to flag likely overfitting.

    Both inputs must carry a ``sharpe_ratio`` entry. The result holds the two
    Sharpe values, their difference (``gap`` = in-sample minus out-of-sample)
    and ``overfit_flag``, which is true only when in-sample Sharpe exceeds 0.5
    while out-of-sample Sharpe is negative.
    """
    sharpe_in = float(in_sample_kpis["sharpe_ratio"])
    sharpe_out = float(out_of_sample_kpis["sharpe_ratio"])
    strong_in_sample = sharpe_in > 0.5
    weak_out_of_sample = sharpe_out < 0.0
    return {
        "in_sample_sharpe": sharpe_in,
        "out_of_sample_sharpe": sharpe_out,
        "gap": sharpe_in - sharpe_out,
        "overfit_flag": bool(strong_in_sample and weak_out_of_sample),
    }


def _clip01(value: float) -> float:
    """Clamp ``value`` to the closed interval [0, 1]; NaN maps to 0.0."""
    if value > 1.0:
        return 1.0
    if value > 0.0:
        return float(value)
    # Reached for values <= 0 and for NaN (every comparison above is False).
    return 0.0


def robustness_score(
    out_of_sample_kpis: dict,
    walk_forward: pd.DataFrame | None,
    cost_sensitivity: pd.DataFrame | None,
) -> dict:
    """Score a run on a transparent 0-100 heuristic (not a forecast or rating).

    Four bounded components are added together:

    * OOS Sharpe (max 40): ``sharpe_ratio`` divided by 1.5, clamped to [0, 1].
    * Walk-forward stability (max 25): share of rows in ``walk_forward`` whose
      ``test_sharpe`` is positive; 0 when the frame is ``None`` or empty.
    * Drawdown (max 15): ``max_drawdown`` of 0 earns all 15 points, -0.5 or
      worse earns none, linear in between.
    * Cost resilience (max 20): looks at ``total_return`` in the first and
      last rows of ``cost_sensitivity``; 20 if both are positive, 10 if only
      the first is, otherwise 0.

    Returns the rounded total under ``score`` plus each rounded component.
    """
    sharpe_pts = _clip01(float(out_of_sample_kpis["sharpe_ratio"]) / 1.5) * 40.0

    have_folds = walk_forward is not None and not walk_forward.empty
    positive_share = (
        float((walk_forward["test_sharpe"] > 0).mean()) if have_folds else 0.0
    )
    stability_pts = positive_share * 25.0

    worst_drawdown = float(out_of_sample_kpis["max_drawdown"])
    drawdown_pts = _clip01(1.0 + worst_drawdown / 0.5) * 15.0  # 0% -> 15, -50% -> 0

    cost_pts = 0.0
    if cost_sensitivity is not None and not cost_sensitivity.empty:
        first_rung = float(cost_sensitivity.iloc[0]["total_return"])
        last_rung = float(cost_sensitivity.iloc[-1]["total_return"])
        if first_rung > 0:
            cost_pts = 20.0 if last_rung > 0 else 10.0

    overall = sharpe_pts + stability_pts + drawdown_pts + cost_pts
    return {
        "score": round(overall, 1),
        "oos_sharpe_points": round(sharpe_pts, 1),
        "stability_points": round(stability_pts, 1),
        "drawdown_points": round(drawdown_pts, 1),
        "cost_points": round(cost_pts, 1),
    }
TRADING_DAYS = 252


# --- Rolling diagnostics ----------------------------------------------------

def rolling_volatility(returns: pd.Series, window: int = 20) -> pd.Series:
    """Trailing annualised volatility (population std x sqrt(TRADING_DAYS))."""
    return returns.rolling(window).std(ddof=0) * np.sqrt(TRADING_DAYS)


def rolling_sharpe(returns: pd.Series, window: int = 63) -> pd.Series:
    """Trailing annualised Sharpe ratio (risk-free rate assumed zero).

    Windows with zero volatility produce NaN rather than +/-inf.
    """
    mean = returns.rolling(window).mean() * TRADING_DAYS
    vol = returns.rolling(window).std(ddof=0) * np.sqrt(TRADING_DAYS)
    return (mean / vol).replace([np.inf, -np.inf], np.nan)


def rolling_drawdown(equity: pd.Series) -> pd.Series:
    """Drawdown of an equity curve relative to its running peak (<= 0)."""
    return equity / equity.cummax() - 1.0


# --- Optional exposure controls ---------------------------------------------

def apply_volatility_cap(
    exposure: pd.Series,
    returns: pd.Series,
    vol_cap: float,
    lookback: int = 20,
) -> pd.Series:
    """Scale exposure down when trailing volatility would breach ``vol_cap``.

    The scale factor is ``vol_cap / realised_vol`` clipped at 1, so the cap
    can only *reduce* exposure, never lever it up. Where trailing volatility
    is undefined (warm-up) or not positive, exposure is left unchanged.
    """
    realized = returns.rolling(lookback).std(ddof=0) * np.sqrt(TRADING_DAYS)
    scale = (float(vol_cap) / realized).clip(upper=1.0)
    # NaN / zero volatility -> neutral scale of 1 (no adjustment).
    scale = scale.where(realized > 0, other=1.0).fillna(1.0)
    return exposure * scale


def apply_risk_controls(exposure: pd.Series, returns: pd.Series, config) -> pd.Series:
    """Apply the optional volatility cap followed by a hard max-exposure clip.

    Returns ``exposure`` untouched unless ``config.use_risk_controls`` is
    truthy. ``config`` is duck-typed; ``vol_cap`` (0.30), ``vol_lookback``
    (20), ``max_exposure`` (1.0) and ``allow_short`` (False) fall back to the
    defaults shown when absent. The lower clip bound is 0 unless shorting is
    allowed, in which case it is ``-max_exposure``.
    """
    if not getattr(config, "use_risk_controls", False):
        return exposure
    capped = apply_volatility_cap(
        exposure,
        returns,
        float(getattr(config, "vol_cap", 0.30)),
        int(getattr(config, "vol_lookback", 20)),
    )
    max_exposure = float(getattr(config, "max_exposure", 1.0))
    allow_short = bool(getattr(config, "allow_short", False))
    lower = -abs(max_exposure) if allow_short else 0.0
    return capped.clip(lower=lower, upper=abs(max_exposure))


def apply_drawdown_guard(
    position: pd.Series,
    market_return: pd.Series,
    level: float = 0.20,
) -> pd.Series:
    """Flatten exposure once the strategy's drawdown first reaches ``-level``.

    The guard walks the (already shifted) exposure bar by bar, compounding a
    gross equity curve from ``position * market_return`` (NaN returns count
    as 0). On the bar where the running drawdown from the equity peak hits
    ``-abs(level)``, every *subsequent* bar is set to zero exposure; the
    breaching bar itself is left as-is, so each decision only uses drawdown
    information through the prior bar (causal).

    The guard latches: it stays flat to the end of the series. While exposure
    is zero the realised equity cannot move, so it can never print a fresh
    high and a re-entry test against that same curve would never fire.
    NOTE(review): genuine re-entry would need a separate reference curve
    (e.g. shadow equity of the unguarded position) and would change results.

    Returns a new float Series on ``position``'s index; inputs are not mutated.
    """
    out = position.to_numpy(dtype=float, copy=True)
    mret = np.nan_to_num(market_return.to_numpy(dtype=float))
    threshold = -abs(level)

    equity = 1.0
    peak = 1.0
    for t in range(len(out)):
        equity *= 1.0 + out[t] * mret[t]
        peak = max(peak, equity)
        if equity / peak - 1.0 <= threshold:
            # Breach: equity is frozen from here on, so the rest is flat.
            out[t + 1:] = 0.0
            break
    return pd.Series(out, index=position.index)
TRADING_DAYS = 252

SIZING_METHODS = (
    "fixed_unit",
    "fixed_fraction",
    "volatility_target",
    "capped_exposure",
)


def _clip(exposure: pd.Series, max_exposure: float, allow_short: bool) -> pd.Series:
    """Bound exposure to ``[floor, |max_exposure|]``; floor is 0 unless shorting."""
    ceiling = abs(max_exposure)
    if allow_short:
        floor = -ceiling
    else:
        floor = 0.0
    return exposure.clip(lower=floor, upper=ceiling)


def fixed_unit(target: pd.Series) -> pd.Series:
    """Pass the raw signal through unchanged (original 0/1 long-or-flat)."""
    return target


def fixed_fraction(target: pd.Series, fraction: float = 1.0) -> pd.Series:
    """Scale the signal by a constant ``fraction``."""
    return target * float(fraction)


def volatility_target(
    target: pd.Series,
    returns: pd.Series,
    target_volatility: float = 0.15,
    lookback: int = 20,
) -> pd.Series:
    """Scale the signal so trailing volatility aims at ``target_volatility``.

    Realised volatility is the rolling population standard deviation of
    ``returns`` over ``lookback`` bars, annualised with ``TRADING_DAYS``.
    During warm-up, or when realised volatility is zero, the multiplier is
    NaN and so is the returned exposure for that bar.
    """
    annualiser = np.sqrt(TRADING_DAYS)
    trailing_vol = returns.rolling(lookback).std(ddof=0) * annualiser
    multiplier = float(target_volatility) / trailing_vol
    # Division by zero volatility yields +/-inf; treat it as undefined.
    multiplier = multiplier.replace([np.inf, -np.inf], np.nan)
    return target * multiplier


def compute_exposure(target_position: pd.Series, returns: pd.Series, config) -> pd.Series:
    """Size the raw signal with the configured method, then clip it.

    The result is the *unshifted* target exposure. ``config`` is read purely
    through attribute access, with these fallbacks when an attribute is
    absent: ``sizing_method`` "fixed_unit", ``max_exposure`` 1.0,
    ``allow_short`` False, ``fixed_fraction`` 1.0, ``target_volatility`` 0.15
    and ``vol_lookback`` 20.

    Raises:
        ValueError: if ``sizing_method`` is not one of ``SIZING_METHODS``.
    """
    method = getattr(config, "sizing_method", "fixed_unit")
    # Shared settings are coerced up front, before any dispatch happens.
    cap = float(getattr(config, "max_exposure", 1.0))
    shorting = bool(getattr(config, "allow_short", False))
    fraction = float(getattr(config, "fixed_fraction", 1.0))

    if method not in SIZING_METHODS:
        raise ValueError(
            f"Unknown sizing_method '{method}'. Choose one of: "
            f"{', '.join(SIZING_METHODS)}."
        )

    if method == "volatility_target":
        sized = volatility_target(
            target_position,
            returns,
            float(getattr(config, "target_volatility", 0.15)),
            int(getattr(config, "vol_lookback", 20)),
        )
    elif method == "fixed_unit":
        sized = fixed_unit(target_position)
    else:
        # "fixed_fraction" and "capped_exposure" size identically; the latter
        # differs only through the max-exposure clip applied below.
        sized = fixed_fraction(target_position, fraction)

    return _clip(sized, cap, shorting)
whether the result is fragile to costs or adverse " "assumptions.", } @@ -165,7 +174,14 @@ def _render_note(note: dict) -> None: with st.sidebar: # A. Data source -------------------------------------------------------- st.header("A · Data source") - data_mode = st.radio("Data mode", ["Synthetic data", "Upload CSV"]) + data_mode = st.radio( + "Data mode", ["Synthetic data", "Sample CSV", "Upload CSV"] + ) + if data_mode == "Sample CSV": + st.caption( + "Bundled **synthetic** sample (with a benchmark column) — offline, " + "no network. See `data/samples/`." + ) # B. Asset profile ------------------------------------------------------ st.header("B · Asset profile") @@ -192,7 +208,7 @@ def _render_note(note: dict) -> None: "Volatility", value=float(asset["volatility"]), step=0.001, format="%.3f", ) - else: + elif data_mode == "Upload CSV": uploaded = st.file_uploader( "Upload CSV (date/timestamp + close columns)", type=["csv"] ) @@ -200,6 +216,7 @@ def _render_note(note: dict) -> None: "Processed in-memory for this session only — never saved, never " "uploaded anywhere." ) + # "Sample CSV" needs no extra controls; it loads the bundled synthetic file. # C. Execution / connection assumptions --------------------------------- st.header("C · Execution profile") @@ -279,6 +296,38 @@ def _render_note(note: dict) -> None: show_cost = st.checkbox("Include cost sensitivity", value=True) st.caption("Walk-forward windows are sized automatically from the data length.") + # F. Position sizing & risk -------------------------------------------- + st.header("F · Position sizing & risk") + sizing_method = st.selectbox("Sizing method", list(SIZING_METHODS)) + st.caption( + "Conservative defaults: no leverage, no shorting. `fixed_unit` keeps the " + "original 0/1 long-or-flat behaviour." 
+ ) + target_volatility = 0.15 + fixed_fraction = 1.0 + if sizing_method == "volatility_target": + target_volatility = st.slider( + "Target volatility (annualised)", 0.05, 0.40, 0.15, 0.01 + ) + elif sizing_method in ("fixed_fraction", "capped_exposure"): + fixed_fraction = st.slider("Exposure fraction", 0.1, 1.0, 1.0, 0.1) + max_exposure = st.slider("Max exposure", 0.1, 1.0, 1.0, 0.1) + allow_short = st.checkbox("Allow shorting", value=False) + st.caption( + "Shorting is off by default and rarely meaningful for long-or-flat signals." + ) + + use_risk_controls = st.checkbox("Risk controls (volatility cap)", value=False) + vol_cap = 0.30 + if use_risk_controls: + vol_cap = st.slider("Volatility cap (annualised)", 0.10, 0.60, 0.30, 0.05) + use_drawdown_guard = st.checkbox("Drawdown guard", value=False) + drawdown_guard_level = 0.20 + if use_drawdown_guard: + drawdown_guard_level = st.slider( + "Drawdown guard level", 0.05, 0.50, 0.20, 0.05 + ) + run_clicked = st.button("Run research", type="primary", use_container_width=True) @@ -290,12 +339,23 @@ def _render_note(note: dict) -> None: st.error("Please upload a CSV file, or switch to synthetic data.") st.stop() frame = load_uploaded_csv(uploaded) + elif data_mode == "Sample CSV": + frame = load_sample_csv() else: frame = _cached_synthetic( int(n_days), int(seed), float(start_value), float(drift), float(volatility), ) + # Pass structured cost components for preset execution profiles so the + # cost breakdown is meaningful; custom profiles use the single scalar. 
+ if assumptions.get("is_custom", True) or assumptions.get("fee") is None: + fee = spread = slippage = None + else: + fee = float(assumptions["fee"]) + spread = float(assumptions["spread"]) + slippage = float(assumptions["slippage"]) + params = ResearchParams( signal_family=signal_family, short_window=int(short_window), @@ -307,6 +367,18 @@ def _render_note(note: dict) -> None: train_fraction=float(train_fraction), use_volatility_filter=bool(use_volatility_filter), mc_simulations=int(mc_simulations), + sizing_method=sizing_method, + fixed_fraction=float(fixed_fraction), + target_volatility=float(target_volatility), + max_exposure=float(max_exposure), + allow_short=bool(allow_short), + use_risk_controls=bool(use_risk_controls), + vol_cap=float(vol_cap), + use_drawdown_guard=bool(use_drawdown_guard), + drawdown_guard_level=float(drawdown_guard_level), + fee=fee, + spread=spread if spread is not None else 0.0, + slippage=slippage if slippage is not None else 0.0, ) with st.spinner("Running research workflow…"): st.session_state["result"] = run_research(frame, params) @@ -331,8 +403,8 @@ def _render_note(note: dict) -> None: st.markdown( """ - **What you can change:** the data source, asset profile, execution - assumptions (fees / spread / slippage), signal family, parameters, and - validation settings. + assumptions (fees / spread / slippage), signal family, parameters, + position sizing & risk controls, and validation settings. - **What the output means:** risk/return KPIs vs a baseline, out-of-sample and walk-forward stability, Monte Carlo robustness, regime attribution, and cost sensitivity. 
@@ -348,9 +420,12 @@ def _render_note(note: dict) -> None: float(volatility), ) st.line_chart(preview.rename(columns={"close": "Synthetic close"})) + elif data_mode == "Sample CSV": + st.caption("Preview of the bundled synthetic sample (no backtest yet):") + st.line_chart(load_sample_csv()[["close", "benchmark_close"]]) else: st.caption("Upload a CSV in the sidebar to preview and analyse it.") - st.info("Configure the sidebar (A → E), then click **Run research**.") + st.info("Configure the sidebar (A → F), then click **Run research**.") st.stop() @@ -385,6 +460,23 @@ def _render_note(note: dict) -> None: f"z-score {cfg.zscore_window}/{cfg.zscore_entry:g}, volatility filter " f"{'on' if cfg.use_volatility_filter else 'off'}." ) + st.write( + f"**Sizing & risk:** {cfg.sizing_method}, max exposure {cfg.max_exposure:g}, " + f"shorting {'on' if cfg.allow_short else 'off'}, risk controls " + f"{'on' if cfg.use_risk_controls else 'off'}, drawdown guard " + f"{'on' if cfg.use_drawdown_guard else 'off'}." + ) + rob = result["robustness"] + overfit = result["overfit"] + s1, s2, s3 = st.columns(3) + s1.metric("Robustness score", f"{rob['score']:.0f}/100") + s2.metric("IS → OOS Sharpe", f"{overfit['in_sample_sharpe']:.2f} → {overfit['out_of_sample_sharpe']:.2f}") + s3.metric("Overfit flag", "Yes" if overfit["overfit_flag"] else "No") + st.caption( + "Robustness is a transparent 0–100 heuristic from out-of-sample Sharpe, " + "walk-forward stability, drawdown, and cost resilience — not a rating or " + "forecast." + ) st.info(result["takeaway"], icon="🧭") # 2 · KPI scorecards @@ -413,6 +505,15 @@ def _render_note(note: dict) -> None: st.line_chart(equity_frame(result["full_signals"])) st.subheader("Drawdown (%)") st.area_chart(drawdown_frame(result["full_signals"])) + bench = benchmark_frame(result["full_signals"]) + if bench is not None: + st.subheader("Strategy vs benchmark — cumulative return (%)") + st.caption( + "Benchmark comes from the uploaded `benchmark_close` column." 
+ ) + st.line_chart(bench) + ek = result["extended_kpis"] + st.metric("Excess vs benchmark", f"{ek['benchmark_excess'] * 100:+.1f}%") # 4 · Walk-forward with tabs[3]: @@ -464,8 +565,31 @@ def _render_note(note: dict) -> None: st.dataframe(display, use_container_width=True, hide_index=True) st.bar_chart(regime.set_index("regime")[["total_return"]]) -# 7 · Cost sensitivity & stress +# 7 · Sizing & risk with tabs[6]: + st.subheader("Gross vs net — cumulative return (%)") + st.caption(TAB_CAPTIONS["Sizing & risk"]) + st.line_chart(gross_net_frame(result["full_signals"])) + ek = result["extended_kpis"] + g1, g2, g3, g4 = st.columns(4) + g1.metric("Gross return", f"{ek['gross_return'] * 100:+.1f}%") + g2.metric("Net return", f"{ek['total_return'] * 100:+.1f}%") + g3.metric("Cost drag", f"{ek['cost_drag'] * 100:.1f}%") + g4.metric("Avg exposure", f"{ek['exposure_avg']:.2f}") + + st.subheader("Exposure over time") + st.area_chart(exposure_frame(result["full_signals"])) + + ccol, rcol = st.columns(2) + with ccol: + st.subheader("Cost breakdown") + st.dataframe(cost_breakdown(result["full_signals"]), use_container_width=True) + with rcol: + st.subheader("Rolling risk") + st.line_chart(rolling_risk_frame(result["full_signals"])) + +# 8 · Cost sensitivity & stress +with tabs[7]: st.subheader("Cost sensitivity") st.caption(TAB_CAPTIONS["Cost & stress"]) if context.get("show_cost", True): @@ -499,8 +623,9 @@ def _render_note(note: dict) -> None: """ - **Synthetic data by default**; it has no real-world structure and results do not generalise to any market. -- A small set of **deliberately simple, explainable** signal families; - long-or-flat positions only (no shorting, leverage, or sizing). +- A small set of **deliberately simple, explainable** signal families. + Position sizing and risk controls are conservative research options + (no leverage and no shorting by default). - The composite is a transparent score of simple signals — **not** a machine-learning model. 
- **Asset and execution profiles model assumptions, not live connections.** No diff --git a/tests/test_app_helpers.py b/tests/test_app_helpers.py index f6c45fd..0fb698f 100644 --- a/tests/test_app_helpers.py +++ b/tests/test_app_helpers.py @@ -194,3 +194,57 @@ def test_analyst_warnings_clean_run_returns_notes(): ) notes = analyst_warnings(strong, turnover_warn=0.5) assert isinstance(notes, list) and notes + + +# --- v2: sizing/risk params, extended result, display helpers -------------- + +def test_research_params_v2_to_config(): + params = ResearchParams( + sizing_method="volatility_target", target_volatility=0.1, + max_exposure=0.5, use_risk_controls=True, vol_cap=0.2, + use_drawdown_guard=True, fee=0.0003, spread=0.0001, slippage=0.0001, + ) + cfg = params.to_config() + assert cfg.sizing_method == "volatility_target" + assert cfg.max_exposure == 0.5 + assert cfg.use_risk_controls is True + assert cfg.use_drawdown_guard is True + assert cfg.fee == 0.0003 + + +def test_run_research_includes_v2_outputs(): + from neuroquant.app_helpers import make_synthetic, run_research + frame = make_synthetic(n_days=600, seed=42) + result = run_research(frame, ResearchParams(mc_simulations=100)) + for key in ("extended_kpis", "overfit", "robustness", "has_benchmark", + "data_warnings"): + assert key in result + assert 0.0 <= result["robustness"]["score"] <= 100.0 + + +def test_display_frames(): + from neuroquant.app_helpers import ( + benchmark_frame, cost_breakdown, exposure_frame, gross_net_frame, + make_synthetic, rolling_risk_frame, run_research, + ) + from neuroquant.backtest import BacktestConfig, run_backtest + frame = make_synthetic(n_days=500, seed=42) + result = run_research(frame, ResearchParams(cost_per_trade=0.005, mc_simulations=100)) + sig = result["full_signals"] + assert list(gross_net_frame(sig).columns) == ["Gross", "Net"] + assert list(exposure_frame(sig).columns) == ["Exposure"] + assert "Rolling Sharpe" in rolling_risk_frame(sig).columns + assert not 
cost_breakdown(sig).empty + # No benchmark on synthetic data. + assert benchmark_frame(sig) is None + # Benchmark frame appears when a benchmark column is present. + fb = frame.copy(); fb["benchmark_close"] = frame["close"].to_numpy() * 0.5 + sb = run_backtest(fb, BacktestConfig(20, 60))["signals"] + assert benchmark_frame(sb) is not None + + +def test_load_sample_csv_has_benchmark(): + from neuroquant.app_helpers import load_sample_csv + frame = load_sample_csv() + assert "benchmark_close" in frame.columns + assert len(frame) > 100 diff --git a/tests/test_backtest.py b/tests/test_backtest.py index 3b514ac..65b101e 100644 --- a/tests/test_backtest.py +++ b/tests/test_backtest.py @@ -90,3 +90,48 @@ def test_config_from_row_roundtrip(sample_data): summary = run_config_sweep(sample_data, configs) best = config_from_row(summary.iloc[0]) assert best.signal_family in SIGNAL_FAMILIES + + +# --- v2: backward compatibility, sizing, costs, benchmark ------------------ + +def test_default_engine_backward_compatible(sample_data): + """Default config still satisfies net = gross - cost and legacy cost math.""" + sig = generate_signals(sample_data, BacktestConfig(20, 60)) + assert (abs(sig["strategy_return"] - (sig["gross_return"] - sig["cost"])) < 1e-12).all() + legacy_cost = sig["position"].diff().abs().fillna(0.0) * 0.001 + assert (abs(sig["cost"] - legacy_cost) < 1e-12).all() + + +def test_structured_costs_decompose(sample_data): + sig = generate_signals( + sample_data, + BacktestConfig(20, 60, fee=0.0003, spread=0.0002, slippage=0.0001), + ) + recombined = sig["fee_cost"] + sig["spread_cost"] + sig["slippage_cost"] + assert (abs(recombined - sig["cost"]) < 1e-12).all() + + +def test_costs_make_net_below_gross(sample_data): + sig = generate_signals(sample_data, BacktestConfig(20, 60, cost_per_trade=0.01)) + gross_total = sig["gross_equity"].iloc[-1] - 1 + net_total = sig["strategy_equity"].iloc[-1] - 1 + assert net_total <= gross_total + 1e-9 + + +def 
test_volatility_target_changes_exposure(sample_data): + base = generate_signals(sample_data, BacktestConfig(signal_family="momentum")) + sized = generate_signals( + sample_data, + BacktestConfig(signal_family="momentum", sizing_method="volatility_target", + target_volatility=0.08), + ) + # Sized exposure differs from the raw 0/1 exposure. + assert not base["position"].equals(sized["position"]) + + +def test_benchmark_columns_when_present(sample_data): + framed = sample_data.copy() + framed["benchmark_close"] = sample_data["close"].iloc[::-1].to_numpy() + sig = generate_signals(framed, BacktestConfig(20, 60)) + assert "benchmark_return" in sig.columns + assert "benchmark_equity" in sig.columns diff --git a/tests/test_costs.py b/tests/test_costs.py new file mode 100644 index 0000000..f234fea --- /dev/null +++ b/tests/test_costs.py @@ -0,0 +1,34 @@ +"""Tests for the structured cost model.""" + +import pandas as pd +import pytest + +from neuroquant.backtest import BacktestConfig +from neuroquant.costs import CostRates, decompose_costs, resolve_cost_rates + + +def test_legacy_scalar_path(): + rates = resolve_cost_rates(BacktestConfig(cost_per_trade=0.0012)) + assert rates.fee == 0.0012 + assert rates.spread == 0.0 + assert rates.slippage == 0.0 + assert rates.total == 0.0012 + + +def test_structured_components_sum(): + cfg = BacktestConfig(fee=0.0003, spread=0.0002, slippage=0.0001) + rates = resolve_cost_rates(cfg) + assert rates.total == pytest.approx(0.0006) + + +def test_decompose_costs_matches_total(): + turnover = pd.Series([0.0, 1.0, 0.5, 1.0]) + rates = CostRates(fee=0.001, spread=0.0005, slippage=0.0005) + table = decompose_costs(turnover, rates) + assert {"fee_cost", "spread_cost", "slippage_cost", "total_cost"}.issubset( + table.columns + ) + recombined = ( + table["fee_cost"] + table["spread_cost"] + table["slippage_cost"] + ) + assert (abs(recombined - table["total_cost"]) < 1e-12).all() diff --git a/tests/test_data.py b/tests/test_data.py index 
f91e7ad..e8e5d58 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -3,7 +3,11 @@ import pandas as pd import pytest -from neuroquant.data import generate_synthetic_series, load_csv_series +from neuroquant.data import ( + data_quality_report, + generate_synthetic_series, + load_csv_series, +) def test_same_seed_is_reproducible(): @@ -72,3 +76,35 @@ def test_csv_loader_rejects_missing_timestamp_column(tmp_path): path = _write_csv(tmp_path, "close\n100\n101\n") with pytest.raises(ValueError, match="timestamp"): load_csv_series(path) + + +def test_csv_loader_keeps_benchmark_close(tmp_path): + path = _write_csv( + tmp_path, + "date,close,benchmark_close\n" + "2021-01-01,100,50\n2021-01-02,101,51\n2021-01-03,102,52\n", + ) + frame = load_csv_series(path) + assert "benchmark_close" in frame.columns + + +def test_data_quality_report_flags_short_and_missing_benchmark(): + frame = generate_synthetic_series(n_days=120, seed=1) + warnings = data_quality_report(frame) + text = " ".join(warnings).lower() + assert "short series" in text + assert "benchmark" in text + + +def test_data_quality_report_flags_zero_returns(): + frame = generate_synthetic_series(n_days=400, seed=1).copy() + # Force many repeated values -> a high share of exactly-zero returns. 
+ frame.iloc[100:300, frame.columns.get_loc("close")] = 123.0 + warnings = data_quality_report(frame) + assert any("zero" in w.lower() for w in warnings) + + +def test_data_quality_report_clean_long_series_with_benchmark(): + frame = generate_synthetic_series(n_days=600, seed=1).copy() + frame["benchmark_close"] = frame["close"].to_numpy() * 0.5 + assert data_quality_report(frame) == [] diff --git a/tests/test_metrics.py b/tests/test_metrics.py index d7a010d..a5c6e05 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -49,3 +49,27 @@ def test_excess_return_is_strategy_minus_baseline(sample_data): assert kpis["excess_return"] == pytest.approx( kpis["total_return"] - kpis["baseline_return"] ) + + +def test_extended_kpis_adds_quant_metrics(sample_data): + from neuroquant.metrics import compute_extended_kpis + + signals = generate_signals(sample_data, BacktestConfig(20, 60, cost_per_trade=0.005)) + ext = compute_extended_kpis(signals) + for key in ( + "cagr", "exposure_avg", "exposure_max", "best_day", "worst_day", + "win_rate", "gross_return", "cost_drag", "rolling_sharpe_median", + "benchmark_return", "benchmark_excess", + ): + assert key in ext + # With positive costs, gross return exceeds net and cost drag is positive. 
+ assert ext["gross_return"] >= ext["total_return"] - 1e-9 + assert ext["cost_drag"] >= -1e-9 + + +def test_extended_kpis_benchmark_nan_without_benchmark(sample_data): + from neuroquant.metrics import compute_extended_kpis + + signals = generate_signals(sample_data, BacktestConfig(20, 60)) + ext = compute_extended_kpis(signals) + assert ext["benchmark_return"] != ext["benchmark_return"] # NaN diff --git a/tests/test_research.py b/tests/test_research.py index c6f44ad..8af6527 100644 --- a/tests/test_research.py +++ b/tests/test_research.py @@ -1,11 +1,14 @@ """Tests for train/test split, walk-forward validation, and Monte Carlo.""" import numpy as np +import pandas as pd import pytest from neuroquant.backtest import BacktestConfig, generate_signals from neuroquant.research import ( monte_carlo_bootstrap, + overfit_gap, + robustness_score, split_train_test, walk_forward_validation, ) @@ -82,3 +85,27 @@ def test_monte_carlo_is_reproducible(sample_data): a = monte_carlo_bootstrap(signals["strategy_return"], n_simulations=100, seed=9) b = monte_carlo_bootstrap(signals["strategy_return"], n_simulations=100, seed=9) assert np.allclose(a["total_returns"], b["total_returns"]) + + +def test_overfit_gap_flags_strong_is_weak_oos(): + flagged = overfit_gap({"sharpe_ratio": 1.2}, {"sharpe_ratio": -0.5}) + assert flagged["overfit_flag"] is True + assert flagged["gap"] == pytest.approx(1.7) + clean = overfit_gap({"sharpe_ratio": 0.4}, {"sharpe_ratio": 0.3}) + assert clean["overfit_flag"] is False + + +def test_robustness_score_bounds_and_components(sample_data): + wf = walk_forward_validation(sample_data, train_size=150, test_size=60) + oos = {"sharpe_ratio": 0.5, "max_drawdown": -0.1} + cost = pd.DataFrame({"total_return": [0.2, 0.1, 0.05]}) + score = robustness_score(oos, wf, cost) + assert 0.0 <= score["score"] <= 100.0 + for key in ("oos_sharpe_points", "stability_points", "drawdown_points", "cost_points"): + assert key in score + + +def 
test_robustness_score_handles_missing_walk_forward(): + oos = {"sharpe_ratio": -0.5, "max_drawdown": -0.4} + score = robustness_score(oos, None, None) + assert 0.0 <= score["score"] <= 100.0 diff --git a/tests/test_risk.py b/tests/test_risk.py new file mode 100644 index 0000000..9e95735 --- /dev/null +++ b/tests/test_risk.py @@ -0,0 +1,62 @@ +"""Tests for risk diagnostics and optional risk controls.""" + +import numpy as np +import pandas as pd + +from neuroquant.backtest import BacktestConfig +from neuroquant.risk import ( + apply_drawdown_guard, + apply_risk_controls, + apply_volatility_cap, + rolling_drawdown, + rolling_sharpe, + rolling_volatility, +) + + +def _series(values): + idx = pd.bdate_range("2021-01-01", periods=len(values)) + return pd.Series(values, index=idx) + + +def test_rolling_diagnostics_shapes(): + rng = np.random.default_rng(0) + returns = _series(rng.normal(0, 0.01, 100)) + assert len(rolling_volatility(returns, 20)) == 100 + assert len(rolling_sharpe(returns, 30)) == 100 + equity = (1 + returns).cumprod() + dd = rolling_drawdown(equity) + assert (dd <= 1e-9).all() + + +def test_risk_controls_off_by_default_is_noop(): + exposure = _series([1.0] * 50) + returns = _series([0.02] * 50) + out = apply_risk_controls(exposure, returns, BacktestConfig()) + assert out.equals(exposure) + + +def test_volatility_cap_only_reduces_exposure(): + exposure = _series([1.0] * 60) + rng = np.random.default_rng(2) + returns = _series(rng.normal(0, 0.04, 60)) + capped = apply_volatility_cap(exposure, returns, vol_cap=0.15, lookback=20) + assert (capped <= exposure + 1e-9).all() + + +def test_drawdown_guard_flattens_after_breach_and_is_monotone(): + # A steadily declining market: the guard should flatten exposure. + position = _series([1.0] * 12) + market = _series([-0.05] * 12) + guarded = apply_drawdown_guard(position, market, level=0.10) + # Guard never adds exposure beyond the base. 
+ assert (guarded <= position + 1e-9).all() + # After the drawdown breach the later exposure is flat. + assert guarded.iloc[-1] == 0.0 + + +def test_drawdown_guard_noop_when_no_breach(): + position = _series([1.0] * 10) + market = _series([0.01] * 10) # rising — never breaches + guarded = apply_drawdown_guard(position, market, level=0.20) + assert guarded.tolist() == position.tolist() diff --git a/tests/test_sizing.py b/tests/test_sizing.py new file mode 100644 index 0000000..81f1f9b --- /dev/null +++ b/tests/test_sizing.py @@ -0,0 +1,67 @@ +"""Tests for position-sizing methods (causal, conservative).""" + +import numpy as np +import pandas as pd + +from neuroquant.backtest import BacktestConfig +from neuroquant.sizing import compute_exposure, fixed_fraction, fixed_unit + + +def _series(values): + idx = pd.bdate_range("2021-01-01", periods=len(values)) + return pd.Series(values, index=idx) + + +def test_fixed_unit_is_identity(): + target = _series([0.0, 1.0, 1.0, 0.0]) + assert fixed_unit(target).equals(target) + + +def test_fixed_fraction_scales(): + target = _series([0.0, 1.0, 1.0]) + assert fixed_fraction(target, 0.5).tolist() == [0.0, 0.5, 0.5] + + +def test_default_sizing_matches_raw_signal(sample_data): + """fixed_unit default keeps the original 0/1 exposure (clipped no-op).""" + target = _series([0.0, 1.0, 1.0, 0.0, 1.0]) + returns = _series([0.0, 0.01, -0.02, 0.0, 0.03]) + out = compute_exposure(target, returns, BacktestConfig()) + assert out.tolist() == target.tolist() + + +def test_max_exposure_and_no_short_clip(): + target = _series([1.0, 1.0, -1.0]) + returns = _series([0.0, 0.0, 0.0]) + cfg = BacktestConfig(sizing_method="fixed_fraction", fixed_fraction=2.0, + max_exposure=1.0, allow_short=False) + out = compute_exposure(target, returns, cfg) + # Clipped to [0, 1]: no leverage, no shorting. 
+ assert out.max() <= 1.0 + 1e-9 + assert out.min() >= 0.0 + + +def test_volatility_target_reduces_in_high_vol(): + target = _series([1.0] * 80) + # Higher-volatility returns should pull target exposure below 1. + rng = np.random.default_rng(0) + returns = _series(rng.normal(0, 0.03, 80)) + cfg = BacktestConfig(sizing_method="volatility_target", target_volatility=0.10, + vol_lookback=20, max_exposure=1.0) + out = compute_exposure(target, returns, cfg).dropna() + assert (out <= 1.0 + 1e-9).all() + assert out.mean() < 1.0 + + +def test_volatility_target_is_causal(): + """Exposure on a prefix matches the full series (no look-ahead).""" + rng = np.random.default_rng(1) + returns = _series(rng.normal(0, 0.02, 200)) + target = _series([1.0] * 200) + cfg = BacktestConfig(sizing_method="volatility_target") + full = compute_exposure(target, returns, cfg) + prefix = compute_exposure(target.iloc[:120], returns.iloc[:120], cfg) + a, b = full.iloc[:120], prefix + mask = a.notna() & b.notna() + assert mask.sum() > 0 + assert np.allclose(a[mask].to_numpy(), b[mask].to_numpy())