diff --git a/.rhiza/template.yml b/.rhiza/template.yml index ad1bde3..b7469cb 100644 --- a/.rhiza/template.yml +++ b/.rhiza/template.yml @@ -1,5 +1,5 @@ repository: "jebel-quant/rhiza" -ref: "v0.18.4" +ref: "v0.18.8" profiles: - github-project diff --git a/README.md b/README.md index f39ab3a..8bb0fcf 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ from fast_minimum_variance import Problem # 500 daily returns, 20 assets X = np.random.default_rng(42).standard_normal((500, 20)) -w, iters = Problem(X).solve_cg() # matrix-free CG — recommended +w, outer, inner = Problem(X).solve_cg() # matrix-free CG — recommended w, iters = Problem(X).solve_kkt() # direct dense solve — exact baseline assert abs(w.sum() - 1.0) < 1e-8 @@ -48,7 +48,7 @@ it compresses the eigenvalue spectrum and directly cuts CG iteration counts. Use ```python T, N = X.shape -w, iters = Problem(X, alpha=N / (N + T)).solve_cg() +w, outer, inner = Problem(X, alpha=N / (N + T)).solve_cg() ``` On S&P 500 equity data (495 assets, 1192 days), shrinkage cuts CG iterations from 685 to @@ -56,8 +56,9 @@ On S&P 500 equity data (495 assets, 1192 days), shrinkage cuts CG iterations fro ## Solvers -All solvers are methods on `Problem` and return `(w, iters)` where +All solvers are methods on `Problem` and return `(w, ...)` where $w \in \mathbb{R}^N$, $\sum_i w_i = 1$, $w_i \geq 0$. +`solve_cg` returns `(w, outer_steps, inner_iters)`; all others return `(w, iters)`. | Method | Approach | When to use | |---|---|---| @@ -151,12 +152,12 @@ The same solver handles a range of portfolio construction problems by choosing $ ```python # Mean-variance mu = np.random.default_rng(0).standard_normal(N) # expected returns, shape (N,) -w, _ = Problem(X, rho=1.0, mu=mu).solve_cg() +w, *_ = Problem(X, rho=1.0, mu=mu).solve_cg() # Minimum tracking error to benchmark b b = np.ones(N) / N # equal-weight benchmark mu_te = X.T @ (X @ b) -w, _ = Problem(X, rho=2.0, mu=mu_te).solve_cg() +w, *_ = Problem(X, rho=2.0, mu=mu_te).solve_cg() ``` When `rho != 0`, two SPD solves are performed per outer step: $\Sigma_a v_1 = \mathbf{1}$ diff --git a/book/marimo/notebooks/data/ftse100_pct_returns.parquet b/book/marimo/notebooks/data/ftse100_pct_returns.parquet new file mode 100644 index 0000000..769c585 Binary files /dev/null and b/book/marimo/notebooks/data/ftse100_pct_returns.parquet differ diff --git a/book/marimo/notebooks/data/sp500_pct_returns.parquet b/book/marimo/notebooks/data/sp500_pct_returns.parquet index db4b605..3f69265 100644 Binary files a/book/marimo/notebooks/data/sp500_pct_returns.parquet and b/book/marimo/notebooks/data/sp500_pct_returns.parquet differ diff --git a/book/marimo/notebooks/fetch_sp500.py b/book/marimo/notebooks/fetch_sp500.py deleted file mode 100644 index 3ec2a1a..0000000 --- a/book/marimo/notebooks/fetch_sp500.py +++ /dev/null @@ -1,83 +0,0 @@ -"""Fetch S&P 500 constituent returns for the last 5 years via yfinance.""" - -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "yfinance", -# "pandas", -# "numpy", -# "lxml", -# "requests", -# "pyarrow", -# "marimo" -# ] -# /// - -import marimo - -__generated_with = "0.23.3" -app = marimo.App() - -with app.setup: - import io - from pathlib import Path - - import pandas as pd - import requests - import yfinance as yf - - -@app.cell -def _(): - # ── 1. S&P 500 tickers from Wikipedia ───────────────────────────────────────── - - resp = requests.get( - "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies", - headers={"User-Agent": "Mozilla/5.0 (research script)"}, - timeout=30, - ) - resp.raise_for_status() - tables = pd.read_html(io.StringIO(resp.text)) - tickers = tables[0]["Symbol"].str.replace(".", "-", regex=False).tolist() - print(f"Found {len(tickers)} tickers in the S&P 500 index") - - # ── 2. Download 5 years of adjusted close prices ────────────────────────────── - - raw = yf.download( - tickers, - period="5y", - auto_adjust=True, - progress=True, - threads=True, - )["Close"] - - print(f"\nRaw download: {raw.shape[0]} trading days x {raw.shape[1]} tickers") - - # ── 3. Clean: keep tickers with <5% missing days, forward-fill short gaps ───── - - threshold = 0.05 - missing_frac = raw.isna().mean() - keep = missing_frac[missing_frac <= threshold].index - raw = raw[keep].ffill() - - print(f"After cleaning: {raw.shape[0]} days x {raw.shape[1]} assets") - - # ── 4. Returns ──────────────────────────────────────────────────────────────── - - pct_returns = raw.pct_change().dropna() - print(f"Return matrix shape: {pct_returns.shape} (T={pct_returns.shape[0]}, N={pct_returns.shape[1]})") - - # ── 5. Save ─────────────────────────────────────────────────────────────────── - - out_dir = Path(__file__).parent / "data" - - pct_returns.to_parquet(out_dir / "sp500_pct_returns.parquet") - print("\nSaved data/sp500_pct_returns.parquet") - - print(f"Date range: {pct_returns.index[0].date()} → {pct_returns.index[-1].date()}") - print(f"Assets: {pct_returns.shape[1]}") - print(f"Trading days: {pct_returns.shape[0]}") - - -if __name__ == "__main__": - app.run() diff --git a/book/marimo/notebooks/graphs/markowitz_scaling.png b/book/marimo/notebooks/graphs/markowitz_scaling.png deleted file mode 100644 index a5b3217..0000000 Binary files a/book/marimo/notebooks/graphs/markowitz_scaling.png and /dev/null differ diff --git a/book/marimo/notebooks/graphs/markowitz_scaling.pdf b/book/marimo/notebooks/graphs/minvar_frontier.pdf similarity index 54% rename from book/marimo/notebooks/graphs/markowitz_scaling.pdf rename to book/marimo/notebooks/graphs/minvar_frontier.pdf index 8021b0d..d897e36 100644 Binary files a/book/marimo/notebooks/graphs/markowitz_scaling.pdf and b/book/marimo/notebooks/graphs/minvar_frontier.pdf differ diff --git a/book/marimo/notebooks/graphs/minvar_frontier.png b/book/marimo/notebooks/graphs/minvar_frontier.png new file mode 100644 index 0000000..1c0d19a Binary files /dev/null and b/book/marimo/notebooks/graphs/minvar_frontier.png differ diff --git a/book/marimo/notebooks/graphs/minvar_iters.pdf b/book/marimo/notebooks/graphs/minvar_iters.pdf new file mode 100644 index 0000000..65b3795 Binary files /dev/null and b/book/marimo/notebooks/graphs/minvar_iters.pdf differ diff --git a/book/marimo/notebooks/graphs/minvar_iters.png b/book/marimo/notebooks/graphs/minvar_iters.png new file mode 100644 index 0000000..212f0b0 Binary files /dev/null and b/book/marimo/notebooks/graphs/minvar_iters.png differ diff --git a/book/marimo/notebooks/graphs/minvar_loglog.png b/book/marimo/notebooks/graphs/minvar_loglog.png deleted file mode 100644 index 22a7ea5..0000000 Binary files a/book/marimo/notebooks/graphs/minvar_loglog.png and /dev/null differ diff --git a/book/marimo/notebooks/graphs/minvar_scaling.pdf b/book/marimo/notebooks/graphs/minvar_scaling.pdf index 13bacba..5ac6a77 100644 Binary files a/book/marimo/notebooks/graphs/minvar_scaling.pdf and b/book/marimo/notebooks/graphs/minvar_scaling.pdf differ diff --git a/book/marimo/notebooks/graphs/minvar_scaling.png b/book/marimo/notebooks/graphs/minvar_scaling.png index 1b6a01f..e6d0a38 100644 Binary files a/book/marimo/notebooks/graphs/minvar_scaling.png and b/book/marimo/notebooks/graphs/minvar_scaling.png differ diff --git a/book/marimo/notebooks/make_minvar_figures.py b/book/marimo/notebooks/make_minvar_figures.py deleted file mode 100644 index d35418f..0000000 --- a/book/marimo/notebooks/make_minvar_figures.py +++ /dev/null @@ -1,193 +0,0 @@ -"""Generate scaling figures and table benchmarks for minvar_paper.tex.""" - -# /// script -# requires-python = ">=3.11" -# dependencies = [ -# "matplotlib", -# "numpy", -# "pandas", -# "pyarrow", -# "fast-minimum-variance", -# "marimo" -# ] -# [tool.uv.sources] -# fast-minimum-variance = { path = "../../..", editable = true } -# /// - -import marimo - -__generated_with = "0.23.3" -app = marimo.App() - -with app.setup: - from pathlib import Path - - import matplotlib as mpl - import matplotlib.pyplot as plt - import numpy as np - import pandas as pd - from _common import print_table, run_timed, set_notebook_plot_style - - from fast_minimum_variance.minvar_problem import _MinVarProblem as MinVarProblem - - -@app.cell -def _(): - set_notebook_plot_style(mpl) - - # ── Table 2: S&P 500 ────────────────────────────────────────────────────────── - - print() - print("=" * 70) - print("S&P 500 n=495, T=1192 (long-only minimum variance)") - print("=" * 70) - file = Path(__file__).parent / "data" / "sp500_pct_returns.parquet" - df = pd.read_parquet(file) - R_sp = df.to_numpy() # noqa: N806 - _T_sp, N_sp = R_sp.shape # noqa: N806 - alpha_sp = 0.5 - target_sp = np.var(R_sp) * np.eye(N_sp) # mean squared entry = bar_lambda - print(f"Date range: {df.index[0].date()} → {df.index[-1].date()}") - print(f"alpha = {alpha_sp:.4f}") - - configs_sp_no_lw = [ - ("cvxpy", lambda: MinVarProblem(R_sp).solve_cvxpy()), - ("clarabel", lambda: MinVarProblem(R_sp).solve_clarabel()), - ("osqp", lambda: MinVarProblem(R_sp).solve_osqp()), - ("kkt", lambda: MinVarProblem(R_sp).solve_kkt()), - ("cg", lambda: MinVarProblem(R_sp).solve_cg()), - ("nnls", lambda: MinVarProblem(R_sp).solve_nnls()), - ] - configs_sp_lw = [ - ("cvxpy", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_cvxpy()), - ("clarabel", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_clarabel()), - ("osqp", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_osqp()), - ("kkt", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_kkt()), - ("cg", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_cg()), - ("nnls", lambda: MinVarProblem(R_sp, alpha=alpha_sp, target=target_sp).solve_nnls()), - ] - - sp_no_lw, sp_lw = {}, {} - for key, fn in configs_sp_no_lw: - (_w, iters), t = run_timed(fn) - sp_no_lw[key] = {"time_s": t, "iters": iters} - for key, fn in configs_sp_lw: - (_w, iters), t = run_timed(fn) - sp_lw[key] = {"time_s": t, "iters": iters} - - print_table("Without LW shrinkage (alpha=0)", sp_no_lw) - print_table(f"With LW shrinkage (alpha={alpha_sp:.4f})", sp_lw) - - # ── Panel A: runtime vs n ────────────────────────────────────────────────────── - - print() - print("=" * 70) - print("Runtime vs n (T=2n, LW shrinkage, long-only minimum variance)") - print("=" * 70) - - ns = [50, 100, 200, 300, 500, 750, 1000, 1500, 2000] - times = {k: [] for k in ("kkt", "cg", "nnls")} - rng2 = np.random.default_rng(0) - print(f"{'n':>6} {'kkt':>10} {'cg':>10} {'nnls':>10}") - print("-" * 44) - for n in ns: - T = 2 * n # noqa: N806 - R = rng2.standard_normal((T, n)) # noqa: N806 - alpha = n / (n + T) - bar_lam = float(np.linalg.norm(R, "fro") ** 2) / (n * T) - tgt = bar_lam * np.eye(n) - _, t_kkt = run_timed(lambda r=R, a=alpha, t=tgt: MinVarProblem(r, alpha=a, target=t).solve_kkt()) - _, t_cg = run_timed(lambda r=R, a=alpha, t=tgt: MinVarProblem(r, alpha=a, target=t).solve_cg()) - _, t_nnls = run_timed(lambda r=R, a=alpha, t=tgt: MinVarProblem(r, alpha=a, target=t).solve_nnls()) - times["kkt"].append(t_kkt) - times["cg"].append(t_cg) - times["nnls"].append(t_nnls) - print(f"{n:>6} {t_kkt:>10.4f} {t_cg:>10.4f} {t_nnls:>10.4f}") - - # ── Panel B: iterations vs shrinkage intensity alpha ────────────────────────── - - n_iter, T_iter = 500, 250 # noqa: N806 - R_iter = np.random.default_rng(1).standard_normal((T_iter, n_iter)) # noqa: N806 - bar_lambda_iter = float(np.linalg.norm(R_iter, "fro") ** 2) / (n_iter * T_iter) - alphas = np.linspace(0.01, 0.99, 40) - cg_iters_by_alpha = [] - print() - print("=" * 70) - print(f"Iterations vs alpha (n={n_iter}, T={T_iter}, rank-deficient)") - print("=" * 70) - for a in alphas: - tgt_iter = bar_lambda_iter * np.eye(n_iter) - _, iters = MinVarProblem(R_iter, alpha=a, target=tgt_iter).solve_cg() - cg_iters_by_alpha.append(iters) - print(f" alpha={a:.3f} iters={iters}") - - # ── Plot ─────────────────────────────────────────────────────────────────────── - - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6.5, 2.8)) - - colors = {"kkt": "#1f77b4", "cg": "#ff7f0e", "nnls": "#2ca02c"} - labels = {"kkt": "KKT direct", "cg": "CG (matrix-free)", "nnls": "NNLS"} - for key in ("kkt", "cg", "nnls"): - ax1.plot(ns, times[key], marker="o", markersize=3, label=labels[key], color=colors[key]) - ax1.set_xscale("log") - ax1.set_yscale("log") - ax1.set_xlabel("Number of assets $n$") - ax1.set_ylabel("Wall-clock time (s)") - ax1.set_title(r"(a) Runtime vs. $n$ ($T=2n$, with LW shrinkage)") - ax1.legend(framealpha=0.9) - ax1.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.7) - - ax2.plot(alphas, cg_iters_by_alpha, marker="o", markersize=3, color=colors["cg"], label=labels["cg"]) - ax2.set_xlabel(r"Shrinkage intensity $\alpha$ $(\kappa$ decreases $\rightarrow)$") - ax2.set_ylabel("CG iterations to convergence") - ax2.set_title(r"(b) Iterations vs. $\alpha$ ($n=500,\,T=250$)") - ax2.legend(framealpha=0.9) - ax2.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.7) - - fig.tight_layout(pad=1.0) - - folder = Path(__file__).parent / "graphs" - fig.savefig(folder / "minvar_scaling.pdf", bbox_inches="tight") - fig.savefig(folder / "minvar_scaling.png", bbox_inches="tight", dpi=150) - print() - print("Saved graphs/minvar_scaling.pdf and graphs/minvar_scaling.png") - - # ── Standalone log-log Runtime vs n ─────────────────────────────────────────── - - fig2, ax = plt.subplots(figsize=(4.5, 3.2)) - - for key in ("kkt", "cg", "nnls"): - ax.plot(ns, times[key], marker="o", markersize=4, label=labels[key], color=colors[key]) - - # Reference slope lines anchored to the KKT curve at n=500 - n_arr = np.array(ns, dtype=float) - anchor_idx = ns.index(500) - t_anchor = times["kkt"][anchor_idx] - n_anchor = 500.0 - for exp, ls, lbl in [(2, "--", r"$O(n^2)$"), (3, ":", r"$O(n^3)$")]: - ax.plot( - n_arr, - t_anchor * (n_arr / n_anchor) ** exp, - color="gray", - linestyle=ls, - linewidth=0.9, - label=lbl, - ) - - ax.set_xscale("log") - ax.set_yscale("log") - ax.set_xlabel("Number of assets $n$") - ax.set_ylabel("Wall-clock time (s)") - ax.set_title(r"Runtime vs. $n$ ($T = 2n$, LW shrinkage)") - ax.legend(framealpha=0.9) - ax.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.7) - fig2.tight_layout(pad=1.0) - - fig2.savefig(folder / "minvar_loglog.pdf", bbox_inches="tight") - fig2.savefig(folder / "minvar_loglog.png", bbox_inches="tight", dpi=150) - print("Saved graphs/minvar_loglog.pdf and graphs/minvar_loglog.png") - return - - -if __name__ == "__main__": - app.run() diff --git a/experiment/Makefile b/experiment/Makefile new file mode 100644 index 0000000..39b3400 --- /dev/null +++ b/experiment/Makefile @@ -0,0 +1,6 @@ +load_data: + uv run fetch_sp500.py + uv run fetch_ftse100.py + +experiment: + uv run experiment.py diff --git a/experiment/data/ftse100_pct_returns.parquet b/experiment/data/ftse100_pct_returns.parquet new file mode 100644 index 0000000..5fb7fd7 Binary files /dev/null and b/experiment/data/ftse100_pct_returns.parquet differ diff --git a/experiment/data/sp500_pct_returns.parquet b/experiment/data/sp500_pct_returns.parquet new file mode 100644 index 0000000..4db1eb3 Binary files /dev/null and b/experiment/data/sp500_pct_returns.parquet differ diff --git a/experiment/experiment.py b/experiment/experiment.py new file mode 100644 index 0000000..410dc3a --- /dev/null +++ b/experiment/experiment.py @@ -0,0 +1,145 @@ +"""Real-data benchmark for the paper. + + "Shrinkage as Preconditioning: Matrix-Free Methods for + Long-Only Portfolio Optimization" + +Usage: + uv run experiment.py # from the experiment/ directory + +Inputs: + data/sp500_pct_returns.parquet — S&P 500 daily pct returns + data/ftse100_pct_returns.parquet — FTSE 100 daily pct returns + Fetch with: uv run fetch_sp500.py / uv run fetch_ftse100.py + +Output (stdout): + For each dataset: four solver panels (no shrinkage, LW alpha=0.5, + LW oracle, RMT eigenvalue cleaning) across seven solvers; timings, + iterations, and speedup vs CVXPY. + +Hardware used in the paper: Apple M4 Pro, 14-core CPU, 48 GB RAM. +Software: Python 3.12, NumPy 2.4, SciPy 1.17, CVXPY 1.8.2, Clarabel 0.11.1. +""" + +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "pandas", +# "scikit-learn", +# "fast-minimum-variance", +# "pyarrow", +# ] +# +# [tool.uv.sources] +# fast-minimum-variance = { path = ".." } +# /// + +from __future__ import annotations + +from pathlib import Path + +import pandas as pd +from util.runner import print_table, run_timed + +from fast_minimum_variance.minvar_problem import _MinVarProblem as MinVarProblem +from fast_minimum_variance.shrinkage.util import ( + lw_alpha_and_target, + oas_alpha_and_target, + rmt_target_and_alpha, +) + +HERE = Path(__file__).parent + +DATASETS = { + "sp500": HERE / "data/sp500_pct_returns.parquet", + "ftse": HERE / "data/ftse100_pct_returns.parquet", +} + +SOLVERS_ALL = [ + ("cvxpy (Clarabel)", lambda p: p.solve_cvxpy(), False), + ("cvxpy (OSQP)", lambda p: p.solve_cvxpy(backend="osqp"), False), + ("Clarabel (direct API)", lambda p: p.solve_clarabel(), False), + ("OSQP (direct API)", lambda p: p.solve_osqp(), False), + ("CG (SPD)", lambda p: p.solve_cg(), False), + ("Proximal gradient", lambda p: p.solve_proximal(), False), + ("FISTA (Nesterov)", lambda p: p.solve_fista(), False), +] +SOLVERS_KEY = [ + ("cvxpy (Clarabel)", lambda p: p.solve_cvxpy(), False), + ("CG (SPD)", lambda p: p.solve_cg(), False), + ("Proximal gradient", lambda p: p.solve_proximal(), False), + ("FISTA (Nesterov)", lambda p: p.solve_fista(), False), +] + + +def _make_entry(prob, fn, is_kkt=False): + """Return {"time_s", "outer", "inner"} for one solver on one problem.""" + raw, t = run_timed(lambda: fn(prob)) + if len(raw) == 3: # solve_cg -> (w, outer, inner) + _, outer, inner = raw + elif is_kkt: # solve_kkt -> (w, outer_steps) + _, outer = raw + inner = None + else: # proximal / cvxpy / etc -> (w, iters) + _, inner = raw + outer = None + return {"time_s": t, "outer": outer, "inner": inner} + + +for dataset_name, data_file in DATASETS.items(): + GRAPHS = HERE / "graphs" / dataset_name + GRAPHS.mkdir(exist_ok=True) + + print("=" * 70) + print(f"{dataset_name.upper()} benchmark (long-only minimum variance)") + print("=" * 70) + + df = pd.read_parquet(data_file) + R = df.to_numpy() + R = R - R.mean(axis=0) + _T, N = R.shape + + alpha_lw, target = lw_alpha_and_target(R) + alpha_oas, _ = oas_alpha_and_target(R) + alpha_hard = 0.5 + target_rmt, lr_rmt, k_rmt, alpha_rmt = rmt_target_and_alpha(R) + + print(f"Date range: {df.index[0].date()} -> {df.index[-1].date()}") + print(f"n={N}, T={_T}, n/T={N / _T:.3f}") + print(f"LW oracle alpha = {alpha_lw:.4f}") + print(f"OAS oracle alpha = {alpha_oas:.4f}") + print(f"RMT oracle alpha = {alpha_rmt:.4f} (k={k_rmt} signal factors)") + print(f"Demonstrational alpha = {alpha_hard}") + + results_no_shrink = {} + results_lw_oracle = {} + results_oas_oracle = {} + results_lw = {} + results_rmt = {} + results_pcg = {} + + prob_no_shrink = MinVarProblem(R) + prob_lw_ora = MinVarProblem(R, alpha=alpha_lw, target=target) + prob_oas_ora = MinVarProblem(R, alpha=alpha_oas, target=target) + prob_lw = MinVarProblem(R, alpha=alpha_hard, target=target) + prob_rmt_d = MinVarProblem(R, alpha=alpha_rmt, target=target_rmt, target_lr=lr_rmt) + prob_pcg = MinVarProblem(R, alpha=alpha_lw, target=target, pcg_lr=lr_rmt) + + for sname, fn, is_kkt in SOLVERS_ALL: + results_no_shrink[sname] = _make_entry(prob_no_shrink, fn, is_kkt) + results_lw[sname] = _make_entry(prob_lw, fn, is_kkt) + + for sname, fn, is_kkt in SOLVERS_KEY: + results_lw_oracle[sname] = _make_entry(prob_lw_ora, fn, is_kkt) + results_oas_oracle[sname] = _make_entry(prob_oas_ora, fn, is_kkt) + results_rmt[sname] = _make_entry(prob_rmt_d, fn, is_kkt) + + results_pcg["cvxpy (Clarabel)"] = _make_entry(prob_pcg, lambda p: p.solve_cvxpy(), False) + results_pcg["CG (SPD)"] = _make_entry(prob_pcg, lambda p: p.solve_cg(), False) + results_pcg["PCG (RMT precond)"] = _make_entry(prob_pcg, lambda p: p.solve_pcg(), False) + + print_table("Without shrinkage", results_no_shrink, ref_key="cvxpy (Clarabel)") + print_table(f"Oracle LW (alpha={alpha_lw:.4f})", results_lw_oracle, ref_key="cvxpy (Clarabel)") + print_table(f"Oracle OAS (alpha={alpha_oas:.4f})", results_oas_oracle, ref_key="cvxpy (Clarabel)") + print_table(f"Oracle RMT (alpha={alpha_rmt:.4f}, k={k_rmt})", results_rmt, ref_key="cvxpy (Clarabel)") + print_table(f"Oracle LW + RMT precond (alpha={alpha_lw:.4f}, k={k_rmt})", results_pcg, ref_key="cvxpy (Clarabel)") + print_table(f"Demonstrational LW (alpha={alpha_hard})", results_lw, ref_key="cvxpy (Clarabel)") diff --git a/experiment/experiment_synthetic.py b/experiment/experiment_synthetic.py new file mode 100644 index 0000000..da5ae25 --- /dev/null +++ b/experiment/experiment_synthetic.py @@ -0,0 +1,348 @@ +"""Scaling, iteration, and frontier experiments using simulated data. + + "Shrinkage as Preconditioning: Matrix-Free Methods for + Long-Only Portfolio Optimization" + +Usage: + uv run experiment_synthetic.py # from the experiment/ directory + +Outputs (stdout): + Scaling table — runtime vs n for KKT / CG / proximal (T=1250 fixed). + Iterations table — CG iterations vs alpha (n=500, T=250, rank-deficient). + Frontier table — warm- vs cold-start sweep timings (n=500, T=1250). + +Outputs (files): + graphs/minvar_scaling.pdf — Figure 1: runtime vs n (log-log) + graphs/minvar_iters.pdf — Figure 2: CG iterations vs alpha + graphs/minvar_frontier.pdf — Figure 3: efficient frontier coloured by active assets + +Hardware used in the paper: Apple M4 Pro, 14-core CPU, 48 GB RAM. +Software: Python 3.12, NumPy 2.4, SciPy 1.17, CVXPY 1.8.2, Clarabel 0.11.1. +""" + +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "matplotlib", +# "numpy", +# "scikit-learn", +# "fast-minimum-variance", +# ] +# +# [tool.uv.sources] +# fast-minimum-variance = { path = ".." } +# /// + +from __future__ import annotations + +import time as _time +from pathlib import Path + +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +from util.runner import run_timed + +from fast_minimum_variance.data import simulate_equity_returns +from fast_minimum_variance.minvar_problem import _MinVarProblem as MinVarProblem +from fast_minimum_variance.shrinkage.util import ( + lw_alpha_and_target, + lw_alpha_and_target_hard, + rmt_target_and_alpha, +) + +HERE = Path(__file__).parent +GRAPHS = HERE / "graphs" +GRAPHS.mkdir(exist_ok=True) + +mpl.rcParams.update( + { + "font.family": "serif", + "font.size": 9, + "axes.labelsize": 9, + "axes.titlesize": 9, + "legend.fontsize": 8, + "xtick.labelsize": 8, + "ytick.labelsize": 8, + "figure.dpi": 150, + } +) + +# --------------------------------------------------------------------------- +# Panel A: runtime vs n (LW shrinkage, T=1250 fixed) +# --------------------------------------------------------------------------- + +print("=" * 70) +print("Runtime vs n (T=1250 fixed, LW shrinkage, long-only minimum variance)") +print("=" * 70) + +T_FIXED = 1250 +ns = [50, 100, 200, 300, 500, 750, 1000, 1500, 2000, 3000] +times = {k: [] for k in ("kkt", "cg", "proximal", "fista", "rmt_solve")} + +print( + f"{'n':>6} {'k_active':>8} {'kkt(s)':>10} {'kkt_out':>8}" + f" {'cg(s)':>10} {'cg_out':>7} {'cg_in':>7}" + f" {'prox(s)':>10} {'prox_in':>8}" + f" {'fista(s)':>10} {'fista_in':>9}" + f" {'rmt(s)':>10} {'rmt_in':>8} {'k_rmt':>6}" +) +print("-" * 143) + +for n in ns: + R = simulate_equity_returns(n, T_FIXED, rng=n) + alpha, tgt = lw_alpha_and_target_hard(R, alpha=0.5) + prob = MinVarProblem(R, alpha=alpha, target=tgt) + + (w_kkt, kkt_outer), t_kkt = run_timed(lambda p=prob: p.solve_kkt()) + k_active = int((w_kkt > 1e-6).sum()) + (_, cg_outer, cg_inner), t_cg = run_timed(lambda p=prob: p.solve_cg()) + (_, prox_inner), t_prox = run_timed(lambda p=prob: p.solve_proximal()) + (_, fista_inner), t_fista = run_timed(lambda p=prob: p.solve_fista()) + + tgt_rmt_s, lr_rmt_s, k_rmt_s, alpha_rmt_s = rmt_target_and_alpha(R) + prob_rmt_s = MinVarProblem(R, alpha=alpha_rmt_s, target=tgt_rmt_s, target_lr=lr_rmt_s) + (_, rmt_outer_s, rmt_inner_s), t_rmt = run_timed(lambda p=prob_rmt_s: p.solve_cg()) + + times["kkt"].append(t_kkt) + times["cg"].append(t_cg) + times["proximal"].append(t_prox) + times["fista"].append(t_fista) + times["rmt_solve"].append(t_rmt) + print( + f"{n:>6} {k_active:>8} {t_kkt:>10.4f} {kkt_outer:>8}" + f" {t_cg:>10.4f} {cg_outer:>7} {cg_inner:>7}" + f" {t_prox:>10.4f} {prox_inner:>8}" + f" {t_fista:>10.4f} {fista_inner:>9}" + f" {t_rmt:>10.4f} {rmt_inner_s:>8} {k_rmt_s:>6}" + ) + +# --------------------------------------------------------------------------- +# Panel B: CG iterations vs alpha (n=500, T=250, rank-deficient) +# --------------------------------------------------------------------------- + +print() +print("=" * 70) +print("CG iterations vs alpha (n=500, T=250, rank-deficient)") +print("=" * 70) + +n_iter, T_iter = 500, 250 +R_iter = simulate_equity_returns(n_iter, T_iter, rng=1) +_, tgt_iter = lw_alpha_and_target(R_iter) +alphas = np.linspace(0.01, 0.99, 40) +cg_iters = [] + +print(f"{'alpha':>8} {'outer':>7} {'inner':>8}") +for a in alphas: + _, outer, inner = MinVarProblem(R_iter, alpha=a, target=tgt_iter).solve_cg() + cg_iters.append(inner) + print(f"{a:>8.3f} {outer:>7} {inner:>8}") + +# --------------------------------------------------------------------------- +# Figures 1 and 2 +# --------------------------------------------------------------------------- + +COLORS = {"cg": "#ff7f0e", "proximal": "#9467bd", "rmt": "#2ca02c"} +LABELS = { + "cg": "CG (LW, $\\alpha=0.5$)", + "proximal": "Proximal gradient", + "rmt": "CG-RMT (solve)", +} + +# Figure 1: runtime vs n +fig1, ax1 = plt.subplots(figsize=(4.5, 3.2)) +for key in ("cg", "proximal"): + ax1.plot(ns, times[key], marker="o", markersize=4, label=LABELS[key], color=COLORS[key]) +ax1.plot(ns, times["rmt_solve"], marker="s", markersize=4, label=LABELS["rmt"], color=COLORS["rmt"], linestyle="-") +n_arr = np.array(ns, dtype=float) +anchor_idx = ns.index(500) +t_anchor = times["cg"][anchor_idx] +ax1.plot(n_arr, t_anchor * (n_arr / 500.0), color="gray", linestyle="--", linewidth=0.9, label=r"$O(n)$") +ax1.plot(n_arr, t_anchor * (n_arr / 500.0) ** 2, color="gray", linestyle=":", linewidth=0.9, label=r"$O(n^2)$") +ax1.set_xscale("log") +ax1.set_yscale("log") +ax1.set_xlabel("Number of assets $n$") +ax1.set_ylabel("Wall-clock time (s)") +ax1.set_title(r"Runtime vs. $n$ ($T=1250$ fixed)") +ax1.legend(framealpha=0.9) +ax1.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.7) +fig1.tight_layout(pad=1.0) +fig1.savefig(GRAPHS / "minvar_scaling.pdf", bbox_inches="tight") +fig1.savefig(GRAPHS / "minvar_scaling.png", bbox_inches="tight", dpi=150) +print(f"\nSaved {GRAPHS / 'minvar_scaling.pdf'}") + +# Figure 2: CG iterations vs alpha +fig2, ax2 = plt.subplots(figsize=(4.5, 3.2)) +ax2.plot(alphas, cg_iters, marker="o", markersize=4, color=COLORS["cg"], label=LABELS["cg"]) +ax2.set_xlabel(r"Shrinkage intensity $\alpha$ ($\kappa$ decreases $\rightarrow$)") +ax2.set_ylabel("CG iterations to convergence") +ax2.set_title(r"CG iterations vs. $\alpha$ ($n=500,\,T=250$)") +ax2.legend(framealpha=0.9) +ax2.grid(True, which="both", linestyle=":", linewidth=0.5, alpha=0.7) +fig2.tight_layout(pad=1.0) +fig2.savefig(GRAPHS / "minvar_iters.pdf", bbox_inches="tight") +fig2.savefig(GRAPHS / "minvar_iters.png", bbox_inches="tight", dpi=150) +print(f"Saved {GRAPHS / 'minvar_iters.pdf'}") + +# --------------------------------------------------------------------------- +# Section 9: Efficient frontier (n=500, T=1250) +# --------------------------------------------------------------------------- + +print() +print("=" * 70) +print("Efficient frontier (n=500, T=1250, multi-solver)") +print("=" * 70) + +n_ef, T_ef = 500, 1250 +R_ef = simulate_equity_returns(n_ef, T_ef, rng=42) +rng_ef = np.random.default_rng(42) +betas_ef = rng_ef.uniform(0.4, 0.8, n_ef) +mu_ef = betas_ef * (0.10 / 250) + +_, tgt_ef = lw_alpha_and_target(R_ef) +alpha_ef = 0.5 +Sigma_ef = (1 - alpha_ef) * (R_ef.T @ R_ef) / T_ef + alpha_ef * tgt_ef +print(f"Frontier alpha (LW) = {alpha_ef}") +target_rmt_ef, lr_rmt_ef, k_rmt_ef, alpha_rmt_ef = rmt_target_and_alpha(R_ef) +Sigma_rmt_ef = target_rmt_ef +print(f"Frontier RMT: alpha={alpha_rmt_ef:.4f}, k={k_rmt_ef} signal factors") + +rhos_ef = np.linspace(0, 2, 50) + + +def _sweep_cold(solve_fn, repeats=3, ef_alpha=None, ef_target=None, ef_target_lr=None): + """Return best-of-repeats list of per-point cold-start times.""" + _alpha = alpha_ef if ef_alpha is None else ef_alpha + _target = tgt_ef if ef_target is None else ef_target + _tgt_lr = None if ef_target_lr is None else ef_target_lr + runs = [] + for _ in range(repeats): + sweep_times = [] + for rho in rhos_ef: + prob = MinVarProblem(R_ef, alpha=_alpha, target=_target, target_lr=_tgt_lr, rho=rho, mu=mu_ef) + t0 = _time.perf_counter() + solve_fn(prob) + sweep_times.append(_time.perf_counter() - t0) + runs.append(sweep_times) + return runs[int(np.argmin([sum(r) for r in runs]))] + + +print("Running cold sweeps...") +ef_times_cvxpy = _sweep_cold(lambda p: p.solve_cvxpy(), repeats=1) +ef_times_osqp = _sweep_cold(lambda p: p.solve_osqp(), repeats=3) +ef_times_proximal = _sweep_cold(lambda p: p.solve_proximal(), repeats=3) +ef_times_cg_cold = _sweep_cold(lambda p: p.solve_cg(), repeats=3) +ef_times_rmt_cold = _sweep_cold( + lambda p: p.solve_cg(), repeats=3, ef_alpha=alpha_rmt_ef, ef_target=target_rmt_ef, ef_target_lr=lr_rmt_ef +) + +print("Running CG warm-start sweep...") +ef_warm_runs = [] +ef_vols, ef_rets, ef_active = [], [], [] +for rep in range(3): + sweep_times = [] + warm = None + vols_r, rets_r, act_r = [], [], [] + for rho in rhos_ef: + prob = MinVarProblem(R_ef, alpha=alpha_ef, target=tgt_ef, rho=rho, mu=mu_ef) + t0 = _time.perf_counter() + w, _, _, warm = prob.solve_cg_warm(warm_start=warm) + sweep_times.append(_time.perf_counter() - t0) + vols_r.append(float(np.sqrt(w @ Sigma_ef @ w)) * np.sqrt(250) * 100) + rets_r.append(float(w @ mu_ef) * 250 * 100) + act_r.append(int((w > 1e-6).sum())) + ef_warm_runs.append(sweep_times) + if rep == 0: + ef_vols, ef_rets, ef_active = vols_r, rets_r, act_r +ef_times_cg_warm = ef_warm_runs[int(np.argmin([sum(r) for r in ef_warm_runs]))] + +print("Running RMT warm-start sweep...") +ef_rmt_warm_runs = [] +ef_vols_rmt, ef_rets_rmt = [], [] +for rep in range(3): + sweep_times = [] + warm = None + vols_r, rets_r = [], [] + for rho in rhos_ef: + prob = MinVarProblem(R_ef, alpha=alpha_rmt_ef, target=target_rmt_ef, target_lr=lr_rmt_ef, rho=rho, mu=mu_ef) + t0 = _time.perf_counter() + w, _, _, warm = prob.solve_cg_warm(warm_start=warm) + sweep_times.append(_time.perf_counter() - t0) + vols_r.append(float(np.sqrt(w @ Sigma_rmt_ef @ w)) * np.sqrt(250) * 100) + rets_r.append(float(w @ mu_ef) * 250 * 100) + ef_rmt_warm_runs.append(sweep_times) + if rep == 0: + ef_vols_rmt, ef_rets_rmt = vols_r, rets_r +ef_times_rmt_warm = ef_rmt_warm_runs[int(np.argmin([sum(r) for r in ef_rmt_warm_runs]))] + +N_PTS = len(rhos_ef) +ref = sum(ef_times_cvxpy) + + +def _row(label, cold_times, warm_times=None): + """Print one row of the frontier timing table.""" + total = sum(cold_times) + per_ms = total / N_PTS * 1000 + spd = ref / total + if warm_times is not None: + w_total = sum(warm_times) + w_per = w_total / N_PTS * 1000 + w_spd = ref / w_total + print( + f" {label:<28} cold: {total:6.3f}s ({per_ms:5.1f}ms/pt, {spd:5.0f}x) " + f"warm: {w_total:6.3f}s ({w_per:5.1f}ms/pt, {w_spd:5.0f}x)" + ) + else: + print(f" {label:<28} cold: {total:6.3f}s ({per_ms:5.1f}ms/pt, {spd:5.0f}x) warm: --") + + +print(f"\n{'Solver':<30} {'Cold total':>12} {'Warm total':>12}") +print("-" * 70) +_row("cvxpy (Clarabel)", ef_times_cvxpy) +_row("OSQP (direct API)", ef_times_osqp) +_row("Proximal gradient", ef_times_proximal) +_row("CG (alpha=0.5, LW)", ef_times_cg_cold, ef_times_cg_warm) +_row(f"CG (RMT, k={k_rmt_ef})", ef_times_rmt_cold, ef_times_rmt_warm) + +total_ef_cold = sum(ef_times_cg_cold) +total_ef_warm = sum(ef_times_cg_warm) +total_rmt_cold = sum(ef_times_rmt_cold) +total_rmt_warm = sum(ef_times_rmt_warm) +print(f"\nCG (LW) warm vs cold speedup: {total_ef_cold / total_ef_warm:.1f}x") +print(f"CG (LW) warm vs CVXPY speedup: {ref / total_ef_warm:.0f}x") +print(f"CG (RMT) warm vs CVXPY speedup: {ref / total_rmt_warm:.0f}x") +print(f"CG (RMT) cold vs CG (LW) cold: {total_ef_cold / total_rmt_cold:.2f}x") + +print(f"\n{'rho':>6} {'vol%ann':>9} {'ret%ann':>9} {'active':>7} {'cg_cold(ms)':>12} {'cg_warm(ms)':>12}") +print("-" * 65) +for rho, vol, ret, act, tc, tw in zip( + rhos_ef, ef_vols, ef_rets, ef_active, ef_times_cg_cold, ef_times_cg_warm, strict=False +): + print(f"{rho:>6.2f} {vol:>9.3f} {ret:>9.3f} {act:>7} {tc * 1000:>12.1f} {tw * 1000:>12.1f}") + +# Figure 3: efficient frontier — LW (coloured by active assets) + RMT overlay +fig3, ax3 = plt.subplots(figsize=(4.5, 3.2)) +sc = ax3.scatter(ef_vols, ef_rets, c=ef_active, cmap="plasma_r", s=20, zorder=3, label=r"LW ($\alpha=0.5$)") +ax3.plot(ef_vols, ef_rets, color="gray", linewidth=0.8, zorder=2) +ax3.plot( + ef_vols_rmt, + ef_rets_rmt, + color="steelblue", + linewidth=1.2, + linestyle="--", + zorder=3, + label=f"RMT ($k={k_rmt_ef}$ factors)", +) +ax3.scatter([ef_vols[0]], [ef_rets[0]], marker="*", s=120, color="#ff7f0e", zorder=4, label="Min-var (LW)") +ax3.scatter([ef_vols_rmt[0]], [ef_rets_rmt[0]], marker="*", s=120, color="steelblue", zorder=4, label="Min-var (RMT)") +cbar = fig3.colorbar(sc, ax=ax3, pad=0.02) +cbar.set_label("Active assets (LW)") +ax3.set_xlabel("Annualised volatility (\\%)") +ax3.set_ylabel("Annualised expected return (\\%)") +ax3.set_title(f"Efficient frontier ($n={n_ef}$, $T={T_ef}$)") +ax3.legend(framealpha=0.9, loc="lower right", fontsize=7) +ax3.grid(True, linestyle=":", linewidth=0.5, alpha=0.7) +fig3.tight_layout(pad=1.0) +fig3.savefig(GRAPHS / "minvar_frontier.pdf", bbox_inches="tight") +fig3.savefig(GRAPHS / "minvar_frontier.png", bbox_inches="tight", dpi=150) +print(f"Saved {GRAPHS / 'minvar_frontier.pdf'}") diff --git a/experiment/fetch_ftse100.py b/experiment/fetch_ftse100.py new file mode 100644 index 0000000..825e3c0 --- /dev/null +++ b/experiment/fetch_ftse100.py @@ -0,0 +1,165 @@ +"""Fetch FTSE 100 daily returns via yfinance and save to parquet.""" + +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "yfinance", +# "pandas", +# "pyarrow", +# ] +# /// + +import yfinance as yf + +# FTSE 100 constituents as of 2025 (with .L suffix for LSE) +# Source: Wikipedia / FTSE Russell June 2025 +FTSE100_TICKERS = [ + "AAF.L", + "AAL.L", + "ABF.L", + "ADM.L", + "AHT.L", + "AIRP.L", + "ALC.L", + "ANTO.L", + "AO.L", + "APA.L", + "AUTO.L", + "AV.L", + "AZN.L", + "BA.L", + "BARC.L", + "BATS.L", + "BBOX.L", + "BEZ.L", + "BKG.L", + "BLND.L", + "BME.L", + "BNZL.L", + "BP.L", + "BSRT.L", + "BT.L", + "CLLN.L", + "CPG.L", + "CRH.L", + "CRDA.L", + "DCC.L", + "DGE.L", + "DPLM.L", + "EDV.L", + "ELN.L", + "ENT.L", + "EXPN.L", + "FCIT.L", + "FERG.L", + "FLTR.L", + "FRES.L", + "GLEN.L", + "GSK.L", + "HIK.L", + "HL.L", + "HLMA.L", + "HLN.L", + "HSBA.L", + "ICP.L", + "IGG.L", + "III.L", + "IMB.L", + "INF.L", + "ITV.L", + "JD.L", + "KGF.L", + "LAND.L", + "LGEN.L", + "LLOY.L", + "LMP.L", + "LSEG.L", + "MKS.L", + "MNDI.L", + "MNG.L", + "MRO.L", + "NG.L", + "NXT.L", + "OCL.L", + "PCF.L", + "PHNX.L", + "PRU.L", + "PSH.L", + "PSN.L", + "PSON.L", + "RB.L", + "RDSA.L", + "REL.L", + "RIO.L", + "RKT.L", + "RMV.L", + "RR.L", + "RS1.L", + "RSA.L", + "SBRY.L", + "SDR.L", + "SGE.L", + "SGRO.L", + "SJP.L", + "SKG.L", + "SMDS.L", + "SMIN.L", + "SMT.L", + "SN.L", + "SPX.L", + "SSE.L", + "STAN.L", + "STJ.L", + "SVT.L", + "TSCO.L", + "TW.L", + "UU.L", + "VOD.L", + "WEIR.L", + "WMH.L", + "WPP.L", + "WTB.L", +] + +START = "2019-06-01" +END = "2026-05-30" +MAX_MISSING_FRAC = 0.10 + +print(f"Downloading {len(FTSE100_TICKERS)} tickers from {START} to {END}...") +raw = yf.download( + FTSE100_TICKERS, + start=START, + end=END, + auto_adjust=True, + progress=True, +)["Close"] + +print(f"\nRaw shape: {raw.shape}") + +# Drop tickers with too much missing data +missing_frac = raw.isna().mean() +keep = missing_frac[missing_frac <= MAX_MISSING_FRAC].index +dropped = set(raw.columns) - set(keep) +if dropped: + print(f"Dropping {len(dropped)} tickers with >{MAX_MISSING_FRAC * 100:.0f}% missing: {sorted(dropped)}") +raw = raw[keep] + +# Forward-fill residual gaps (delisted / bank-holiday gaps) +raw = raw.ffill() + +# Drop any remaining rows with NaN (leading rows before first listing) +raw = raw.dropna() + +# Compute percentage returns +returns = raw.pct_change().dropna() + +print(f"\nFinal returns shape: {returns.shape}") +print(f"Date range: {returns.index[0].date()} to {returns.index[-1].date()}") +print(f"n={returns.shape[1]} assets, T={returns.shape[0]} days, n/T={returns.shape[1] / returns.shape[0]:.3f}") + +# Remove ticker suffix for cleaner column names +returns.columns = [t.replace(".L", "") for t in returns.columns] + +out = "data/ftse100_pct_returns.parquet" +returns.to_parquet(out) +print(f"\nSaved to {out}") diff --git a/experiment/fetch_sp500.py b/experiment/fetch_sp500.py new file mode 100644 index 0000000..9466e09 --- /dev/null +++ b/experiment/fetch_sp500.py @@ -0,0 +1,70 @@ +"""Fetch S&P 500 constituent returns for the last 5 years via yfinance.""" + +# /// script +# requires-python = ">=3.11" +# dependencies = [ +# "yfinance", +# "pandas", +# "numpy", +# "lxml", +# "requests", +# "pyarrow", +# ] +# /// + +import io +from pathlib import Path + +import pandas as pd +import requests +import yfinance as yf + +# ── 1. S&P 500 tickers from Wikipedia ───────────────────────────────────────── + +resp = requests.get( + "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies", + headers={"User-Agent": "Mozilla/5.0 (research script)"}, + timeout=30, +) +resp.raise_for_status() +tables = pd.read_html(io.StringIO(resp.text)) +tickers = tables[0]["Symbol"].str.replace(".", "-", regex=False).tolist() +print(f"Found {len(tickers)} tickers in the S&P 500 index") + +# ── 2. Download 5 years of adjusted close prices ────────────────────────────── + +raw = yf.download( + tickers, + auto_adjust=True, + progress=True, + threads=True, + start=pd.Timestamp("2021-06-01"), + end=pd.Timestamp("2026-06-01"), +)["Close"] + +print(f"\nRaw download: {raw.shape[0]} trading days x {raw.shape[1]} tickers") + +# ── 3. Clean: keep tickers with <5% missing days, forward-fill short gaps ───── + +threshold = 0.05 +missing_frac = raw.isna().mean() +keep = missing_frac[missing_frac <= threshold].index +raw = raw[keep].ffill() + +print(f"After cleaning: {raw.shape[0]} days x {raw.shape[1]} assets") + +# ── 4. Returns ──────────────────────────────────────────────────────────────── + +pct_returns = raw.pct_change().dropna() +print(f"Return matrix shape: {pct_returns.shape} (T={pct_returns.shape[0]}, N={pct_returns.shape[1]})") + +# ── 5. Save ─────────────────────────────────────────────────────────────────── + +out_dir = Path(__file__).parent / "data" + +pct_returns.to_parquet(out_dir / "sp500_pct_returns.parquet") +print("\nSaved data/sp500_pct_returns.parquet") + +print(f"Date range: {pct_returns.index[0].date()} → {pct_returns.index[-1].date()}") +print(f"Assets: {pct_returns.shape[1]}") +print(f"Trading days: {pct_returns.shape[0]}") diff --git a/experiment/graphs/minvar_frontier.pdf b/experiment/graphs/minvar_frontier.pdf new file mode 100644 index 0000000..9d77389 Binary files /dev/null and b/experiment/graphs/minvar_frontier.pdf differ diff --git a/experiment/graphs/minvar_frontier.png b/experiment/graphs/minvar_frontier.png new file mode 100644 index 0000000..1c0d19a Binary files /dev/null and b/experiment/graphs/minvar_frontier.png differ diff --git a/experiment/graphs/minvar_iters.pdf b/experiment/graphs/minvar_iters.pdf new file mode 100644 index 0000000..313415c Binary files /dev/null and b/experiment/graphs/minvar_iters.pdf differ diff --git a/experiment/graphs/minvar_iters.png b/experiment/graphs/minvar_iters.png new file mode 100644 index 0000000..212f0b0 Binary files /dev/null and b/experiment/graphs/minvar_iters.png differ diff --git a/book/marimo/notebooks/graphs/minvar_loglog.pdf b/experiment/graphs/minvar_scaling.pdf similarity index 67% rename from book/marimo/notebooks/graphs/minvar_loglog.pdf rename to experiment/graphs/minvar_scaling.pdf index 81bb869..1950a1b 100644 Binary files a/book/marimo/notebooks/graphs/minvar_loglog.pdf and b/experiment/graphs/minvar_scaling.pdf differ diff --git a/experiment/graphs/minvar_scaling.png b/experiment/graphs/minvar_scaling.png new file mode 100644 index 0000000..56db6d7 Binary files /dev/null and b/experiment/graphs/minvar_scaling.png differ diff --git a/experiment/util/__init__.py b/experiment/util/__init__.py new file mode 100644 index 0000000..cbaf04a --- /dev/null +++ b/experiment/util/__init__.py @@ -0,0 +1 @@ +"""Experiment utilities: timing helpers and benchmark table printer.""" diff --git a/experiment/util/runner.py b/experiment/util/runner.py new file mode 100644 index 0000000..65cfcec --- /dev/null +++ b/experiment/util/runner.py @@ -0,0 +1,32 @@ +"""Timing and result-printing helpers for solver benchmarks.""" + +import time + + +def run_timed(fn, repeats=3): + """Return (result, best_wall_time_s) over `repeats` calls.""" + best = float("inf") + result = None + for _ in range(repeats): + t0 = time.perf_counter() + result = fn() + best = min(best, time.perf_counter() - t0) + return result, best + + +def print_table(label, results, ref_key="cvxpy"): + """Print a benchmark table with speedup relative to ref_key. + + Each entry in results maps to a dict with keys: + time_s float + outer int | None (active-set outer steps; None if no outer loop) + inner int | None (inner solver iterations; None for direct solvers) + """ + ref = results[ref_key]["time_s"] + print(f"\n{label}") + print(f"{'Method':<30} {'Time (s)':>10} {'Outer':>7} {'Inner':>8} {'Speedup':>10}") + print("-" * 70) + for key, v in results.items(): + outer_str = str(v["outer"]) if v.get("outer") is not None else "--" + inner_str = str(v["inner"]) if v.get("inner") is not None else "--" + print(f"{key:<30} {v['time_s']:>10.4f} {outer_str:>7} {inner_str:>8} {ref / v['time_s']:>9.1f}x") diff --git a/paper/bib/refs.bib b/paper/bib/refs.bib new file mode 100644 index 0000000..7dd3129 --- /dev/null +++ b/paper/bib/refs.bib @@ -0,0 +1,326 @@ +@article{halko2011, + author = {Halko, Nathan and Martinsson, Per-Gunnar and Tropp, Joel A.}, + title = {Finding Structure with Randomness: Probabilistic Algorithms for Constructing Approximate Matrix Decompositions}, + journal = {SIAM Review}, + year = {2011}, + volume = {53}, + number = {2}, + pages = {217--288}, +} + +@article{beck2009, + author = {Beck, Amir and Teboulle, Marc}, + title = {A Fast Iterative Shrinkage-Thresholding Algorithm for Linear Inverse Problems}, + journal = {SIAM Journal on Imaging Sciences}, + year = {2009}, + volume = {2}, + number = {1}, + pages = {183--202}, +} + +@article{bajeux2012, + author = {Bajeux-Besnainou, Isabelle and Bandara, Wimal and Bura, Efstathia}, + title = {A {K}rylov subspace approach to large portfolio optimization}, + journal = {Journal of Economic Dynamics and Control}, + year = {2012}, + volume = {36}, + number = {11}, + pages = {1688--1699}, +} + +@article{benzi2005, + author = {Benzi, Michele and Golub, Gene H. and Liesen, Jörg}, + title = {Numerical solution of saddle point problems}, + journal = {Acta Numerica}, + year = {2005}, + volume = {14}, + pages = {1--137}, +} + +@book{bouchaud2003, + author = {Bouchaud, Jean-Philippe and Potters, Marc}, + title = {Theory of Financial Risk and Derivative Pricing: From Statistical Physics to Risk Management}, + publisher = {Cambridge University Press}, + edition = {2nd}, + year = {2003}, +} + +@book{boyd2004, + author = {Boyd, Stephen and Vandenberghe, Lieven}, + title = {Convex Optimization}, + publisher = {Cambridge University Press}, + year = {2004}, +} + +@article{bunch1977, + author = {Bunch, James R. and Kaufman, Linda}, + title = {Some stable methods for calculating inertia and solving symmetric linear systems}, + journal = {Mathematics of Computation}, + year = {1977}, + volume = {31}, + number = {137}, + pages = {163--179}, +} + +@article{chen2010, + author = {Chen, Yilun and Wiesel, Ami and Eldar, Yonina C. and Hero, Alfred O.}, + title = {Shrinkage Algorithms for {MMSE} Covariance Estimation}, + journal = {{IEEE} Transactions on Signal Processing}, + volume = {58}, + number = {10}, + pages = {5016--5029}, + year = {2010}, +} + +@phdthesis{choi2006, + author = {Choi, Sou-Cheng T.}, + title = {Iterative Methods for Singular Linear Equations and Least-Squares Problems}, + school = {Stanford University}, + year = {2006}, + type = {PhD thesis}, +} + +@article{clarabel2024, + author = {Goulart, Paul J. and Chen, Yuwen}, + title = {Clarabel: An interior-point solver for conic optimisation problems with + quadratic objectives}, + journal = {INFORMS Journal on Computing}, + year = {2024}, + volume = {36}, + number = {2}, + pages = {481--499}, +} + +@article{condat2016, + author = {Condat, Laurent}, + title = {Fast projection onto the simplex and the $\ell_1$ ball}, + journal = {Mathematical Programming}, + year = {2016}, + volume = {158}, + number = {1--2}, + pages = {575--585}, +} + +@article{demiguel2009, + author = {DeMiguel, Victor and Garlappi, Lorenzo and Nogales, Francisco J. and Uppal, Raman}, + title = {A Generalized Approach to Portfolio Optimization: Improving Performance by Constraining Portfolio Norms}, + journal = {Management Science}, + year = {2009}, + volume = {55}, + number = {5}, + pages = {798--812}, +} + +@article{diamond2016, + author = {Diamond, S. and Boyd, S.}, + title = {{CVXPY}: A {Python}-embedded modeling language for convex optimization}, + journal = {Journal of Machine Learning Research}, + year = {2016}, + volume = {17}, + number = {83}, + pages = {1--5}, +} + +@inproceedings{duchi2008, + author = {Duchi, John and Shalev-Shwartz, Shai and Singer, Yoram and Chandra, Tushar}, + title = {Efficient Projections onto the $\ell_1$-Ball for Learning in High Dimensions}, + booktitle = {Proceedings of the 25th International Conference on Machine Learning ({ICML})}, + year = {2008}, + pages = {272--279}, + publisher = {ACM}, + address = {New York, NY}, +} + +@misc{fast_minimum_variance, + author = {Schmelzer, T.}, + title = {fast-minimum-variance: Solving Minimum Variance Portfolios Fast}, + howpublished = {\url{https://github.com/Jebel-Quant/fast_minimum_variance}}, + year = {2026}, + note = {{MIT} License}, +} + +@book{fischer1996, + author = {Fischer, Bernd}, + title = {Polynomial Based Iteration Methods for Symmetric Linear Systems}, + publisher = {Wiley-Teubner}, + year = {1996}, +} + +@book{golub2013, + author = {Golub, Gene H. and Van Loan, Charles F.}, + title = {Matrix Computations}, + edition = {4th}, + publisher = {Johns Hopkins University Press}, + year = {2013}, +} + +@book{greenbaum1997, + author = {Greenbaum, Anne}, + title = {Iterative Methods for Solving Linear Systems}, + publisher = {SIAM}, + address = {Philadelphia}, + year = {1997}, + series = {Frontiers in Applied Mathematics}, + volume = {17}, +} + +@article{hestenes1952, + author = {Hestenes, Magnus R. and Stiefel, Eduard}, + title = {Methods of conjugate gradients for solving linear systems}, + journal = {Journal of Research of the National Bureau of Standards}, + year = {1952}, + volume = {49}, + number = {6}, + pages = {409--436}, +} + +@article{laloux1999, + author = {Laloux, Laurent and Cizeau, Pierre and Bouchaud, Jean-Philippe and Potters, Marc}, + title = {Noise Dressing of Financial Correlation Matrices}, + journal = {Physical Review Letters}, + year = {1999}, + volume = {83}, + number = {7}, + pages = {1467--1470}, +} + +@book{lawson1974, + author = {Lawson, Charles L. and Hanson, Richard J.}, + title = {Solving Least Squares Problems}, + publisher = {Prentice-Hall}, + year = {1974}, + address = {Englewood Cliffs, NJ}, +} + +@article{ledoit2003jpm, + author = {Ledoit, Olivier and Wolf, Michael}, + title = {Honey, {I} Shrunk the Sample Covariance Matrix}, + journal = {Journal of Portfolio Management}, + year = {2004}, + volume = {30}, + number = {4}, + pages = {110--119}, +} + +@article{ledoit2004, + author = {Ledoit, O. and Wolf, M.}, + title = {A well-conditioned estimator for large-dimensional covariance matrices}, + journal = {Journal of Multivariate Analysis}, + year = {2004}, + volume = {88}, + number = {2}, + pages = {365--411}, +} + +@article{ledoit2020, + author = {Ledoit, O. and Wolf, M.}, + title = {Analytical nonlinear shrinkage of large-dimensional covariance matrices}, + journal = {Annals of Statistics}, + year = {2020}, + volume = {48}, + number = {5}, + pages = {3043--3065}, +} + +@article{marchenko1967, + author = {Marchenko, Vladimir A. and Pastur, Leonid A.}, + title = {Distribution of eigenvalues for some sets of random matrices}, + journal = {Mathematics of the USSR-Sbornik}, + year = {1967}, + volume = {1}, + number = {4}, + pages = {457--483}, +} + +@article{markowitz1952, + author = {Markowitz, H.}, + title = {Portfolio selection}, + journal = {Journal of Finance}, + year = {1952}, + volume = {7}, + number = {1}, + pages = {77--91}, +} + +@article{markowitz1956, + author = {Markowitz, H.}, + title = {The optimization of a quadratic function subject to linear constraints}, + journal = {Naval Research Logistics Quarterly}, + year = {1956}, + volume = {3}, + number = {1--2}, + pages = {111--133}, +} + +@misc{cvxcla, + author = {Schmelzer, Thomas}, + title = {{cvxcla}: Critical Line Algorithm for Portfolio Optimisation}, + howpublished = {\url{https://github.com/cvxgrp/cvxcla}}, + year = {2024}, + note = {{MIT} License}, +} + +@article{merton1972, + author = {Merton, Robert C.}, + title = {An Analytic Derivation of the Efficient Portfolio Frontier}, + journal = {Journal of Financial and Quantitative Analysis}, + year = {1972}, + volume = {7}, + number = {4}, + pages = {1851--1872}, +} + +@book{nesterov2004, + author = {Nesterov, Yurii}, + title = {Introductory Lectures on Convex Optimization: {A} Basic Course}, + publisher = {Kluwer Academic Publishers}, + address = {Boston, MA}, + year = {2004}, +} + +@book{nocedal2006, + author = {Nocedal, Jorge and Wright, Stephen J.}, + title = {Numerical Optimization}, + edition = {2nd}, + publisher = {Springer}, + address = {New York}, + year = {2006}, +} + +@article{osqp, + author = {Stellato, Bartolomeo and Banjac, Goran and Goulart, Paul and Bemporad, Alberto and Boyd, Stephen}, + title = {{OSQP}: An Operator Splitting Solver for Quadratic Programs}, + journal = {Mathematical Programming Computation}, + volume = {12}, + number = {4}, + pages = {637--672}, + year = {2020}, +} + +@article{paige1975, + author = {Paige, Christopher C. and Saunders, Michael A.}, + title = {Solution of sparse indefinite systems of linear equations}, + journal = {SIAM Journal on Numerical Analysis}, + year = {1975}, + volume = {12}, + number = {4}, + pages = {617--629}, +} + +@phdthesis{schmelzer2004, + author = {Schmelzer, Thomas}, + title = {Block {K}rylov methods for {H}ermitian linear systems}, + school = {Department of Mathematics, University of Kaiserslautern}, + year = {2004}, + type = {Diploma thesis}, +} + +@incollection{simoncini2013, + author = {Simoncini, Valeria and Szyld, Daniel B.}, + title = {On the Superlinear Convergence of {MINRES}}, + booktitle = {Numerical Mathematics and Advanced Applications 2011}, + publisher = {Springer}, + year = {2013}, + pages = { }, + note = {Proceedings of {ENUMATH} 2011}, +} diff --git a/paper/minvar_paper.tex b/paper/minvar_paper.tex new file mode 100644 index 0000000..649203a --- /dev/null +++ b/paper/minvar_paper.tex @@ -0,0 +1,70 @@ +\documentclass{siam/siamart} + +\usepackage{amsmath, amssymb} +\usepackage{booktabs} +\usepackage{microtype} +\usepackage{xcolor} +\usepackage{graphicx} +\usepackage{algpseudocode} +\usepackage{authblk} + +\newcommand{\tcr}[1]{{\color{red}#1}} +\newcommand{\tcm}[1]{{\color{magenta}#1}} +\newcommand{\tcg}[1]{{\color{green}#1}} +\newcommand{\tcb}[1]{{\color{blue}#1}} +\newcommand{\tbr}[1]{{\color{brown}#1}} +\newcommand{\norm}[1]{\lVert #1 \rVert} + + +\newsiamremark{remark}{Remark} +\crefname{remark}{Remark}{Remarks} +% Fix siamart.cls's refstepcounter override: it strips \@currentcounter, +% which TeX Live 2026's kernel and cleveref's First Aid patch both require. +\makeatletter +\def\cref@old@refstepcounter#1{% + \stepcounter{#1}% + \edef\reserved@a{#1}% + \ifx\reserved@a\ltx@star@counter\else + \let\@currentcounter\reserved@a + \fi + \protected@edef\@currentlabel{% + \csname p@#1\expandafter\endcsname\csname the#1\endcsname}% +}% +\makeatother + + +\title{Krylov Methods for the Minimum Variance Portfolio} + +\author[1]{Thomas Schmelzer} +\author[2]{Martin Stoll} +\author[3,4]{Michael Wolf} + +\affil[1]{\small Jebel Quant Research, Abu Dhabi} +\affil[2]{\small Faculty of Mathematics, TU Chemnitz} +\affil[3]{\small Department of Economics, University of Zurich} +\affil[4]{\small ADIA Lab, Abu Dhabi} + + +\headers{Krylov Methods for the Minimum Variance Portfolio}{T.~Schmelzer, M.~Wolf, and M.~Stoll} + +\begin{document} + +\maketitle + +\begin{abstract} +\input{abstract} +\end{abstract} + +\input{s1_introduction} +\input{s2s3_problem} +\input{s4_nonneg} +\input{s5_shrinkage} +\input{s6_methods} +\input{s7_results} +\input{s8_conclusions} + +% \bibliographystyle{siam/siamplain} +\bibliographystyle{siam} +\bibliography{bib/refs} + +\end{document} diff --git a/paper/s1_introduction.tex b/paper/s1_introduction.tex new file mode 100644 index 0000000..538a0e1 --- /dev/null +++ b/paper/s1_introduction.tex @@ -0,0 +1,77 @@ +\section{Introduction} + +The long-only minimum variance portfolio allocates capital across $n$ assets to minimize +portfolio return variance subject to two constraints: the weights must sum to one (the +portfolio is fully invested) and must be non-negative. The only input required in theory +is the $n \times n$ covariance matrix $\Sigma$ of the $n \times 1$ vector of asset +returns. In practice, $\Sigma$ is unknown and must be estimated from historical data. +To this end, one uses a $T\times n$ matrix of demeaned returns $X$, where $T$ denotes +the length of the estimation window. The classical estimator of $\Sigma$ is the sample +covariance matrix $\hat \Sigma = X^\top X/T$. + +The Lagrangian for the problem +\[ +\min_w\; w^\top \hat \Sigma w \quad \text{subject to} \quad \mathbf{1}^\top w = 1, +\] +where $\mathbf{1}$ denotes a conformable vector of ones, is +\[ +\mathcal{L}(w,\lambda) = w^\top \hat \Sigma w - \lambda(\mathbf{1}^\top w - 1), +\] +whose stationarity condition $2 \hat \Sigma w = \lambda\mathbf{1}$ immediately gives +$w \propto \hat \Sigma^{-1}\mathbf{1}$; this was made explicit by \cite{merton1972} in +the context of the efficient frontier. The budget constraint is recovered by the single +rescaling +\[ +w = v/(\mathbf{1}^\top v), \qquad v = \hat \Sigma^{-1}\mathbf{1}. +\] +What has \emph{not} been exploited is this structure computationally: the standard +pipeline still forms $\hat \Sigma = X^\top X/T$ explicitly before solving. + +This paper makes two contributions. First, we apply conjugate gradients (CG) to +$\hat \Sigma v = \mathbf{1}$ \emph{matrix-free}: $\hat \Sigma = X^\top X/T$ is never +assembled. Each CG iteration requires only two matrix-vector products with $X$, reducing +per-iteration cost from $O(n^2)$ to $O(nT)$ and eliminating $O(n^2)$ storage. +Non-negative weights are enforced by a primal-dual active-set loop that calls the CG +solver on successively smaller subproblems. Because CG is a Krylov method, it builds +successive iterates in the Krylov subspace $\mathcal{K}_k(\hat\Sigma, \mathbf{1})$ and +achieves $O(\sqrt\kappa)$ convergence, where $\kappa = \kappa(\hat\Sigma)$ +is the spectral condition number (Proposition~\ref{prop:cg}). This $\sqrt\kappa$ rate is the defining advantage +of Krylov methods over general first-order schemes, which converge in $O(\kappa)$ +iterations without exploiting the subspace structure (Proposition~\ref{prop:proximal}). + +Second, we show that Ledoit-Wolf shrinkage~\cite{ledoit2004}, proposed to yield a more +accurate estimator of the true covariance matrix $\Sigma$ than the sample covariance +matrix, plays a dual role in this framework. Statistically, it reduces estimation error +by shrinking sample eigenvalues toward their mean. Numerically, the same modification +compresses the eigenvalue spread of the system matrix and directly accelerates CG +convergence (Proposition~\ref{prop:shrinkage}). On S\&P~500 equity data, LW shrinkage reduces the number of CG iterations +from 744 to 82. Statistical regularization and spectral preconditioning are, in this +problem, the same operation applied to the same matrix---a connection that does not +appear to have been made explicit in the existing literature. + +CG of Hestenes and Stiefel~\cite{hestenes1952} is a cornerstone of large-scale +scientific computing; see, e.g., Golub and Van Loan~\cite{golub2013}. To the best of +our knowledge it has not been applied to portfolio optimisation in the matrix-free form +described here. The closest related work applies GMRES to the minimum variance problem +when $\hat \Sigma$ is singular~\cite{bajeux2012}; that formulation still assembles +$X^\top X$ and does not exploit the SPD structure or consider preconditioning. +General-purpose solvers such as Clarabel~\cite{clarabel2024} and OSQP~\cite{osqp} handle +the long-only constraint natively but require an assembled covariance matrix as input. +Markowitz's Critical Line Algorithm~\cite{markowitz1956} traces the full efficient +frontier via simplex-like pivots at $O(n^2)$ each; it targets the entire frontier +rather than a single minimum-variance solve and does not exploit the matrix-free +structure of $\hat\Sigma = X^\top X/T$. + +For completeness, Section~\ref{sec:primaldual} also describes a simplex-projection +strategy based on proximal gradient descent. This method is \emph{not} a Krylov method: +it applies the gradient operator and projects onto the probability simplex at each step +without building a Krylov subspace, and consequently converges in $O(\kappa)$ rather than +$O(\sqrt\kappa)$ iterations. It is included as a matrix-free first-order comparator that +requires no outer active-set loop. + +The remainder of the paper is organized as follows. +Sections~\ref{sec:problem}--\ref{sec:solvers} develop the problem formulation, +non-negativity strategies, Ledoit-Wolf preconditioning, and concrete solver +implementations. Section~\ref{sec:results} reports empirical results on S\&P~500 +and synthetic data, benchmarking against direct KKT factorization, Clarabel, OSQP, +and proximal gradient. diff --git a/paper/s2s3_problem.tex b/paper/s2s3_problem.tex new file mode 100644 index 0000000..bb9c370 --- /dev/null +++ b/paper/s2s3_problem.tex @@ -0,0 +1,128 @@ +\section{Problem Formulation}\label{sec:problem} +The long-only mean-variance portfolio solves +\[ +\min_{w}\; w^\top\Sigma\, w - \rho\,\mu^\top w +\quad\text{subject to}\quad +\mathbf{1}^\top w = 1,\quad w\geq 0~, +\] +where $\Sigma$ is the $n\times n$ covariance matrix of returns, $\mu$ is the $n\times 1$ vector of expected returns, and $\rho\geq 0$ controls the risk-return trade-off; setting $\rho=0$ recovers the pure long-only minimum variance portfolio. +Since both $\Sigma$ and +$\mu$ are unknown, in practice they must be replaced by estimates. Let $X\in\mathbb{R}^{T\times n}$ denote the demeaned return matrix for $n$ assets over the previous $T$ trading periods. The feasible problem formulation then is +\[ +\min_{w}\; w^\top\hat\Sigma\, w - \rho\,\hat\mu^\top w +\quad\text{subject to}\quad +\mathbf{1}^\top w = 1,\quad w\geq 0~. +\] +The classical choice for estimating $\Sigma$ +the sample covariance matrix $\hat\Sigma = X^\top X/T$, for which $w^\top\hat\Sigma\, w = \norm{Xw}^2/T$, and the problem becomes +\[ +\min_{w}\;\frac{\norm{Xw}^2}{T} - \rho\,\hat\mu^\top w +\quad\text{subject to}\quad +\mathbf{1}^\top w = 1,\quad w\geq 0~. +\] +This reframing is key: the variance term is now a normalised squared norm of $Xw$, and the gradient $w\mapsto X^\top(Xw)$ can be evaluated with two matrix--vector products, without ever forming $X^\top X$. + +\begin{table}[ht] +\centering +\begin{tabular}{lll} +\toprule +Parameter & Type & Description \\ +\midrule +$X$ & $\mathbb{R}^{T \times n}$ & Demeaned return matrix \\ +$\rho$ & $\mathbb{R}_{\geq 0}$ & Return-tilt weight (default $0$) \\ +$\mu$ & $\mathbb{R}^n$ & Expected returns (default $\mathbf{0}$) \\ +\bottomrule +\end{tabular} +\caption{Parameters of the \texttt{Problem} specification.} +\label{tab:params} +\end{table} + +\paragraph{Dual problem} +Including multipliers for both constraints, the full Lagrangian is +\[ +\mathcal{L}(w,\, \lambda,\, \nu) += \frac{\lVert Xw\rVert^2}{T} - \rho\hat\mu^\top w + - \lambda(\mathbf{1}^\top w - 1) - \nu^\top w\;, +\] +where $\lambda\in\mathbb{R}$ is the budget multiplier and $\nu\geq\mathbf{0}$ is the vector of +non-negativity multipliers. The stationarity condition +\[ +\tfrac{2}{T}\, X^\top X w = \lambda\mathbf{1}+\nu+\rho\hat\mu +\] +gives $w^* = \tfrac{T}{2}(X^\top X)^{-1}s$ with $s = \lambda\mathbf{1}+\nu+\rho\hat\mu$. +Substituting into $\mathcal{L}$: +\[ +\mathcal{L}(w^*,\,\lambda,\,\nu) += \tfrac{1}{T}(w^*)^\top X^\top X w^* - s^\top w^* + \lambda\;. +\] +From stationarity $\tfrac{1}{T}X^\top X w^* = \tfrac{s}{2}$, so the first term equals +$\tfrac{1}{2}s^\top w^*$, giving +\[ +g(\lambda,\,\nu) += \tfrac{1}{2}s^\top w^* - s^\top w^* + \lambda += -\tfrac{1}{2}s^\top w^* + \lambda\;. +\] +Substituting $w^* = \tfrac{T}{2}(X^\top X)^{-1}s$ yields the dual function +\[ +g(\lambda,\,\nu) += \inf_w\; \mathcal{L}(w,\,\lambda,\,\nu) += \lambda - \tfrac{T}{4}\, + (\lambda\mathbf{1}+\nu+\rho\hat\mu)^\top(X^\top X)^{-1} + (\lambda\mathbf{1}+\nu+\rho\hat\mu)\;, +\] +defined when $X$ has full column rank; the dual problem is +$\max_{\lambda,\,\nu\geq\mathbf{0}}\; g(\lambda,\,\nu)$. +The feasible set $\{w:\mathbf{1}^\top w=1,\,w\geq\mathbf{0}\}$ is compact and +$w=\tfrac{1}{n}\mathbf{1}$ is strictly feasible, so Slater's condition holds and +strong duality obtains. + +At the optimum, complementary slackness ($\nu_i w_i = 0$ for all $i$) partitions assets +into two types. \emph{Active} assets ($w_i>0$) have $\nu_i=0$; stationarity then gives +\[ +\tfrac{2}{T}\,(X^\top Xw)_i = \lambda+\rho\hat\mu_i\;, +\] +equating each active asset's marginal variance to the budget shadow price adjusted for its +expected return. \emph{Excluded} assets ($w_i=0$) satisfy +\[ +\nu_i = \tfrac{2}{T}\,(X^\top Xw)_i - \lambda - \rho\hat\mu_i \geq 0~. +\] +Their marginal variance exceeds this threshold, so holding them at a positive weight would +increase variance net of return. The primal--dual loop in Section~\ref{sec:primaldual} is a direct +implementation of these conditions: it terminates precisely when primal and dual feasibility +both hold, which---together with stationarity from each inner solve---is sufficient for +global optimality. + +\section{Positive-Definite Reduction}\label{sec:pd} + +Ignoring the inequality constraints for now, the Lagrangian for the equality-constrained problem is: +\[ +\mathcal{L}(w, \lambda) = \frac{\|Xw\|^2}{T} - \lambda(\mathbf{1}^\top w - 1)~ + \rho \hat \mu~. +\] +Setting the derivative to zero gives the stationarity condition +$\frac{2}{T}\hat\Sigma\, w = \lambda \mathbf{1} + \rho\hat \mu$, where $\hat\Sigma = X^\top X/T$. +For $\rho = 0$, this reduces to $\frac{2}{T}\hat\Sigma\, w = \lambda\mathbf{1}$, determining the solution up to a scalar. + +\paragraph{Positive-definite reduction} +When $\rho = 0$ (pure long-only minimum variance), stationarity gives $2\hat\Sigma\, w = \lambda\mathbf{1}$, +so $w$ is a scalar multiple of $\hat\Sigma^{-1}\mathbf{1}$. +Setting $v_1 = \hat\Sigma^{-1}\mathbf{1}$ and writing $w = c\,v_1$, the budget constraint +$\mathbf{1}^\top w = 1$ determines $c$: +\[ + c = \frac{1}{\mathbf{1}^\top v_1} \quad \mbox{and} \quad w = \frac{v_1}{\mathbf{1}^\top v_1}~. +\] +When $\rho \neq 0$, stationarity gives $2\hat\Sigma\, w = \lambda\mathbf{1} + \rho\hat \mu$, so +$w = \tfrac{\lambda}{2}\,\hat\Sigma^{-1}\mathbf{1} + \tfrac{\rho}{2}\,\hat\Sigma^{-1}\hat \mu$. +Setting $v_1 = \hat\Sigma^{-1}\mathbf{1}$ and $v_2 = \hat\Sigma^{-1}\hat \mu$ (two SPD solves), the budget +constraint determines the multiplier +\[ + \lambda = \frac{2 - \rho\,(\mathbf{1}^\top v_2)}{\mathbf{1}^\top v_1}~, +\] +and the weights follow as $w = \tfrac{\lambda}{2}\,v_1 + \tfrac{\rho}{2}\,v_2$. +In both cases the $(n+1)\times(n+1)$ saddle-point system is never needed and $\lambda$ is +recovered analytically rather than as part of the solve. +The long-only problem therefore reduces to solving a symmetric positive definite system +$\hat\Sigma\,v = b$ for one or two right-hand sides, subject to the remaining inequality +constraint $w \geq 0$; the latter is addressed in Section~\ref{sec:primaldual}, which +presents two strategies for doing so. Section~\ref{sec:precond} shows how Ledoit-Wolf +shrinkage replaces $\hat\Sigma$ with a better-conditioned estimator $\hat\Sigma_{\mathrm{LW}}$ +without changing this reduction. diff --git a/paper/s4_nonneg.tex b/paper/s4_nonneg.tex new file mode 100644 index 0000000..5f584f8 --- /dev/null +++ b/paper/s4_nonneg.tex @@ -0,0 +1,118 @@ +\section{Enforcing Non-Negativity}\label{sec:primaldual} + +Section~\ref{sec:pd} reduces the long-only problem to solving an SPD system, leaving +$w \geq 0$ as the sole remaining constraint. Two strategies are available. + +\paragraph{Active-set strategy} +The first strategy separates the budget constraint from the non-negativity constraint. +The budget-only problem is solved over a dynamically adjusted \emph{active set} of +assets; non-negativity is then enforced iteratively without ever modifying the inner +solve. + +In the \emph{primal step}, any asset with a negative weight is dropped from the active +set and the budget-constrained SPD system is re-solved on the reduced universe. Once +all active weights are non-negative, the \emph{dual step} checks the KKT gradient +condition for every excluded asset: global optimality requires every excluded asset $i$ +to satisfy +\[ + \nabla_i f(w) \;\geq\; \lambda\;, + \qquad + \nabla_i f(w) = \tfrac{2}{T}(X^\top Xw)_i - \rho\hat\mu_i\;, +\] +where $\lambda$ is the budget Lagrange multiplier recovered analytically from +Section~\ref{sec:pd}. An excluded asset that violates this condition would decrease +portfolio variance if held at a small positive weight; it is re-added to the active set +and the loop repeats. The loop terminates when both primal and dual feasibility hold; +together with stationarity from the inner solve this certifies global optimality. + +\begin{algorithm}[ht] +\caption{Active-set outer loop (wraps any inner solver $f$)}\label{alg:elim} +\begin{algorithmic}[1] +\Require Inner solver $f$;\; tolerance $\varepsilon = 10^{-6}$ +\Ensure Weight vector $w \in \mathbb{R}^n$;\; total inner iteration count $k$ +\State $\mathrm{active} \leftarrow \{1,\ldots,n\}$;\; $k \leftarrow 0$ +\Loop + \State $(w_{\mathrm{a}},\, k_{\mathrm{step}}) \leftarrow f(\mathrm{active})$;\; $k \leftarrow k + k_{\mathrm{step}}$ + \If{$\exists\, i \in \mathrm{active}$ with $w_{a,i} < -\varepsilon$} + \State $\mathrm{active} \leftarrow \mathrm{active} \setminus \{i : w_{a,i} < -\varepsilon\}$ + \hfill\textit{(primal step: drop violators)} + \Else + \State $w_i \leftarrow w_{a,i}$ for $i \in \mathrm{active}$;\; $w_i \leftarrow 0$ for $i \notin \mathrm{active}$ + \State $g_i \leftarrow \tfrac{2}{T}(X^\top Xw)_i - \rho\hat\mu_i$ + \State $\lambda \leftarrow \mathrm{mean}_{j \in \mathrm{active}}(g_j)$ + \State $\mathcal{V} \leftarrow \{i \notin \mathrm{active} : g_i < \lambda - \varepsilon\}$ + \If{$\mathcal{V} = \emptyset$} + \State \textbf{break} \hfill\textit{(KKT satisfied; globally optimal)} + \EndIf + \State $\mathrm{active} \leftarrow \mathrm{active} \cup \mathcal{V}$ + \hfill\textit{(dual step: re-add violators)} + \EndIf +\EndLoop +\State \Return $w$, $k$ +\end{algorithmic} +\end{algorithm} + +\paragraph{Simplex-projection strategy} +The second strategy treats the two constraints as a single geometric object: the +probability simplex +$\Delta_n = \{w \in \mathbb{R}^n : \mathbf{1}^\top w = 1,\, w \geq 0\}$. +Rather than separating budget and non-negativity, gradient steps of the form +\[ + w_{k+1} = \Pi_{\Delta_n}\!\left(w_k - \tau\,\nabla f(w_k)\right) +\] +project directly onto $\Delta_n$ after each descent step, where $\Pi_{\Delta_n}$ +is the Euclidean projection. +Duchi et al.\ \cite{duchi2008} and Condat~\cite{condat2016} show that $\Pi_{\Delta_n}$ can be computed in +$O(n\log n)$ by a single sort-and-threshold pass, making each iteration cheap. +There is no outer loop: both constraints are handled simultaneously, and the method +terminates when the step size $\|w_{k+1} - w_k\|$ falls below a tolerance. + +Although $\hat\Sigma = \tilde{X}^\top\tilde{X}$ admits a stacked-matrix +representation (Section~\ref{sec:precond}), assembling or storing $\tilde{X}$ is +unnecessary. The gradient decomposes additively as +\[ + \nabla f(w) = \hat\Sigma\,w + = \frac{1-\alpha}{T}\,X^\top(Xw) + \alpha\,\hat\Sigma_0\,w\,, +\] +and each term is applied separately: the data contribution costs $O(nT)$ via two +matrix-vector products with $X$, and the target contribution costs $O(n^2)$ for a general +$\hat\Sigma_0$ or $O(n)$ for a scaled identity. The Lipschitz constant +$L = \lambda_{\max}(\hat\Sigma)$, needed for the step size $\tau = 1/(2L)$, +is estimated once by power iteration on the same two-term operator, also matrix-free. + +\begin{algorithm}[ht] +\caption{Simplex-projection solver (proximal gradient, matrix-free)}\label{alg:proximal} +\begin{algorithmic}[1] +\Require Returns matrix $X \in \mathbb{R}^{T\times n}$;\; shrinkage intensity $\alpha$;\; + target $\hat\Sigma_0 \in \mathbb{R}^{n\times n}$;\; tolerance $\varepsilon = 10^{-6}$ +\Ensure Weight vector $w \in \Delta_n$ +\State Define operator $\mathcal{A}$:\; $v \mapsto \tfrac{1-\alpha}{T}X^\top(Xv) + \alpha\,\hat\Sigma_0 v$ + \hfill\textit{($\hat\Sigma$ applied matrix-free)} +\State Estimate $L \leftarrow \lambda_{\max}(\mathcal{A})$ by power iteration + \hfill\textit{($O(n_{\mathrm{pow}}\cdot nT)$)} +\State $\tau \leftarrow (2L)^{-1}$;\; initialise $w_0 \in \mathbb{R}^n$ randomly;\; $k \leftarrow 0$ +\Loop + \State $g \leftarrow \mathcal{A}(w_k)$ + \hfill\textit{(gradient, $O(nT)$)} + \State $w_{k+1} \leftarrow \Pi_{\Delta_n}(w_k - \tau\,g)$ + \hfill\textit{(simplex projection, $O(n\log n)$)} + \If{$\|w_{k+1} - w_k\|_2 \leq \varepsilon$} + \textbf{ break} + \EndIf + \State $k \leftarrow k + 1$ +\EndLoop +\State \Return $w_{k+1}$ +\end{algorithmic} +\end{algorithm} + +\medskip +The two strategies make different tradeoffs. The active-set loop solves a sequence of +linear systems and benefits from fast inner solvers (Section~\ref{sec:solvers}); its +convergence to global optimality is certified by the KKT check. The simplex-projection +approach is single-stage and loop-free; unlike CG, it is not a Krylov method---each +step applies the gradient operator and projects onto $\Delta_n$ without building a Krylov +subspace, so residuals are not orthogonalised against prior directions. This is why its +convergence is $O(\kappa)$ gradient steps versus $O(\sqrt{\kappa})$ for CG; both methods +share the same $O(nT)$ per-step cost, so the $\sqrt{\kappa}$ gap in iteration count +translates directly to a runtime gap on ill-conditioned problems. +Section~\ref{sec:solvers} develops concrete implementations of both strategies. diff --git a/pyproject.toml b/pyproject.toml index 9cc288d..af9040d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ dev = [ "pandas>=3.0", "pyarrow>=24.0.0", "requests>=2.0", + "scikit-learn>=1.5", "yfinance>=0.2" ] diff --git a/src/fast_minimum_variance/__init__.py b/src/fast_minimum_variance/__init__.py index 1282c30..6aa9c97 100644 --- a/src/fast_minimum_variance/__init__.py +++ b/src/fast_minimum_variance/__init__.py @@ -64,4 +64,6 @@ def Problem( # noqa: N802 return _Problem(X, target=target, A=A, b=b, C=C, d=d, alpha=alpha, rho=rho, mu=mu) -__all__ = ["Problem"] +from .data import simulate_equity_returns # noqa: E402 + +__all__ = ["Problem", "simulate_equity_returns"] diff --git a/src/fast_minimum_variance/_base.py b/src/fast_minimum_variance/_base.py index 5ca6fb2..a092422 100644 --- a/src/fast_minimum_variance/_base.py +++ b/src/fast_minimum_variance/_base.py @@ -35,12 +35,20 @@ class _BaseProblem(ABC): alpha: float = 0.0 rho: float = 0.0 mu: np.ndarray | None = None + target_lr: tuple | None = None # (bar_lam, U_k, delta_k) — low-rank + identity factors + pcg_lr: tuple | None = None # (bar_lam, U_k, delta_k) — RMT preconditioner (§5.3) def __post_init__(self): - """Validate target shape when supplied; shrinkage is only active when target is not None.""" + """Validate target/target_lr shapes when supplied.""" n = self.n if self.target is not None and self.target.shape != (n, n): raise ValueError(f"target must be a square {n} x {n} matrix, got {self.target.shape}") # noqa: TRY003 + if self.target_lr is not None: + _bar_lam, U_k, delta_k = self.target_lr # noqa: N806 + if U_k.shape[0] != n or U_k.shape[1] != delta_k.shape[0]: + raise ValueError( # noqa: TRY003 + f"target_lr: U_k must be ({n}, k) and delta_k (k,), got {U_k.shape}, {delta_k.shape}" + ) # ------------------------------------------------------------------ # Shared utilities @@ -126,13 +134,13 @@ def solve_kkt(self, *, project: bool = True): >>> bool((w >= 0).all()) True """ - w, iters = self._constraint_active_set(self._kkt_step) + w, outer, _inner = self._constraint_active_set(self._kkt_step) if project: w = self._clip_and_renormalize(w) - return w, iters + return w, outer - def solve_cvxpy(self, *, project: bool = True): - """Solve via CVXPY / Clarabel (reference interior-point solver). + def solve_cvxpy(self, *, project: bool = True, backend: str = "clarabel"): + """Solve via CVXPY with a configurable backend solver. Requires the ``convex`` extra:: @@ -140,9 +148,11 @@ def solve_cvxpy(self, *, project: bool = True): Args: project: Clip and renormalize after solving (see ``solve_kkt``). + backend: CVXPY solver name (default ``"clarabel"``; ``"osqp"`` is + also supported). Returns: - ``(w, n_iters)`` — weight vector of shape ``(N,)`` and Clarabel + ``(w, n_iters)`` — weight vector of shape ``(N,)`` and solver iteration count. Examples: @@ -167,8 +177,9 @@ def solve_cvxpy(self, *, project: bool = True): if self.rho != 0.0 and self.mu is not None: objective = objective - self.rho * (self.mu @ w) + cvxpy_solver = cp.CLARABEL if backend.lower() == "clarabel" else cp.OSQP problem = cp.Problem(cp.Minimize(objective), self._cvxpy_constraints(w, cp)) - problem.solve(solver=cp.CLARABEL) + problem.solve(solver=cvxpy_solver) result = w.value if result is None: @@ -185,23 +196,56 @@ def solve_cg(self, *, project: bool = True): after solving. Set to ``False`` for custom constraints. Returns: - ``(w, n_iters)`` — weight vector of shape ``(N,)`` and total CG - iteration count across all outer active-set steps. + ``(w, outer_steps, inner_iters)`` — weight vector, number of outer + active-set steps, and total CG iterations summed across all steps. Examples: >>> import numpy as np >>> from fast_minimum_variance import Problem >>> X = np.random.default_rng(0).standard_normal((100, 5)) - >>> w, iters = Problem(X).solve_cg() + >>> w, outer, inner = Problem(X).solve_cg() >>> float(round(w.sum(), 10)) 1.0 >>> bool((w >= 0).all()) True """ - w, iters = self._constraint_active_set(self._cg_step) + w, outer, inner = self._constraint_active_set(self._cg_step) if project: w = self._clip_and_renormalize(w) - return w, iters + return w, outer, inner + + def solve_pcg(self, *, project: bool = True): + """Solve via matrix-free PCG with RMT preconditioner (Section 5.3). + + Solves ``Sigma_LW_oracle x = 1`` using ``T0^RMT`` as preconditioner. + Requires ``pcg_lr = (bar_lam, U_k, delta_k)`` from RMT preprocessing. + The preconditioner is applied via the Woodbury identity at O(nk) per step; + the system matvec costs O(nT). Returns the oracle-LW minimum-variance + portfolio — not the RMT portfolio — in O(sqrt(1/alpha_oracle)) iterations. + + Returns: + ``(w, outer_steps, inner_iters)`` + + Examples: + >>> import numpy as np + >>> from fast_minimum_variance import Problem + >>> rng = np.random.default_rng(0) + >>> X = rng.standard_normal((100, 5)) + >>> bar_lam = float(np.trace(X.T @ X / 100) / 5) + >>> U_k = np.eye(5, 2) + >>> delta_k = np.array([0.1, 0.05]) + >>> w, outer, inner = Problem(X, alpha=0.1, pcg_lr=(bar_lam, U_k, delta_k)).solve_pcg() + >>> float(round(w.sum(), 10)) + 1.0 + >>> bool((w >= 0).all()) + True + """ + if self.pcg_lr is None: + raise ValueError("pcg_lr must be set; pass pcg_lr=(bar_lam, U_k, delta_k)") # noqa: TRY003 + w, outer, inner = self._constraint_active_set(self._pcg_step) + if project: + w = self._clip_and_renormalize(w) + return w, outer, inner def solve_nnls(self, *, project: bool = True): """Solve via non-negative least squares (scipy.optimize.nnls). @@ -235,6 +279,64 @@ def solve_nnls(self, *, project: bool = True): w = self._clip_and_renormalize(w) return w, iters + def solve_osqp(self, *, project: bool = True): + """Solve via OSQP (operator-splitting QP solver). + + Assembles ``P = 2·Σ_LW`` as a sparse upper-triangular CSC matrix and + calls OSQP directly. Returns ``(w, iters)`` where ``iters`` is the + OSQP iteration count. + + Args: + project: Clip and renormalize after solving (see ``solve_kkt``). + + Returns: + ``(w, n_iters)`` — weight vector of shape ``(N,)`` and number of + OSQP iterations. + + Examples: + >>> import numpy as np + >>> from fast_minimum_variance import Problem + >>> X = np.random.default_rng(0).standard_normal((100, 5)) + >>> w, iters = Problem(X).solve_osqp() + >>> float(round(w.sum(), 6)) + 1.0 + >>> bool((w >= -1e-6).all()) + True + """ + n = self.n + + if self.target is None: + p_dense = 2.0 * ((self.X.T @ self.X) / self.t) + else: + p_dense = 2.0 * ((1 - self.alpha) * (self.X.T @ self.X) / self.t + self.alpha * self.target) + + p_upper = triu(csc_matrix(p_dense), format="csc") + + q = np.zeros(n) + if self.rho != 0.0 and self.mu is not None: + q = -self.rho * self.mu + + a_mat, l_vec, u_vec = self._osqp_constraints() + + solver = osqp.OSQP() + solver.setup( + p_upper, + q, + a_mat, + l_vec, + u_vec, + warm_starting=True, + verbose=False, + eps_abs=1e-8, + eps_rel=1e-8, + ) + result = solver.solve() + + w = np.array(result.x) + if project: + w = self._clip_and_renormalize(w) + return w, result.info.iter + def solve_clarabel(self, *, project: bool = True): """Solve via Clarabel interior-point solver (direct API, no CVXPY overhead). @@ -284,52 +386,98 @@ def solve_clarabel(self, *, project: bool = True): w = self._clip_and_renormalize(w) return w, sol.iterations - def solve_osqp(self, *, project: bool = True): - """Solve via OSQP (operator-splitting QP solver, direct API, no CVXPY overhead). + def solve_proximal(self, *, project: bool = True): + """Solve via proximal gradient descent projected onto the probability simplex. - Assembles ``P = 2·Σ_LW`` as a sparse upper-triangular CSC matrix and - calls OSQP directly. The problem-specific constraint data is supplied - by ``_osqp_constraints``. Returns ``(w, iters)`` where ``iters`` is - the number of ADMM iterations. + Minimises ``0.5 * w^T Σ_LW w`` subject to ``w >= 0, sum(w) = 1``. + The gradient is computed in two separate terms so that the per-step + cost remains ``O(nT)`` regardless of whether shrinkage is applied: + + grad = (1-alpha)/T * X^T(Xw) + alpha * target @ w + + This avoids stacking a (T+n)xn matrix, which would inflate per-step + cost by O(n) under shrinkage. Return tilt (``rho != 0``) is not supported. Args: project: Clip and renormalize after solving (see ``solve_kkt``). Returns: - ``(w, n_iters)`` — weight vector of shape ``(N,)`` and number of - ADMM iterations. + ``(w, n_iters)`` — weight vector of shape ``(N,)`` and the number + of gradient steps taken. Examples: >>> import numpy as np >>> from fast_minimum_variance import Problem >>> X = np.random.default_rng(0).standard_normal((100, 5)) - >>> w, iters = Problem(X).solve_osqp() - >>> float(round(w.sum(), 6)) + >>> w, iters = Problem(X).solve_proximal() + >>> float(round(w.sum(), 10)) 1.0 - >>> bool((w >= -1e-6).all()) + >>> bool((w >= 0).all()) True """ - n = self.n + from .proximal import prox_gradient - if self.target is None: - p_dense = 2.0 * ((self.X.T @ self.X) / self.t) + if self.target is not None and self.alpha > 0.0: + c = 1.0 - self.alpha + mat = np.sqrt(c) / np.sqrt(self.t) * self.X + alpha, target = self.alpha, self.target + + def extra_grad(v, a=alpha, tgt=target): + """Return the shrinkage gradient contribution a * target @ v.""" + return a * (tgt @ v) else: - p_dense = 2.0 * ((1 - self.alpha) * (self.X.T @ self.X) / self.t + self.alpha * self.target) + mat = self.X / np.sqrt(self.t) + extra_grad = None - # p_dense = 2.0 * ((1-self.alpha) * (self.X.T @ self.X)/self.t + self.alpha * self.target) - p_upper = triu(p_dense, format="csc") + vec = np.zeros(self.t) + w, n_iters = prox_gradient(mat, vec, extra_grad=extra_grad) + if project: + w = self._clip_and_renormalize(w) + return w, n_iters - q = np.zeros(n) - if self.rho != 0.0 and self.mu is not None: - q = -self.rho * self.mu + def solve_fista(self, *, project: bool = True): + r"""Solve via Nesterov-accelerated proximal gradient (FISTA). - a_mat, l_vec, u_vec = self._osqp_constraints() + Uses the Beck-Teboulle momentum sequence to achieve $O(1/k^2)$ + convergence for convex objectives; for strongly convex $f$ with + condition number $\\kappa$ the linear rate is $(1-1/\\sqrt{\\kappa})^k$, + matching CG's asymptotic iteration count. Same per-step cost + $O(nT)$ as ``solve_proximal``, typically 2--10$\\times$ fewer + iterations. + + Args: + project: Clip and renormalize after solving (see ``solve_proximal``). + + Returns: + ``(w, n_iters)`` — weight vector of shape ``(N,)`` and the number + of gradient steps taken. + + Examples: + >>> import numpy as np + >>> from fast_minimum_variance import Problem + >>> X = np.random.default_rng(0).standard_normal((100, 5)) + >>> w, iters = Problem(X).solve_fista() + >>> float(round(w.sum(), 10)) + 1.0 + >>> bool((w >= 0).all()) + True + """ + from .proximal import fista_gradient - prob = osqp.OSQP() - prob.setup(p_upper, q, a_mat, l_vec, u_vec, verbose=False) - res = prob.solve() + if self.target is not None and self.alpha > 0.0: + c = 1.0 - self.alpha + mat = np.sqrt(c) / np.sqrt(self.t) * self.X + alpha, target = self.alpha, self.target + + def extra_grad(v, a=alpha, tgt=target): + """Return the shrinkage gradient contribution a * target @ v.""" + return a * (tgt @ v) + else: + mat = self.X / np.sqrt(self.t) + extra_grad = None - w = np.array(res.x) + vec = np.zeros(self.t) + w, n_iters = fista_gradient(mat, vec, extra_grad=extra_grad) if project: w = self._clip_and_renormalize(w) - return w, res.info.iter + return w, n_iters diff --git a/src/fast_minimum_variance/data/__init__.py b/src/fast_minimum_variance/data/__init__.py new file mode 100644 index 0000000..12f7fb2 --- /dev/null +++ b/src/fast_minimum_variance/data/__init__.py @@ -0,0 +1,5 @@ +"""Synthetic equity return data for benchmarking and testing.""" + +from ._simulate import simulate_equity_returns + +__all__ = ["simulate_equity_returns"] diff --git a/src/fast_minimum_variance/data/_simulate.py b/src/fast_minimum_variance/data/_simulate.py new file mode 100644 index 0000000..24f5846 --- /dev/null +++ b/src/fast_minimum_variance/data/_simulate.py @@ -0,0 +1,79 @@ +"""Simulate equity returns from a latent factor model.""" + +import numpy as np + +__all__ = ["simulate_equity_returns"] + + +def simulate_equity_returns( + n: int, + T: int, # noqa: N803 + *, + k: int | None = None, + rng: np.random.Generator | int | None = None, +) -> np.ndarray: + """Simulate a TxN demeaned equity return matrix with latent factor structure. + + Returns are generated from the model + + X = F @ B.T + E + + where F (TxK) are factor returns, B (NxK) are factor loadings, and E (TxN) + is idiosyncratic noise. The first factor is a market factor with universally + positive loadings and high variance; the remaining k-1 factors are + style/industry factors with sparse loadings. This produces a covariance + spectrum qualitatively similar to equity universes: a dominant market + eigenvalue, a handful of secondary factor eigenvalues, and a long tail of + near-equal idiosyncratic eigenvalues. + + Parameters + ---------- + n: + Number of assets. + T: + Number of time periods (trading days). + k: + Number of latent factors. Defaults to ``max(3, n // 10)``. + rng: + Random state — a :class:`numpy.random.Generator`, an integer seed, + or ``None`` (non-reproducible). + + Returns: + ------- + X : ndarray of shape (T, n) + Demeaned return matrix. Each column has zero mean. + + Examples: + -------- + >>> X = simulate_equity_returns(100, 200, k=5, rng=0) + >>> X.shape + (200, 100) + >>> bool(abs(X.mean(axis=0)).max() < 1e-14) + True + """ + rng = np.random.default_rng(rng) + if k is None: + k = max(3, n // 10) + + # Factor volatilities (daily): market ~1 %, style factors ~0.5 % + factor_vols = np.concatenate([[0.01], np.full(k - 1, 0.005)]) + + # Factor returns: T x k + F = rng.standard_normal((T, k)) * factor_vols # noqa: N806 + + # Factor loadings: n x k + # Market: all assets have positive exposure in [0.4, 0.8] + # Style: sparse (~50 % non-zero), drawn from N(0, 0.2) + B = np.zeros((n, k)) # noqa: N806 + B[:, 0] = rng.uniform(0.4, 0.8, size=n) + for j in range(1, k): + mask = rng.random(n) < 0.5 + B[mask, j] = rng.standard_normal(int(mask.sum())) * 0.2 + + # Idiosyncratic volatility: uniform in [0.5 %, 1.5 %] per asset + idio_vols = rng.uniform(0.005, 0.015, size=n) + E = rng.standard_normal((T, n)) * idio_vols # noqa: N806 + + X = F @ B.T + E # noqa: N806 + X -= X.mean(axis=0) # noqa: N806 + return X diff --git a/src/fast_minimum_variance/minvar_problem.py b/src/fast_minimum_variance/minvar_problem.py index fd9f9e2..cc2ac5f 100644 --- a/src/fast_minimum_variance/minvar_problem.py +++ b/src/fast_minimum_variance/minvar_problem.py @@ -5,6 +5,7 @@ import clarabel import numpy as np from cvx.linalg import cholesky +from scipy.linalg import solve as spd_solve from scipy.optimize import nnls from scipy.sparse import csc_matrix, eye, vstack from scipy.sparse.linalg import LinearOperator, cg @@ -48,6 +49,50 @@ class _MinVarProblem(_BaseProblem): # No extra fields — X, alpha, rho, mu all inherited from _BaseProblem. + # ------------------------------------------------------------------ + # Shared helpers used by both active-set loop variants + # ------------------------------------------------------------------ + + def _compute_gradient(self, w): + """Return the full objective gradient at w, including rho*mu adjustment.""" + data_grad = (self.X.T @ (self.X @ w)) / self.t + if self.target_lr is not None: + bar_lam, U_k, delta_k = self.target_lr # noqa: N806 + tgt_w = bar_lam * w + U_k @ (delta_k * (U_k.T @ w)) + grad = 2.0 * ((1 - self.alpha) * data_grad + self.alpha * tgt_w) + elif self.target is not None: + grad = 2.0 * ((1 - self.alpha) * data_grad + self.alpha * self.target @ w) + else: + grad = 2.0 * data_grad + if self.rho != 0.0 and self.mu is not None: + grad = grad - self.rho * self.mu + return grad + + @staticmethod + def _primal_drop(w_a, asset_active, tol): + """Drop negative-weight assets from active set in-place; return True if any dropped.""" + if not np.any(w_a < -tol): + return False + idx = np.where(asset_active)[0] + strong = w_a < -10 * tol + if np.any(strong): + asset_active[idx[strong]] = False + else: + asset_active[idx[np.argmin(w_a)]] = False + return True + + def _dual_add(self, grad, asset_active, tol): + """Return index of excluded asset that violates KKT dual condition, or -1 if none.""" + excluded = ~asset_active + if not excluded.any(): + return -1 + g_a = grad[asset_active] + lambda_ = np.median(g_a) if g_a.size > 5 else g_a.mean() + nu = grad - lambda_ + idx_ex = np.where(excluded)[0] + j = idx_ex[np.argmin(nu[excluded])] + return int(j) if nu[j] < -tol else -1 + # ------------------------------------------------------------------ # Outer loop: primal elimination + dual feasibility check # ------------------------------------------------------------------ @@ -62,77 +107,38 @@ def _constraint_active_set(self, solve_fn, tol=1e-6, max_iter=10_000): """ n = self.n asset_active = np.ones(n, dtype=bool) - total_iters = 0 - + total_inner_iters = 0 + outer_steps = 0 prev_active = None + w = np.zeros(n) for _ in range(max_iter): if prev_active is not None and np.array_equal(prev_active, asset_active): break # pragma: no cover - structurally unreachable safety guard prev_active = asset_active.copy() - # === Solve === w_a, step_iters = solve_fn(asset_active) - total_iters += step_iters - - # === PRIMAL STEP === - neg = w_a < -tol - if np.any(neg): - idx = np.where(asset_active)[0] + outer_steps += 1 + total_inner_iters += step_iters - strong = w_a < -10 * tol + if self._primal_drop(w_a, asset_active, tol): + continue - if np.any(strong): - asset_active[idx[strong]] = False - else: - j = idx[np.argmin(w_a)] - asset_active[j] = False - - continue # CRITICAL - - # === Assemble full vector === w = np.zeros(n) w[asset_active] = w_a - # === Gradient === - if self.target is None: - grad = 2.0 * (self.X.T @ (self.X @ w)) / self.t - else: - grad = 2.0 * ((1 - self.alpha) * (self.X.T @ (self.X @ w)) / self.t + self.alpha * self.target @ w) - - if self.rho != 0.0 and self.mu is not None: - grad = grad - self.rho * self.mu - - # === Lambda === - g_a = grad[asset_active] - lambda_ = np.median(g_a) if g_a.size > 5 else g_a.mean() - - # === Dual === - nu = grad - lambda_ - - excluded = ~asset_active - if not excluded.any(): + j = self._dual_add(self._compute_gradient(w), asset_active, tol) + if j < 0: break - - nu_ex = nu[excluded] - idx_ex = np.where(excluded)[0] - - j = idx_ex[np.argmin(nu_ex)] - violate = nu[j] - - # === DUAL STEP === - if violate >= -tol: - break - asset_active[j] = True - return w, total_iters + return w, outer_steps, total_inner_iters # ------------------------------------------------------------------ # Inner steps # ------------------------------------------------------------------ - def _kkt_step(self, active): + def _kkt_step(self, active, x0=None): """Solve the reduced SPD system directly; return ``(w_a, 1)``. Stationarity gives ``2*Sigma_a*w_a = lambda*1 + rho*mu_a``. A single @@ -148,10 +154,10 @@ def _kkt_step(self, active): sigma = (1.0 - self.alpha) * (x_a.T @ x_a) / self.t + self.alpha * self.target[np.ix_(active, active)] if self.rho == 0.0 or self.mu is None: - v = np.linalg.solve(sigma, np.ones(n_a)) + v = spd_solve(sigma, np.ones(n_a), assume_a="pos") return v / v.sum(), 1 - v1, v2 = np.linalg.solve(sigma, np.column_stack([np.ones(n_a), self.mu[active]])).T + v1, v2 = spd_solve(sigma, np.column_stack([np.ones(n_a), self.mu[active]]), assume_a="pos").T half_rho = 0.5 * self.rho half_lambda = (1.0 - half_rho * v2.sum()) / v1.sum() return half_lambda * v1 + half_rho * v2, 1 @@ -160,45 +166,275 @@ def _cvxpy_constraints(self, w, cp): """Return budget-equality and long-only inequality constraints for CVXPY.""" return [cp.sum(w) == 1, w >= 0] - def _cg_step(self, active): + def _cg_step(self, active, x0=None): """Solve the reduced SPD system via matrix-free CG; return ``(w_a, iters)``. - Builds a ``LinearOperator`` for ``v -> (1-alpha)*X_a'*(X_a*v) + gamma*v`` + Builds a ``LinearOperator`` for ``v -> (1-alpha)*X_a'*(X_a*v) + alpha*T0_a*v`` and runs conjugate gradients without ever forming ``Sigma_a`` explicitly. + When ``target_lr = (bar_lam, U_k, delta_k)`` is supplied the target term is + applied as ``bar_lam*v + U_k_a @ (delta_k * (U_k_a.T @ v))`` at O(n_a*k) + per call instead of O(n_a^2) for a dense submatrix. + + Args: + active: Boolean mask selecting the active asset subset. + x0: Optional initial guess for the first CG solve (warm start). + When provided it must have shape ``(active.sum(),)``. """ x_a = self.X[:, active] n_a = int(active.sum()) - target_sub = self.target[np.ix_(active, active)] if self.target is not None else None + + # Build the target application — prefer low-rank factors over dense matrix. + if self.target_lr is not None: + bar_lam_lr, U_k_lr, delta_k_lr = self.target_lr # noqa: N806 + U_k_a = U_k_lr[active, :] # noqa: N806 # (n_a, k) + c_data = 1.0 - self.alpha + c_lr = self.alpha + + def _apply_target(v): + """Apply low-rank target: bar_lam * v + U (delta * (U^T v)).""" + return bar_lam_lr * v + U_k_a @ (delta_k_lr * (U_k_a.T @ v)) + else: + target_sub = self.target[np.ix_(active, active)] if self.target is not None else None + c_data = 1.0 - self.alpha if target_sub is not None else 1.0 + c_lr = self.alpha if target_sub is not None else 0.0 + + def _apply_target(v): + """Apply dense target submatrix to v.""" + return target_sub @ v # type: ignore[operator] + + count1 = [0] def matvec(v): - """Apply Sigma_LW matrix-free: v -> (1-alpha)/T * X_a'*(X_a*v) + alpha*target_a*v.""" - if target_sub is None: - return (x_a.T @ (x_a @ v)) / self.t - return (1.0 - self.alpha) * (x_a.T @ (x_a @ v)) / self.t + self.alpha * (target_sub @ v) + """Apply Sigma_a to v for the budget-constraint CG solve.""" + count1[0] += 1 + result = c_data * (x_a.T @ (x_a @ v)) / self.t + if c_lr: + result = result + c_lr * _apply_target(v) + return result op = LinearOperator((n_a, n_a), matvec=matvec, dtype=np.float64) # type: ignore[call-arg, missing-argument, unknown-argument, parameter-already-assigned] # ty:ignore[missing-argument, parameter-already-assigned, unknown-argument] - iters = [0] + if self.rho == 0.0 or self.mu is None: + v, _ = cg(op, np.ones(n_a), x0=x0) + return v / v.sum(), count1[0] + + count2 = [0] + + def matvec2(v): + """Apply Sigma_a to v for the return-tilt CG solve.""" + count2[0] += 1 + result = c_data * (x_a.T @ (x_a @ v)) / self.t + if c_lr: + result = result + c_lr * _apply_target(v) + return result + + op2 = LinearOperator((n_a, n_a), matvec=matvec2, dtype=np.float64) # type: ignore[call-arg, missing-argument, unknown-argument, parameter-already-assigned] # ty:ignore[missing-argument, parameter-already-assigned, unknown-argument] + v1, _ = cg(op, np.ones(n_a), x0=x0) + v2, _ = cg(op2, self.mu[active], x0=x0) + half_rho = 0.5 * self.rho + half_lambda = (1.0 - half_rho * v2.sum()) / v1.sum() + return half_lambda * v1 + half_rho * v2, count1[0] + count2[0] - def _count(_): - """Increment CG iteration counter for the first solve.""" - iters[0] += 1 + def _pcg_step(self, active, x0=None): + """Solve the reduced SPD system via PCG with RMT preconditioner; return (w_a, iters). - if self.rho == 0.0 or self.mu is None: - v, _ = cg(op, np.ones(n_a), callback=_count) - return v / v.sum(), iters[0] + The system matrix is the oracle-LW covariance (using self.alpha and self.target). + The preconditioner P = T0^RMT is applied via the Woodbury identity: + P^{-1} v = (1/bar_lam) v + U_k diag(1/lambda_k - 1/bar_lam) U_k^T v + costing O(n_a * k) per application. Requires self.pcg_lr to be set. + """ + x_a = self.X[:, active] + n_a = int(active.sum()) - iters2 = [0] + # System matvec — identical path to _cg_step + if self.target_lr is not None: + bar_lam_lr, U_k_lr, delta_k_lr = self.target_lr # noqa: N806 + U_k_a_sys = U_k_lr[active, :] # noqa: N806 + c_data, c_lr = 1.0 - self.alpha, self.alpha - def _count2(_): - """Increment CG iteration counter for the second solve.""" - iters2[0] += 1 + def _apply_system(v): + return bar_lam_lr * v + U_k_a_sys @ (delta_k_lr * (U_k_a_sys.T @ v)) + else: + target_sub = self.target[np.ix_(active, active)] if self.target is not None else None + c_data = 1.0 - self.alpha if target_sub is not None else 1.0 + c_lr = self.alpha if target_sub is not None else 0.0 - v1, _ = cg(op, np.ones(n_a), callback=_count) - v2, _ = cg(op, self.mu[active], callback=_count2) - half_rho = 0.5 * self.rho - half_lambda = (1.0 - half_rho * v2.sum()) / v1.sum() - return half_lambda * v1 + half_rho * v2, iters[0] + iters2[0] + def _apply_system(v): + return target_sub @ v # type: ignore[operator] + + count = [0] + + def matvec(v): + count[0] += 1 + result = c_data * (x_a.T @ (x_a @ v)) / self.t + if c_lr: + result = result + c_lr * _apply_system(v) + return result + + op = LinearOperator((n_a, n_a), matvec=matvec, dtype=np.float64) # type: ignore[call-arg] + + # Preconditioner P^{-1}: Woodbury inverse of T0^RMT restricted to active set + bar_lam_p, U_k_p, delta_k_p = self.pcg_lr # type: ignore[misc] # noqa: N806 + U_k_a_p = U_k_p[active, :] # noqa: N806 # (n_a, k) + inv_coeff = 1.0 / (bar_lam_p + delta_k_p) - 1.0 / bar_lam_p # (k,) negative + + def precond(v): + return (1.0 / bar_lam_p) * v + U_k_a_p @ (inv_coeff * (U_k_a_p.T @ v)) + + M_op = LinearOperator((n_a, n_a), matvec=precond, dtype=np.float64) # type: ignore[call-arg] # noqa: N806 + + v, _ = cg(op, np.ones(n_a), x0=x0, M=M_op) + return v / v.sum(), count[0] + + def _constraint_active_set_warm(self, solve_fn=None, tol=1e-6, max_iter=10_000, warm_start=None): + """Active-set loop with warm-starting; returns ``(w, iters, active, w_full)``. + + Generalises ``_constraint_active_set``: accepts an initial active set + and passes the previous iterate as a starting guess to the inner solver. + Solvers that support an initial guess (CG via ``_cg_step``) benefit from + both the warm active set and the x0; direct solvers (KKT via + ``_kkt_step``) accept and silently ignore the x0, profiting only from + the warm active set. + + Args: + solve_fn: Inner solver callable ``(active, x0=None) -> (w_a, iters)``. + Defaults to ``self._cg_step``. + tol: Primal feasibility tolerance; assets with weight below ``-tol`` + are dropped from the active set. + max_iter: Maximum number of outer active-set iterations. + warm_start: Optional ``(active_mask, w_full)`` from a previous call. + ``active_mask`` is a boolean array of length ``n``; + ``w_full`` is the full ``n``-vector of weights. + + Returns: + ``(w, total_iters, final_active, w_full)`` — solution, cumulative + iteration count, final active-set mask, and full weight vector + suitable as the ``warm_start`` argument for the next solve. + """ + if solve_fn is None: + solve_fn = self._cg_step + n = self.n + + if warm_start is not None: + asset_active, last_w_full = warm_start + asset_active = asset_active.copy() + else: + asset_active = np.ones(n, dtype=bool) + last_w_full = None + + total_inner_iters = 0 + outer_steps = 0 + prev_active = None + w = np.zeros(n) + + for _ in range(max_iter): + if prev_active is not None and np.array_equal(prev_active, asset_active): + break # pragma: no cover - structurally unreachable safety guard + prev_active = asset_active.copy() + + x0 = None + if last_w_full is not None: + sub = last_w_full[asset_active] + s = sub.sum() + if s > 1e-12: + x0 = sub / s + + w_a, step_iters = solve_fn(asset_active, x0=x0) + outer_steps += 1 + total_inner_iters += step_iters + + if self._primal_drop(w_a, asset_active, tol): + continue + + w = np.zeros(n) + w[asset_active] = w_a + last_w_full = w.copy() + + j = self._dual_add(self._compute_gradient(w), asset_active, tol) + if j < 0: + break + asset_active[j] = True + + return w, outer_steps, total_inner_iters, asset_active.copy(), last_w_full + + def solve_cg_warm(self, *, project=True, warm_start=None): + """Solve via matrix-free CG with warm-starting. + + Like ``solve_cg`` but accepts and returns warm-start state so that a + sequence of related problems (e.g. an efficient-frontier sweep over + many ``rho`` values) can chain solves together. Adjacent problems share + a similar active set and similar solution, so subsequent solves need + far fewer outer iterations and CG steps. + + Args: + project: Clip weights to ``[0, ∞)`` and renormalize to sum to 1. + warm_start: ``(active_mask, w_full)`` returned by a previous call, + or ``None`` for a cold start. + + Returns: + ``(w, outer_steps, inner_iters, warm_state)`` — weight vector, + number of outer active-set steps, cumulative CG iteration count, + and warm state for the next call in the sequence. + + Examples: + >>> import numpy as np + >>> from fast_minimum_variance import Problem + >>> X = np.random.default_rng(0).standard_normal((100, 5)) + >>> mu = np.ones(5) * 0.01 + >>> warm = None + >>> for rho in [0.0, 0.5, 1.0]: + ... p = Problem(X, rho=rho, mu=mu) + ... w, outer, inner, warm = p.solve_cg_warm(warm_start=warm) + >>> float(round(w.sum(), 10)) + 1.0 + >>> bool((w >= 0).all()) + True + """ + w, outer, inner, final_active, final_w = self._constraint_active_set_warm( + solve_fn=self._cg_step, warm_start=warm_start + ) + if project: + w = self._clip_and_renormalize(w) + return w, outer, inner, (final_active, final_w) + + def solve_kkt_warm(self, *, project=True, warm_start=None): + """Solve via direct KKT factorisation with active-set warm-starting. + + Like ``solve_kkt`` but accepts and returns warm-start state for chaining + a sequence of related problems. KKT is a direct solver, so only the + active-set mask is warm-started (not an iterative initial guess); the + benefit is fewer outer primal-dual iterations when consecutive problems + share a similar active set. + + Args: + project: Clip weights to ``[0, ∞)`` and renormalize to sum to 1. + warm_start: ``(active_mask, w_full)`` returned by a previous call, + or ``None`` for a cold start. + + Returns: + ``(w, outer_steps, warm_state)`` — weight vector, number of outer + active-set steps, and warm state for the next call in the sequence. + + Examples: + >>> import numpy as np + >>> from fast_minimum_variance import Problem + >>> X = np.random.default_rng(0).standard_normal((100, 5)) + >>> mu = np.ones(5) * 0.01 + >>> warm = None + >>> for rho in [0.0, 0.5, 1.0]: + ... p = Problem(X, rho=rho, mu=mu) + ... w, outer, warm = p.solve_kkt_warm(warm_start=warm) + >>> float(round(w.sum(), 10)) + 1.0 + >>> bool((w >= 0).all()) + True + """ + w, outer, _inner, final_active, final_w = self._constraint_active_set_warm( + solve_fn=self._kkt_step, warm_start=warm_start + ) + if project: + w = self._clip_and_renormalize(w) + return w, outer, (final_active, final_w) def _clarabel_constraints(self): """Return budget-equality and long-only inequality constraints for Clarabel.""" diff --git a/src/fast_minimum_variance/problem.py b/src/fast_minimum_variance/problem.py index e0fab7b..c0c42ab 100644 --- a/src/fast_minimum_variance/problem.py +++ b/src/fast_minimum_variance/problem.py @@ -92,17 +92,19 @@ def _constraint_active_set(self, solve_fn, tol=1e-6, max_iter=10_000): assert self.d is not None # noqa: S101 p = self.d.size active = np.zeros(p, dtype=bool) + outer_steps = 0 total_iters = 0 while True: w, step_iters = solve_fn(active) violations = self.C[:, ~active].T @ w - self.d[~active] + outer_steps += 1 total_iters += step_iters if np.all(violations <= 1e-10): break active[~active] |= violations > 1e-10 - return w, total_iters + return w, outer_steps, total_iters # ------------------------------------------------------------------ # Inner steps (called by the template solve_* methods on the base) diff --git a/src/fast_minimum_variance/proximal.py b/src/fast_minimum_variance/proximal.py new file mode 100644 index 0000000..2acac55 --- /dev/null +++ b/src/fast_minimum_variance/proximal.py @@ -0,0 +1,237 @@ +"""Fast solver for 0.5 ||mat @ x - vec||^2 s. t. {x >= 0, sum(x) = 1}. + +This module implements proximal gradient descent for constrained linear least squares +optimization on the probability simplex. The algorithm is based on iterative projection +using the efficient simplex projection from Duchi et al. (2008). + +The gradient is evaluated matrix-free as mat.T @ (mat @ w), avoiding explicit assembly +of the n x n normal matrix mat.T @ mat. The Lipschitz constant is estimated via power +iteration, also matrix-free. + +References: +---------- +Duchi, J., Shalev-Shwartz, S., Singer, Y., & Chandra, T. (2008). +"Efficient Projections onto the l1-Ball for Learning in High Dimensions." +Proceedings of the 25th International Conference on Machine Learning (ICML). +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from numpy.typing import NDArray + + +def proj_simplex( + vec: NDArray[np.floating], + rad: float = 1.0, +) -> NDArray[np.floating]: + """Project a vector onto the probability simplex. + + This function computes the Euclidean projection of a given vector onto the probability + simplex. The simplex is defined as the set of non-negative vectors that sum to a + given radius, typically 1. The projection ensures that the resulting vector satisfies + these constraints. + + The algorithm is based on Duchi et al. (2008) "Efficient Projections onto the + l1-Ball for Learning in High Dimensions". + + Parameters + ---------- + vec : NDArray[np.floating] + Input vector that is to be projected onto the simplex. + rad : float, optional + Radius of the simplex. The projected vector will have components summing + to this value. Default is 1.0. + + Returns: + ------- + NDArray[np.floating] + The projected vector that lies on the probability simplex. + + Raises: + ------ + ValueError + If the input vector is empty. + + Examples: + -------- + >>> import numpy as np + >>> vec = np.array([1.0, 2.0, 3.0]) + >>> result = proj_simplex(vec) + >>> bool(np.isclose(result.sum(), 1.0)) + True + >>> bool(np.all(result >= 0)) + True + + """ + muu = np.sort(vec)[::-1] + cummeans = 1 / np.arange(1, len(vec) + 1) * (np.cumsum(muu) - rad) + rho = max(np.where(muu > cummeans)[0]) + result: NDArray[np.floating] = np.maximum(vec - cummeans[rho], 0) + return result + + +def _lipschitz( + mat: NDArray[np.floating], + extra_matvec=None, + n_iter: int = 30, + rng: np.random.Generator | None = None, +) -> float: + """Estimate lambda_max(mat.T @ mat + extra) via power iteration (matrix-free). + + extra_matvec: optional callable v -> extra @ v for a second SPD contribution. + Each iteration costs O(rows * cols) — two matrix-vector products with mat — + and never forms the cols x cols normal matrix. + """ + if rng is None: + rng = np.random.default_rng() + v = rng.standard_normal(mat.shape[1]) + v /= np.linalg.norm(v) + lip = 1.0 + for _ in range(n_iter): + w = mat.T @ (mat @ v) + if extra_matvec is not None: + w = w + extra_matvec(v) + lip = float(np.linalg.norm(w)) + if lip < 1e-15: + return lip + v = w / lip + return lip + + +def fista_gradient( + mat: NDArray[np.floating], + vec: NDArray[np.floating], + *, + extra_grad=None, + eps_rel: float = 1e-6, + max_iter: int = 100000, +) -> tuple[NDArray[np.floating], int]: + r"""FISTA (Nesterov-accelerated proximal gradient) on the probability simplex. + + Same interface as ``prox_gradient`` but uses the Beck-Teboulle momentum + sequence $t_{k+1} = (1 + \\sqrt{1+4t_k^2})/2$ to achieve $O(1/k^2)$ + convergence for convex objectives (versus $O(1/k)$ for plain gradient + descent). For strongly convex $f$ with condition number $\\kappa$ the + linear convergence rate is $(1 - 1/\\sqrt{\\kappa})^k$, matching CG's + asymptotic iteration count. + + The gradient is evaluated at the extrapolated point $y_k$; the simplex + projection is applied to obtain $x_k$; the momentum update then forms + $y_{k+1} = x_k + \\frac{t_k-1}{t_{k+1}}(x_k - x_{k-1})$. + + References: + ---------- + Beck, A., & Teboulle, M. (2009). "A Fast Iterative Shrinkage-Thresholding + Algorithm for Linear Inverse Problems." SIAM Journal on Imaging Sciences. + + Examples: + -------- + >>> import numpy as np + >>> mat = np.array([[1.0, 0.5], [0.5, 1.0]]) + >>> vec = np.ones(2) + >>> result, _ = fista_gradient(mat, vec) + >>> bool(np.isclose(result.sum(), 1.0)) + True + + """ + rng = np.random.default_rng() + lip = _lipschitz(mat, extra_matvec=extra_grad, rng=rng) + step = 1.0 / lip if lip > 1e-15 else 1.0 + out_prod = mat.T @ vec + + x = proj_simplex(rng.standard_normal(mat.shape[1])) + y = x.copy() + t = 1.0 + + for ite in range(1, max_iter + 1): # noqa: B007 + grad = mat.T @ (mat @ y) - out_prod + if extra_grad is not None: + grad = grad + extra_grad(y) + x_new = proj_simplex(y - step * grad) + + t_new = 0.5 * (1.0 + np.sqrt(1.0 + 4.0 * t * t)) + y = x_new + ((t - 1.0) / t_new) * (x_new - x) + + err = float(np.linalg.norm(x - x_new)) + x = x_new + t = t_new + + if err < eps_rel: + break + + return x, ite + + +def prox_gradient( + mat: NDArray[np.floating], + vec: NDArray[np.floating], + *, + extra_grad=None, + eps_rel: float = 1e-6, + max_iter: int = 100000, +) -> tuple[NDArray[np.floating], int]: + """Perform proximal gradient descent to solve a constrained optimization problem. + + Solves the optimization problem: + minimize 0.5 ||mat @ x - vec||^2 + g(x) + subject to x >= 0, sum(x) = 1 + + where g captures an optional extra gradient term supplied via ``extra_grad``. + The gradient is evaluated matrix-free at each step; the normal matrix + mat.T @ mat is never formed. The Lipschitz constant is estimated once via + power iteration at O(n_power_iter * rows * cols) setup cost. + + Parameters + ---------- + mat : NDArray[np.floating] + A matrix of shape (n_samples, n_features). + vec : NDArray[np.floating] + A vector of shape (n_samples,). + extra_grad : callable, optional + v -> additional gradient term (e.g. ``alpha * target @ v`` for + Ledoit-Wolf shrinkage). Must be SPD for convergence guarantees. + When provided, the Lipschitz estimate accounts for this term. + eps_rel : float, optional + Relative step-size change stopping tolerance. Default is 1e-6. + max_iter : int, optional + Maximum number of iterations. Default is 100000. + + Returns: + ------- + tuple[NDArray[np.floating], int] + ``(w, n_iters)`` — weight vector of shape (n_features,) and the + number of gradient steps taken. + + Examples: + -------- + >>> import numpy as np + >>> mat = np.array([[1.0, 0.5], [0.5, 1.0]]) + >>> vec = np.ones(2) + >>> result, _ = prox_gradient(mat, vec) + >>> bool(np.isclose(result.sum(), 1.0)) + True + + """ + rng = np.random.default_rng() + prim_var: NDArray[np.floating] = np.asarray(rng.standard_normal(size=mat.shape[1])) + lip = _lipschitz(mat, extra_matvec=extra_grad, rng=rng) + step = 1.0 / lip if lip > 1e-15 else 1.0 + + # Precompute mat.T @ vec once; zero for minimum-variance (vec = 0). + out_prod = mat.T @ vec + ite = 0 + err_rel = eps_rel + 1 + while err_rel > eps_rel and ite < max_iter: + grad = mat.T @ (mat @ prim_var) - out_prod + if extra_grad is not None: + grad = grad + extra_grad(prim_var) + prim_var_new = proj_simplex(prim_var - step * grad) + err_rel = float(np.linalg.norm(prim_var - prim_var_new, 2)) + prim_var = prim_var_new.copy() + ite += 1 + return prim_var, ite diff --git a/src/fast_minimum_variance/shrinkage/__init__.py b/src/fast_minimum_variance/shrinkage/__init__.py new file mode 100644 index 0000000..73dbd52 --- /dev/null +++ b/src/fast_minimum_variance/shrinkage/__init__.py @@ -0,0 +1 @@ +"""Shrinkage target utilities for minimum-variance portfolio optimization.""" diff --git a/src/fast_minimum_variance/shrinkage/util.py b/src/fast_minimum_variance/shrinkage/util.py new file mode 100644 index 0000000..7fca396 --- /dev/null +++ b/src/fast_minimum_variance/shrinkage/util.py @@ -0,0 +1,130 @@ +"""Shrinkage intensity and target utilities (LW, OAS, RMT, constant-correlation).""" + +import numpy as np +from sklearn.covariance import ledoit_wolf, oas + + +def lw_alpha_and_target(X): # noqa: N803 + """Return (alpha_lw, target) for LW scaled-identity shrinkage via sklearn. + + X must be column-demeaned. The shrinkage target is bar_lambda * I where + bar_lambda = ||X||_F^2 / (n * T) is the average per-asset variance + (equivalently, the mean eigenvalue of the sample covariance X^T X / T). + """ + _, alpha = ledoit_wolf(X, assume_centered=True) + n = X.shape[1] + T = X.shape[0] # noqa: N806 + bar_lam = float(np.linalg.norm(X, "fro") ** 2) / (n * T) + cov = (X.transpose() @ X) / T + bar_lam_cov = np.linalg.trace(cov) / n + assert np.isclose(bar_lam, bar_lam_cov) # noqa: S101 + + delta2 = np.linalg.norm(cov - bar_lam * np.eye(n), "fro") ** 2 + norms_sq = np.sum(X**2, axis=1) + beta2 = (np.sum(norms_sq**2) - 2 * np.sum(X * (X @ cov)) + T * np.linalg.norm(cov, "fro") ** 2) / T**2 + alpha_manual = min(1.0, beta2 / delta2) + assert np.isclose(alpha_manual, alpha) # noqa: S101 + + return float(alpha), bar_lam * np.eye(n) + + +def lw_alpha_and_target_hard(X, alpha=0.5): # noqa: N803 + """Return (alpha, target) for scaled-identity shrinkage with a fixed alpha. + + X must be column-demeaned. The shrinkage target is bar_lambda * I where + bar_lambda = ||X||_F^2 / (n * T) is the average per-asset variance + (equivalently, the mean eigenvalue of the sample covariance X^T X / T). + """ + # _, alpha = ledoit_wolf(X, assume_centered=True) + n = X.shape[1] + T = X.shape[0] # noqa: N806 + bar_lam = float(np.linalg.norm(X, "fro") ** 2) / (n * T) + + return alpha, bar_lam * np.eye(n) + + +def oas_alpha_and_target(X): # noqa: N803 + """Return (alpha_oas, target) for OAS scaled-identity shrinkage via sklearn. + + Uses the same bar_lambda * I target as LW but the Oracle Approximating + Shrinkage formula (Chen et al. 2010), which has lower MSE when n/T is + non-negligible. + """ + _, alpha = oas(X, assume_centered=True) + n = X.shape[1] + bar_lam = float(np.linalg.norm(X, "fro") ** 2) / (n * X.shape[0]) + return float(alpha), bar_lam * np.eye(n) + + +def cc_target(X): # noqa: N803 + """Constant-correlation shrinkage target (Ledoit-Wolf 2004 JoPM). + + T0_ij = rho_bar * sigma_i * sigma_j (i != j) + T0_ii = sigma_i^2 + + where sigma_i = sqrt(Sigma_ii) is the per-asset sample standard deviation + and rho_bar is the mean off-diagonal sample correlation coefficient. + Always PSD for 0 < rho_bar < 1. + + Returns (target, rho_bar). + """ + T, n = X.shape # noqa: N806 + cov = (X.T @ X) / T + std = np.sqrt(np.diag(cov)) + corr = cov / np.outer(std, std) + np.fill_diagonal(corr, 0.0) + rho_bar = float(corr.sum()) / (n * (n - 1)) + target = rho_bar * np.outer(std, std) + np.fill_diagonal(target, std**2) + return target, rho_bar + + +def lw_alpha_for_target(X, target): # noqa: N803 + """LW oracle alpha for an arbitrary SPD shrinkage target T0. + + alpha* = min(1, beta2 / delta2) where + delta2 = ||S - T0||_F^2 (distance of sample cov from target) + beta2 = (1/T^2) sum_t ||x_t x_t' - S||_F^2 (noise in S) + """ + T, _n = X.shape # noqa: N806 + cov = (X.T @ X) / T + delta2 = float(np.linalg.norm(cov - target, "fro") ** 2) + norms_sq = np.sum(X**2, axis=1) + beta2 = float((np.sum(norms_sq**2) - 2 * np.sum(X * (X @ cov)) + T * np.linalg.norm(cov, "fro") ** 2) / T**2) + return min(1.0, beta2 / delta2) + + +def rmt_target_and_alpha(X): # noqa: N803 + """RMT-clipped shrinkage target and its LW oracle alpha. + + Eigenvalues of the sample covariance above the Marchenko-Pastur bulk edge + are kept as-is (signal); all others are clipped to bar_lambda (noise floor). + The resulting target has lambda_min = bar_lambda (same as the scaled-identity + target) so it provides equally effective lambda_min lifting at any alpha, while + being 9x closer to the sample covariance in Frobenius norm than bar_lambda * I. + + T0 = bar_lambda * I + U_k @ diag(lambda_k - bar_lambda) @ U_k^T + + where (U_k, lambda_k) are the k eigenpairs of S = X^T X / T whose eigenvalues + exceed the MP upper edge bar_lambda * (1 + sqrt(n/T))^2. + + Returns (target, k, alpha) where k is the number of signal factors and + alpha is the LW oracle intensity for this target (often 1.0 for equity data, + meaning the RMT-cleaned covariance is a better estimator than the sample cov). + """ + T, n = X.shape # noqa: N806 + cov = (X.T @ X) / T + bar_lam = np.trace(cov) / n + mp_upper = bar_lam * (1.0 + np.sqrt(n / T)) ** 2 + + eigs, vecs = np.linalg.eigh(cov) # ascending order + signal = eigs > mp_upper + k = int(signal.sum()) + eigs_k = eigs[signal] + vecs_k = vecs[:, signal] + + delta_k = eigs_k - bar_lam # (k,) eigenvalue excesses + target = bar_lam * np.eye(n) + vecs_k @ np.diag(delta_k) @ vecs_k.T + lr_factors = (float(bar_lam), vecs_k, delta_k) # for O(nk) matvec + alpha = lw_alpha_for_target(X, target) + return target, lr_factors, k, float(alpha) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..4de8683 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,13 @@ +"""Shared pytest fixtures.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="session") +def resource_dir() -> Path: + """Return the path to the test resources directory.""" + return Path(__file__).parent / "resources" diff --git a/tests/test_base.py b/tests/test_base.py index 46acbf4..5995999 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -23,7 +23,7 @@ class _Stub(_BaseProblem): def _constraint_active_set(self, solve_fn): w, step_iters = solve_fn(None) - return w, step_iters + return w, 1, step_iters def _kkt_step(self, mask): return np.array([0.5, -0.1, 0.6]), 1 @@ -86,6 +86,13 @@ def test_wrong_target_shape_raises(self): with pytest.raises(ValueError, match="target must be"): _Stub(_X3, target=np.eye(4)) + def test_wrong_target_lr_shape_raises(self): + """A target_lr with mismatched U_k / delta_k shapes raises ValueError.""" + U_k = np.ones((4, 2)) # noqa: N806 # wrong: 4 rows but n=3 + delta_k = np.ones(2) + with pytest.raises(ValueError, match="target_lr"): + _Stub(_X3, target_lr=(0.5, U_k, delta_k)) + # --------------------------------------------------------------------------- # Shared utilities @@ -142,8 +149,8 @@ def test_solve_kkt_uses_kkt_step(self): assert iters == 1 def test_solve_cg_uses_cg_step(self): - """solve_cg delegates to _cg_step (iters==5).""" - _, iters = _Stub(_X3).solve_cg() + """solve_cg delegates to _cg_step (inner iters==5).""" + _, _, iters = _Stub(_X3).solve_cg() assert iters == 5 def test_solve_nnls_uses_nnls_solve(self): @@ -220,3 +227,97 @@ def _cvxpy_constraints(self, w, cp_module): _SpyStub(_X3).solve_cvxpy() assert received["w_type"] == "Variable" assert received["cp"] is cp + + +# --------------------------------------------------------------------------- +# solve_proximal +# --------------------------------------------------------------------------- + + +class TestSolveProximal: + """Tests for _BaseProblem.solve_proximal template.""" + + def test_returns_tuple(self): + """solve_proximal returns a (w, iters) tuple.""" + result = _Stub(_X3).solve_proximal() + assert isinstance(result, tuple) + assert len(result) == 2 + + def test_iters_positive(self): + """Iteration count is at least 1.""" + _, iters = _Stub(_X3).solve_proximal() + assert iters >= 1 + + def test_weight_shape(self): + """Returned weight vector has shape (N,).""" + w, _ = _Stub(_X3).solve_proximal() + assert w.shape == (_X3.shape[1],) + + def test_project_true_sums_to_one(self): + """project=True ensures weights sum to 1.""" + w, _ = _Stub(_X3).solve_proximal(project=True) + assert w.sum() == pytest.approx(1.0) + + def test_project_true_non_negative(self): + """project=True ensures all weights are non-negative.""" + w, _ = _Stub(_X3).solve_proximal(project=True) + assert np.all(w >= 0) + + def test_project_false_still_on_simplex(self): + """project=False skips clip-and-renormalize; prox_gradient already enforces simplex.""" + w, _ = _Stub(_X3).solve_proximal(project=False) + np.testing.assert_allclose(w.sum(), 1.0, rtol=1e-6) + assert np.all(w >= -1e-10) + + def test_project_default_clips_and_renormalizes(self): + """Default (project=True) clips and renormalizes like other template solvers.""" + w, _ = _Stub(_X3).solve_proximal() + assert w.sum() == pytest.approx(1.0) + assert np.all(w >= 0) + + +# --------------------------------------------------------------------------- +# solve_fista +# --------------------------------------------------------------------------- + + +class TestSolveFista: + """Tests for _BaseProblem.solve_fista template.""" + + @pytest.fixture(scope="class") + def X(self): # noqa: N802 + """Return a (100, 5) return matrix.""" + return np.random.default_rng(0).standard_normal((100, 5)) + + def test_returns_tuple(self, X): # noqa: N803 + """solve_fista returns a (w, iters) tuple.""" + from fast_minimum_variance import Problem + + result = Problem(X).solve_fista() + assert isinstance(result, tuple) + assert len(result) == 2 + + def test_weights_sum_to_one(self, X): # noqa: N803 + """Weights sum to 1.""" + from fast_minimum_variance import Problem + + w, _ = Problem(X).solve_fista() + assert w.sum() == pytest.approx(1.0, abs=1e-5) + + def test_weights_non_negative(self, X): # noqa: N803 + """All weights are non-negative.""" + from fast_minimum_variance import Problem + + w, _ = Problem(X).solve_fista() + assert np.all(w >= -1e-8) + + def test_with_shrinkage_and_target(self, X): # noqa: N803 + """Shrinkage branch (alpha > 0, target supplied) exercises the extra_grad path.""" + from fast_minimum_variance import Problem + + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + w, iters = Problem(X, alpha=alpha, target=np.eye(N)).solve_fista() + assert w.sum() == pytest.approx(1.0, abs=1e-5) + assert np.all(w >= -1e-8) + assert iters >= 1 diff --git a/tests/test_cg.py b/tests/test_cg.py index 2332a08..01300b3 100644 --- a/tests/test_cg.py +++ b/tests/test_cg.py @@ -38,7 +38,7 @@ class TestCgVsCvxpy: def test_plain_minvar(self, X): # noqa: N803 """Plain minimum variance (alpha=0, rho=0).""" - w_cg, _ = Problem(X).solve_cg() + w_cg, *_ = Problem(X).solve_cg() w_cvx, _ = Problem(X).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) @@ -46,7 +46,7 @@ def test_with_shrinkage(self, X): # noqa: N803 """Ledoit-Wolf shrinkage (alpha > 0).""" T, N = X.shape # noqa: N806 alpha = N / (N + T) - w_cg, _ = Problem(X, alpha=alpha).solve_cg() + w_cg, *_ = Problem(X, alpha=alpha).solve_cg() w_cvx, _ = Problem(X, alpha=alpha).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) @@ -54,13 +54,13 @@ def test_with_return_tilt(self, X): # noqa: N803 """Return tilt (rho != 0, mu given).""" rng = np.random.default_rng(1) mu = rng.standard_normal(X.shape[1]) - w_cg, _ = Problem(X, rho=0.5, mu=mu).solve_cg() + w_cg, *_ = Problem(X, rho=0.5, mu=mu).solve_cg() w_cvx, _ = Problem(X, rho=0.5, mu=mu).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) def test_small_problem(self, X_small): # noqa: N803 """Small problem (T=100, N=5).""" - w_cg, _ = Problem(X_small).solve_cg() + w_cg, *_ = Problem(X_small).solve_cg() w_cvx, _ = Problem(X_small).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) @@ -69,7 +69,7 @@ def test_shrinkage_and_tilt(self, X): # noqa: N803 T, N = X.shape # noqa: N806 alpha = N / (N + T) mu = np.ones(N) / N - w_cg, _ = Problem(X, alpha=alpha, rho=0.3, mu=mu).solve_cg() + w_cg, *_ = Problem(X, alpha=alpha, rho=0.3, mu=mu).solve_cg() w_cvx, _ = Problem(X, alpha=alpha, rho=0.3, mu=mu).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) @@ -77,7 +77,7 @@ def test_shrinkage_and_tilt(self, X): # noqa: N803 def test_various_sizes(self, N): # noqa: N803 """Agreement holds for several problem sizes.""" X = make_returns(T=5 * N, N=N, seed=N) # noqa: N806 - w_cg, _ = Problem(X).solve_cg() + w_cg, *_ = Problem(X).solve_cg() w_cvx, _ = Problem(X).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) @@ -86,6 +86,6 @@ def test_with_explicit_target(self, X): # noqa: N803 T, N = X.shape # noqa: N806 alpha = N / (N + T) target = np.eye(N) - w_cg, _ = Problem(X, alpha=alpha, target=target).solve_cg() + w_cg, *_ = Problem(X, alpha=alpha, target=target).solve_cg() w_cvx, _ = Problem(X, alpha=alpha, target=target).solve_cvxpy() np.testing.assert_allclose(w_cg, w_cvx, atol=1e-4) diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000..8fa69ed --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,43 @@ +"""Tests for fast_minimum_variance.data — simulate_equity_returns.""" + +import numpy as np +import pytest + +from fast_minimum_variance.data import simulate_equity_returns + + +class TestSimulateEquityReturns: + """Tests for simulate_equity_returns.""" + + def test_output_shape(self) -> None: + """Output has shape (T, n).""" + X = simulate_equity_returns(100, 50, rng=0) # noqa: N806 + assert X.shape == (50, 100) + + def test_columns_zero_mean(self) -> None: + """Each column is demeaned (mean == 0).""" + X = simulate_equity_returns(100, 200, k=5, rng=0) # noqa: N806 + assert bool(abs(X.mean(axis=0)).max() < 1e-14) + + def test_reproducible_with_seed(self) -> None: + """Same seed produces identical arrays.""" + X1 = simulate_equity_returns(50, 20, rng=42) # noqa: N806 + X2 = simulate_equity_returns(50, 20, rng=42) # noqa: N806 + np.testing.assert_array_equal(X1, X2) + + def test_different_seeds_differ(self) -> None: + """Different seeds produce different arrays.""" + X1 = simulate_equity_returns(50, 20, rng=1) # noqa: N806 + X2 = simulate_equity_returns(50, 20, rng=2) # noqa: N806 + assert not np.array_equal(X1, X2) + + @pytest.mark.parametrize("k", [1, 5, 10]) + def test_various_k(self, k: int) -> None: + """Output shape is correct for several factor counts.""" + X = simulate_equity_returns(20, 30, k=k, rng=0) # noqa: N806 + assert X.shape == (30, 20) + + def test_default_k(self) -> None: + """Default k (n // 10, min 3) produces correct shape.""" + X = simulate_equity_returns(100, 200, rng=0) # noqa: N806 + assert X.shape == (200, 100) diff --git a/tests/test_minvar_problem.py b/tests/test_minvar_problem.py index a13353e..ab2ffac 100644 --- a/tests/test_minvar_problem.py +++ b/tests/test_minvar_problem.py @@ -94,7 +94,7 @@ def solve_fn(mask): return np.array([-5e-6, 0.6, 0.5 + 5e-6]), 1 return p._kkt_step(mask) - w, _ = p._constraint_active_set(solve_fn) + w, *_ = p._constraint_active_set(solve_fn) assert w[0] == pytest.approx(0.0) assert w.shape == (3,) @@ -148,7 +148,7 @@ def solve_fn(mask): return np.array([-0.1, 0.6, 0.5]), 3 return np.array([0.5, 0.5], dtype=float), 2 - _, total = p._constraint_active_set(solve_fn) + _, _, total = p._constraint_active_set(solve_fn) assert total == 5 def test_negative_asset_removed(self): @@ -182,7 +182,7 @@ def solve_fn(mask): return np.array([-0.1, 0.6, 0.5]), 1 return np.array([0.5, 0.5], dtype=float), 1 - w, _ = p._constraint_active_set(solve_fn) + w, *_ = p._constraint_active_set(solve_fn) assert w[0] == pytest.approx(0.0) assert w.shape == (3,) @@ -193,7 +193,7 @@ def test_dual_step_readds_asset(self): X = np.eye(3) # noqa: N806 p = MinVarProblem(X) - w, _ = p._constraint_active_set(p._kkt_step) + w, *_ = p._constraint_active_set(p._kkt_step) # All assets should be in the final portfolio (equal-weight is optimal). assert (w > 0).all() @@ -301,30 +301,30 @@ class TestSolveCg: def test_shape(self, mvp): """Output weight vector has shape (N,).""" - w, _ = mvp.solve_cg() + w, *_ = mvp.solve_cg() assert w.shape == (mvp.n,) def test_weights_sum_to_one(self, mvp): """Weights sum to 1.""" - w, _ = mvp.solve_cg() + w, *_ = mvp.solve_cg() assert w.sum() == pytest.approx(1.0, abs=1e-4) def test_weights_non_negative(self, mvp): """All weights are non-negative.""" - w, _ = mvp.solve_cg() + w, *_ = mvp.solve_cg() assert np.all(w >= -1e-4) def test_close_to_kkt(self, mvp_small): """CG solution is close to the exact KKT solution.""" w_kkt, _ = mvp_small.solve_kkt() - w_cg, _ = mvp_small.solve_cg() + w_cg, *_ = mvp_small.solve_cg() np.testing.assert_allclose(w_cg, w_kkt, atol=1e-4) def test_with_shrinkage(self, X_small): # noqa: N803 """Shrinkage branch (alpha > 0) agrees with KKT.""" T, N = X_small.shape # noqa: N806 p = MinVarProblem(X_small, alpha=N / (N + T)) - w_cg, _ = p.solve_cg() + w_cg, *_ = p.solve_cg() w_kkt, _ = p.solve_kkt() np.testing.assert_allclose(w_cg, w_kkt, atol=1e-4) @@ -333,13 +333,13 @@ def test_return_tilt_branch(self, X_small): # noqa: N803 _T, N = X_small.shape # noqa: N806 mu = np.random.default_rng(7).standard_normal(N) p = MinVarProblem(X_small, rho=0.5, mu=mu) - w_cg, _ = p.solve_cg() + w_cg, *_ = p.solve_cg() w_kkt, _ = p.solve_kkt() np.testing.assert_allclose(w_cg, w_kkt, atol=1e-4) def test_project_false(self, mvp): """project=False returns raw CG solution without clipping.""" - w, _ = mvp.solve_cg(project=False) + w, *_ = mvp.solve_cg(project=False) assert w.shape == (mvp.n,) @@ -516,3 +516,180 @@ def test_with_shrinkage(self, X_small): # noqa: N803 w_mvp, _ = MinVarProblem(X_small, alpha=alpha).solve_kkt() w_prob, _ = Problem(X_small, alpha=alpha).solve_kkt() np.testing.assert_allclose(w_mvp, w_prob, atol=1e-6) + + +# --------------------------------------------------------------------------- +# target_lr (low-rank shrinkage target) +# --------------------------------------------------------------------------- + + +def _make_target_lr(n, k=2, seed=0): + """Build a valid (bar_lam, U_k, delta_k) low-rank target triple.""" + rng = np.random.default_rng(seed) + U_k = rng.standard_normal((n, k)) # noqa: N806 + delta_k = np.abs(rng.standard_normal(k)) + 0.1 + bar_lam = 0.5 + return bar_lam, U_k, delta_k + + +class TestTargetLr: + """target_lr (low-rank target) exercises distinct code branches in CG and KKT steps.""" + + @pytest.fixture(scope="class") + def X(self): # noqa: N802 + """Return a (100, 8) return matrix.""" + return np.random.default_rng(3).standard_normal((100, 8)) + + def test_cg_with_target_lr(self, X): # noqa: N803 + """solve_cg with target_lr returns a valid portfolio.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + target_lr = _make_target_lr(N) + w, *_ = MinVarProblem(X, alpha=alpha, target_lr=target_lr).solve_cg() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + + def test_kkt_with_target_lr(self, X): # noqa: N803 + """solve_kkt with target_lr returns a valid portfolio.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + target_lr = _make_target_lr(N) + w, _ = MinVarProblem(X, alpha=alpha, target_lr=target_lr).solve_kkt() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + + def test_cg_with_target_lr_and_return_tilt(self, X): # noqa: N803 + """target_lr + rho != 0 exercises the matvec2 c_lr branch.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + target_lr = _make_target_lr(N) + mu = np.random.default_rng(5).standard_normal(N) + w, *_ = MinVarProblem(X, alpha=alpha, target_lr=target_lr, rho=0.5, mu=mu).solve_cg() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + + +# --------------------------------------------------------------------------- +# Warm-start solvers +# --------------------------------------------------------------------------- + + +class TestWarmStart: + """solve_cg_warm and solve_kkt_warm chain solves with warm-starting.""" + + @pytest.fixture(scope="class") + def X(self): # noqa: N802 + """Return a (100, 8) return matrix.""" + return np.random.default_rng(7).standard_normal((100, 8)) + + def test_solve_cg_warm_cold_start(self, X): # noqa: N803 + """Cold start returns a valid portfolio.""" + from fast_minimum_variance import Problem + + w, outer, inner, warm = Problem(X).solve_cg_warm() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + assert outer >= 1 + assert inner >= 1 + assert warm is not None + + def test_solve_kkt_warm_cold_start(self, X): # noqa: N803 + """Cold KKT start returns a valid portfolio.""" + from fast_minimum_variance import Problem + + w, outer, warm = Problem(X).solve_kkt_warm() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + assert outer >= 1 + assert warm is not None + + def test_solve_cg_warm_chained(self, X): # noqa: N803 + """Chained warm-start produces same result as cold start.""" + from fast_minimum_variance import Problem + + w_cold, *_ = Problem(X).solve_cg() + w_warm, _, _, warm = Problem(X).solve_cg_warm() + w_warm2, *_ = Problem(X, rho=0.0).solve_cg_warm(warm_start=warm) + np.testing.assert_allclose(w_cold, w_warm, atol=1e-4) + assert abs(w_warm2.sum() - 1.0) < 1e-4 + + def test_solve_kkt_warm_chained(self, X): # noqa: N803 + """Chained KKT warm-start agrees with cold KKT.""" + from fast_minimum_variance import Problem + + w_cold, _ = Problem(X).solve_kkt() + w_warm, _, warm = Problem(X).solve_kkt_warm() + np.testing.assert_allclose(w_cold, w_warm, atol=1e-6) + w_warm2, _, _ = Problem(X).solve_kkt_warm(warm_start=warm) + assert abs(w_warm2.sum() - 1.0) < 1e-6 + + +# --------------------------------------------------------------------------- +# solve_pcg (PCG with RMT preconditioner) +# --------------------------------------------------------------------------- + + +def _make_pcg_lr(X, k=2, seed=1): # noqa: N803 + """Build a valid (bar_lam, U_k, delta_k) RMT preconditioner triple.""" + rng = np.random.default_rng(seed) + T, N = X.shape # noqa: N806 + bar_lam = float(np.trace(X.T @ X / T) / N) + U_k, _ = np.linalg.qr(rng.standard_normal((N, k))) # noqa: N806 + U_k = U_k[:, :k] # noqa: N806 + delta_k = np.abs(rng.standard_normal(k)) + 0.1 + return bar_lam, U_k, delta_k + + +class TestSolvePcg: + """Tests for MinVarProblem.solve_pcg (PCG with RMT preconditioner).""" + + @pytest.fixture(scope="class") + def X(self): # noqa: N802 + """Return a (100, 5) return matrix.""" + return np.random.default_rng(5).standard_normal((100, 5)) + + @pytest.fixture(scope="class") + def pcg_lr(self, X): # noqa: N803 + """Valid (bar_lam, U_k, delta_k) preconditioner triple for X.""" + return _make_pcg_lr(X) + + def test_raises_without_pcg_lr(self, X): # noqa: N803 + """solve_pcg raises ValueError when pcg_lr is not set.""" + with pytest.raises(ValueError, match="pcg_lr"): + MinVarProblem(X).solve_pcg() + + def test_returns_valid_portfolio(self, X, pcg_lr): # noqa: N803 + """solve_pcg returns weights that sum to 1 and are non-negative.""" + w, outer, inner = MinVarProblem(X, pcg_lr=pcg_lr).solve_pcg() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + assert outer >= 1 + assert inner >= 1 + + def test_project_false(self, X, pcg_lr): # noqa: N803 + """project=False skips clip-and-renormalize.""" + w, *_ = MinVarProblem(X, pcg_lr=pcg_lr).solve_pcg(project=False) + assert w.shape == (X.shape[1],) + + def test_close_to_kkt(self, X, pcg_lr): # noqa: N803 + """PCG solution agrees with the direct KKT solution.""" + w_pcg, *_ = MinVarProblem(X, pcg_lr=pcg_lr).solve_pcg() + w_kkt, _ = MinVarProblem(X).solve_kkt() + np.testing.assert_allclose(w_pcg, w_kkt, atol=1e-4) + + def test_with_target_lr(self, X, pcg_lr): # noqa: N803 + """target_lr exercises the low-rank system-matvec branch in _pcg_step.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + target_lr = _make_target_lr(N) + w, *_ = MinVarProblem(X, alpha=alpha, target_lr=target_lr, pcg_lr=pcg_lr).solve_pcg() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) + + def test_with_dense_target(self, X, pcg_lr): # noqa: N803 + """Dense target exercises the target_sub branch and _apply_system call in _pcg_step.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + w, *_ = MinVarProblem(X, alpha=alpha, target=np.eye(N), pcg_lr=pcg_lr).solve_pcg() + assert abs(w.sum() - 1.0) < 1e-4 + assert np.all(w >= -1e-4) diff --git a/tests/test_problem.py b/tests/test_problem.py index 4fb37cb..146d2ba 100644 --- a/tests/test_problem.py +++ b/tests/test_problem.py @@ -267,7 +267,7 @@ def solve_fn(active): return np.array([-0.1, 1.1]), 3 return np.array([0.0, 1.0]), 2 - _, total = p._constraint_active_set(solve_fn) + _, _, total = p._constraint_active_set(solve_fn) assert total == 5 def test_active_mask_starts_empty(self): @@ -318,10 +318,10 @@ def solve_fn(active): assert first_active[0].shape == (3,) def test_returns_w_and_total_iters(self): - """Returns (w, total_iters) tuple.""" + """Returns (w, outer_steps, total_iters) tuple.""" p = Problem(np.eye(2), C=-np.eye(2), d=np.zeros(2)) w_expected = np.array([0.6, 0.4]) - result_w, result_iters = p._constraint_active_set(lambda _: (w_expected, 7)) + result_w, _, result_iters = p._constraint_active_set(lambda _: (w_expected, 7)) np.testing.assert_array_equal(result_w, w_expected) assert result_iters == 7 @@ -362,7 +362,7 @@ def test_accumulates_iters_across_steps(self): w1 = np.array([0.8, -0.2]) w2 = np.array([0.6, 0.4]) responses = iter([(w1, 10), (w2, 5)]) - _, total_iters = p._constraint_active_set(lambda _: next(responses)) + _, _, total_iters = p._constraint_active_set(lambda _: next(responses)) assert total_iters == 15 def test_multiple_violations_promoted_simultaneously(self): @@ -399,7 +399,7 @@ def solve_fn(active): call_count[0] += 1 return next(responses) - result_w, _ = p._constraint_active_set(solve_fn) + result_w, *_ = p._constraint_active_set(solve_fn) np.testing.assert_array_equal(result_w, w2) assert call_count[0] == 2 @@ -593,30 +593,30 @@ class TestSolveCg: def test_shape(self, problem): """Output weight vector has shape (N,).""" - w, _ = problem.solve_cg() + w, *_ = problem.solve_cg() assert w.shape == (problem.n,) def test_weights_sum_to_one(self, problem): """Weights sum to 1.""" - w, _ = problem.solve_cg() + w, *_ = problem.solve_cg() assert abs(w.sum() - 1.0) < 1e-4 def test_weights_non_negative(self, problem): """All weights are non-negative.""" - w, _ = problem.solve_cg() + w, *_ = problem.solve_cg() assert np.all(w >= -1e-4) def test_close_to_kkt(self, problem_small): """CG solution is close to the exact KKT solution.""" w_kkt, _ = problem_small.solve_kkt() - w_cg, _ = problem_small.solve_cg() + w_cg, *_ = problem_small.solve_cg() np.testing.assert_allclose(w_cg, w_kkt, atol=1e-4) def test_with_explicit_target(self, X): # noqa: N803 """CG with explicit target exercises the target-aware _kkt_operator branch.""" T, N = X.shape # noqa: N806 alpha = N / (N + T) - w_cg, _ = Problem(X, alpha=alpha, target=np.eye(N)).solve_cg() + w_cg, *_ = Problem(X, alpha=alpha, target=np.eye(N)).solve_cg() w_kkt, _ = Problem(X, alpha=alpha, target=np.eye(N)).solve_kkt() np.testing.assert_allclose(w_cg, w_kkt, atol=1e-4) diff --git a/tests/test_proximal.py b/tests/test_proximal.py new file mode 100644 index 0000000..828ab61 --- /dev/null +++ b/tests/test_proximal.py @@ -0,0 +1,395 @@ +"""Tests for the proximal module.""" + +from __future__ import annotations + +import numpy as np +import pytest + +from fast_minimum_variance import Problem +from fast_minimum_variance.proximal import _lipschitz, fista_gradient, proj_simplex, prox_gradient + + +def make_returns(T, N, seed=0): # noqa: N803 + """Generate a (T, N) matrix of standard-normal returns.""" + return np.random.default_rng(seed).standard_normal((T, N)) + + +class TestProjSimplex: + """Test suite for proj_simplex function.""" + + @pytest.mark.parametrize("n", [2, 5, 10, 100, 1000]) + def test_output_sums_to_one(self, n: int) -> None: + """Verify projected vector sums to 1 (default radius).""" + rng = np.random.default_rng(42) + vec = rng.standard_normal(n) + result = proj_simplex(vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-10) + + @pytest.mark.parametrize("rad", [0.5, 1.0, 2.0, 10.0]) + def test_output_sums_to_radius(self, rad: float) -> None: + """Verify projected vector sums to specified radius.""" + rng = np.random.default_rng(42) + vec = rng.standard_normal(10) + result = proj_simplex(vec, rad=rad) + np.testing.assert_allclose(result.sum(), rad, rtol=1e-10) + + def test_output_non_negative(self) -> None: + """Verify all elements of projected vector are non-negative.""" + rng = np.random.default_rng(42) + vec = rng.standard_normal(100) + result = proj_simplex(vec) + assert np.all(result >= 0) + + def test_idempotent(self) -> None: + """Projecting a valid simplex point should return the same point.""" + vec = np.array([0.2, 0.3, 0.5]) + result = proj_simplex(vec) + np.testing.assert_allclose(result, vec, rtol=1e-10) + + def test_already_on_simplex(self) -> None: + """Vector already on simplex should be unchanged.""" + vec = np.array([0.25, 0.25, 0.25, 0.25]) + result = proj_simplex(vec) + np.testing.assert_allclose(result, vec, rtol=1e-10) + + def test_single_element(self) -> None: + """Single element vector projects to [rad].""" + result = proj_simplex(np.array([5.0])) + np.testing.assert_allclose(result, np.array([1.0]), rtol=1e-10) + + def test_all_negative_input(self) -> None: + """All negative inputs should project to corner of simplex.""" + vec = np.array([-1.0, -2.0, -3.0, -4.0, -5.0]) + result = proj_simplex(vec) + # Should project to [1, 0, 0, 0, 0] since -1 is largest + assert result[0] == pytest.approx(1.0, rel=1e-10) + assert np.sum(result[1:]) == pytest.approx(0.0, abs=1e-10) + + def test_uniform_input(self) -> None: + """Uniform input should project to uniform simplex point.""" + vec = np.array([3.0, 3.0, 3.0, 3.0]) + result = proj_simplex(vec) + expected = np.array([0.25, 0.25, 0.25, 0.25]) + np.testing.assert_allclose(result, expected, rtol=1e-10) + + @pytest.mark.parametrize("n", [10, 100, 1000]) + def test_projection_is_closest_point(self, n: int) -> None: + """Verify projection is the closest point on simplex.""" + rng = np.random.default_rng(42) + vec = rng.standard_normal(n) + proj = proj_simplex(vec) + + # Any point on simplex should be farther from vec than proj + # Test with some random simplex points + for _ in range(10): + random_simplex = rng.dirichlet(np.ones(n)) + dist_to_proj = np.linalg.norm(vec - proj) + dist_to_random = np.linalg.norm(vec - random_simplex) + assert dist_to_proj <= dist_to_random + 1e-10 + + def test_two_elements(self) -> None: + """Test projection with only 2 elements.""" + vec = np.array([2.0, -1.0]) + result = proj_simplex(vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-10) + assert np.all(result >= 0) + + def test_large_values(self) -> None: + """Test projection with very large values.""" + vec = np.array([1e10, 1e10, 1e10]) + result = proj_simplex(vec) + # Numerical precision degrades with very large values + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + np.testing.assert_allclose(result, np.array([1 / 3, 1 / 3, 1 / 3]), rtol=1e-5) + + def test_small_values(self) -> None: + """Test projection with very small values.""" + vec = np.array([1e-10, 2e-10, 3e-10]) + result = proj_simplex(vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-10) + + def test_mixed_scales(self) -> None: + """Test projection with mixed scale values.""" + vec = np.array([1e8, 1e-8, 1e4, 1e-4]) + result = proj_simplex(vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-10) + assert np.all(result >= 0) + + +class TestProxGradient: + """Test suite for prox_gradient function.""" + + def test_output_on_simplex(self) -> None: + """Verify output satisfies simplex constraints.""" + rng = np.random.default_rng(42) + mat = rng.standard_normal((5, 3)) + vec = rng.standard_normal(5) + result, _ = prox_gradient(mat, vec) + + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-6) + assert np.all(result >= -1e-10) + + @pytest.mark.parametrize("shape", [(3, 2), (5, 5), (10, 3), (20, 10)]) + def test_various_shapes(self, shape: tuple[int, int]) -> None: + """Test with various matrix shapes.""" + rng = np.random.default_rng(42) + m, n = shape + mat = rng.standard_normal((m, n)) + vec = rng.standard_normal(m) + result, _ = prox_gradient(mat, vec) + + assert result.shape == (n,) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + assert np.all(result >= -1e-10) + + def test_identity_matrix(self) -> None: + """Test with identity matrix - solution should be projection of vec.""" + n = 5 + mat = np.eye(n) + vec = np.array([0.1, 0.2, 0.3, 0.2, 0.2]) # Already on simplex + result, _ = prox_gradient(mat, vec) + np.testing.assert_allclose(result, vec, rtol=1e-5) + + def test_convergence_tolerance(self) -> None: + """Test that tighter tolerance gives valid results.""" + rng = np.random.default_rng(42) + mat = rng.standard_normal((10, 5)) + vec = rng.standard_normal(10) + + result_loose, _ = prox_gradient(mat, vec, eps_rel=1e-3) + result_tight, _ = prox_gradient(mat, vec, eps_rel=1e-10) + + # Both should satisfy constraints + np.testing.assert_allclose(result_loose.sum(), 1.0, rtol=1e-3) + np.testing.assert_allclose(result_tight.sum(), 1.0, rtol=1e-10) + + def test_max_iter_respected(self) -> None: + """Test that max_iter parameter is respected.""" + rng = np.random.default_rng(42) + mat = rng.standard_normal((100, 50)) + vec = rng.standard_normal(100) + + result, iters = prox_gradient(mat, vec, max_iter=10) + assert result.shape == (50,) + assert iters <= 10 + + def test_objective_convergence(self) -> None: + """Test that algorithm converges to similar objective values.""" + rng = np.random.default_rng(42) + mat = rng.standard_normal((5, 3)) + vec = rng.standard_normal(5) + + # Run multiple times - results may vary due to random init + # but objective values should be similar + objectives = [] + for _ in range(5): + result, _ = prox_gradient(mat, vec, eps_rel=1e-10) + obj = 0.5 * np.linalg.norm(mat @ result - vec) ** 2 + objectives.append(obj) + + # All objectives should be close to the minimum + np.testing.assert_allclose(objectives, objectives[0], rtol=1e-3) + + def test_near_zero_matrix(self) -> None: + """Test with near-zero matrix (low Lipschitz constant).""" + mat = np.array([[1e-10, 0], [0, 1e-10]]) + vec = np.array([1.0, 1.0]) + result, _ = prox_gradient(mat, vec) + assert result.shape == (2,) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + + def test_rank_deficient(self) -> None: + """Test with rank-deficient matrix.""" + mat = np.array([[1, 2, 3], [2, 4, 6]]) # Rank 1 + vec = np.array([1.0, 2.0]) + result, _ = prox_gradient(mat, vec) + assert result.shape == (3,) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + + +class TestNumericalStability: + """Test numerical stability with challenging inputs.""" + + def test_ill_conditioned(self) -> None: + """Test with ill-conditioned matrix.""" + # Create an ill-conditioned matrix + u = np.array([[1, 0], [0, 1], [0, 0]]) + s = np.array([1e6, 1e-6]) + vt = np.array([[1, 0], [0, 1]]) + mat = u @ np.diag(s) @ vt + vec = np.array([1.0, 1.0, 1.0]) + + result, _ = prox_gradient(mat, vec, eps_rel=1e-6, max_iter=5000) + assert result.shape == (2,) + # May not converge perfectly but should satisfy constraints approximately + assert abs(result.sum() - 1.0) < 0.01 + + @pytest.mark.parametrize("seed", range(5)) + def test_random_problems(self, seed: int) -> None: + """Test with various random problem instances.""" + rng = np.random.default_rng(seed) + m, n = rng.integers(5, 20), rng.integers(2, 10) + mat = rng.standard_normal((m, n)) + vec = rng.standard_normal(m) + + result, _ = prox_gradient(mat, vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + assert np.all(result >= -1e-10) + + +class TestKKTConditions: + """Test mathematical properties via KKT conditions.""" + + @pytest.mark.parametrize("seed", range(5)) + def test_proj_simplex_kkt(self, seed: int) -> None: + """Verify KKT optimality conditions for simplex projection. + + For the projection problem: + min 0.5 ||x - v||^2 + s.t. sum(x) = 1, x >= 0 + + KKT conditions require: + - Primal feasibility: sum(x) = 1, x >= 0 + - For active constraints (x_i > 0): x_i = v_i - lambda + """ + rng = np.random.default_rng(seed) + v = rng.standard_normal(10) + x = proj_simplex(v) + + # Primal feasibility + np.testing.assert_allclose(x.sum(), 1.0, rtol=1e-10) + assert np.all(x >= -1e-12) + + # For x_i > 0, we have x_i = v_i - lambda + # So lambda should be the same for all active components + active_mask = x > 1e-10 + if np.any(active_mask): + lambdas = v[active_mask] - x[active_mask] + # All should be approximately equal + np.testing.assert_allclose(lambdas, lambdas[0], rtol=1e-8) + + +class TestSolveProximal: + """Cross-validation: solve_proximal vs CVXPY reference.""" + + @pytest.fixture(scope="class") + def X(self): # noqa: N802 + """Return a (200, 10) return matrix.""" + return make_returns(T=200, N=10, seed=42) + + @pytest.fixture(scope="class") + def X_small(self): # noqa: N802 + """Return a (100, 5) return matrix.""" + return make_returns(T=100, N=5, seed=7) + + def test_return_shape(self, X) -> None: # noqa: N803 + """solve_proximal returns (w, iters) with w of shape (N,).""" + w, iters = Problem(X).solve_proximal() + assert w.shape == (X.shape[1],) + assert iters >= 1 + + def test_weights_sum_to_one(self, X) -> None: # noqa: N803 + """Weights must sum to 1.""" + w, _ = Problem(X).solve_proximal() + np.testing.assert_allclose(w.sum(), 1.0, rtol=1e-6) + + def test_weights_non_negative(self, X) -> None: # noqa: N803 + """Weights must be non-negative after projection.""" + w, _ = Problem(X).solve_proximal() + assert np.all(w >= 0) + + def test_plain_minvar_vs_cvxpy(self, X) -> None: # noqa: N803 + """Plain minimum variance agrees with CVXPY reference.""" + w_prox, _ = Problem(X).solve_proximal() + w_cvx, _ = Problem(X).solve_cvxpy() + np.testing.assert_allclose(w_prox, w_cvx, atol=1e-3) + + def test_with_shrinkage_vs_cvxpy(self, X) -> None: # noqa: N803 + """Shrinkage (alpha > 0, target = I) agrees with CVXPY reference.""" + T, N = X.shape # noqa: N806 + alpha = N / (N + T) + w_prox, _ = Problem(X, alpha=alpha, target=np.eye(N)).solve_proximal() + w_cvx, _ = Problem(X, alpha=alpha, target=np.eye(N)).solve_cvxpy() + np.testing.assert_allclose(w_prox, w_cvx, atol=1e-3) + + def test_small_problem_vs_cvxpy(self, X_small) -> None: # noqa: N803 + """Small problem (T=100, N=5) agrees with CVXPY reference.""" + w_prox, _ = Problem(X_small).solve_proximal() + w_cvx, _ = Problem(X_small).solve_cvxpy() + np.testing.assert_allclose(w_prox, w_cvx, atol=1e-3) + + @pytest.mark.parametrize("N", [2, 5, 20]) + def test_various_sizes(self, N) -> None: # noqa: N803 + """Agreement with CVXPY holds for several problem sizes.""" + X = make_returns(T=5 * N, N=N, seed=N) # noqa: N806 + w_prox, _ = Problem(X).solve_proximal() + w_cvx, _ = Problem(X).solve_cvxpy() + np.testing.assert_allclose(w_prox, w_cvx, atol=1e-3) + + def test_project_false(self, X) -> None: # noqa: N803 + """project=False skips clip-and-renormalize; result still on simplex from prox_gradient.""" + _w_proj, _ = Problem(X).solve_proximal(project=True) + w_raw, _ = Problem(X).solve_proximal(project=False) + # Both should satisfy constraints (prox_gradient enforces simplex internally) + np.testing.assert_allclose(w_raw.sum(), 1.0, rtol=1e-6) + assert np.all(w_raw >= -1e-10) + + +class TestLipschitz: + """Tests for the _lipschitz power-iteration helper.""" + + def test_returns_positive(self) -> None: + """Lipschitz estimate is strictly positive for a non-zero matrix.""" + mat = np.eye(3) + assert _lipschitz(mat) > 0 + + def test_rng_none_branch(self) -> None: + """Passing rng=None triggers internal default_rng creation.""" + mat = np.random.default_rng(0).standard_normal((5, 3)) + lip = _lipschitz(mat, rng=None) + assert lip > 0 + + +class TestFistaGradient: + """Tests for fista_gradient (Nesterov-accelerated proximal gradient).""" + + def test_output_on_simplex(self) -> None: + """Result lies on the probability simplex.""" + rng = np.random.default_rng(42) + mat = rng.standard_normal((5, 3)) + vec = rng.standard_normal(5) + result, _ = fista_gradient(mat, vec) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + assert np.all(result >= -1e-10) + + def test_iters_positive(self) -> None: + """Iteration count is at least 1.""" + mat = np.eye(3) + _, iters = fista_gradient(mat, np.zeros(3)) + assert iters >= 1 + + def test_extra_grad_branch(self) -> None: + """extra_grad callback is exercised and result still lies on simplex.""" + rng = np.random.default_rng(7) + mat = rng.standard_normal((5, 3)) + vec = rng.standard_normal(5) + alpha = 0.3 + target = np.eye(3) + + def extra_grad(v): + """Return alpha * target @ v.""" + return alpha * (target @ v) + + result, _ = fista_gradient(mat, vec, extra_grad=extra_grad) + np.testing.assert_allclose(result.sum(), 1.0, rtol=1e-5) + assert np.all(result >= -1e-10) + + @pytest.mark.parametrize("N", [2, 5, 10]) + def test_various_sizes(self, N: int) -> None: # noqa: N803 + """Agreement with prox_gradient for several problem sizes.""" + rng = np.random.default_rng(N) + mat = rng.standard_normal((3 * N, N)) + vec = rng.standard_normal(3 * N) + w_fista, _ = fista_gradient(mat, vec, eps_rel=1e-8) + w_prox, _ = prox_gradient(mat, vec, eps_rel=1e-8) + np.testing.assert_allclose(w_fista, w_prox, atol=1e-4) diff --git a/tests/test_shrinkage_util.py b/tests/test_shrinkage_util.py new file mode 100644 index 0000000..a59e85a --- /dev/null +++ b/tests/test_shrinkage_util.py @@ -0,0 +1,246 @@ +"""Tests for fast_minimum_variance.shrinkage.util — shrinkage target utilities.""" + +import numpy as np +import pytest + +from fast_minimum_variance.shrinkage.util import ( + cc_target, + lw_alpha_and_target, + lw_alpha_and_target_hard, + lw_alpha_for_target, + oas_alpha_and_target, + rmt_target_and_alpha, +) + + +@pytest.fixture(scope="module") +def X(): # noqa: N802 + """Return a demeaned (200, 10) return matrix.""" + rng = np.random.default_rng(0) + X = rng.standard_normal((200, 10)) # noqa: N806 + return X - X.mean(axis=0) + + +# --------------------------------------------------------------------------- +# lw_alpha_and_target +# --------------------------------------------------------------------------- + + +class TestLwAlphaAndTarget: + """Tests for lw_alpha_and_target.""" + + def test_alpha_in_unit_interval(self, X): # noqa: N803 + """LW alpha is in [0, 1].""" + alpha, _ = lw_alpha_and_target(X) + assert 0.0 <= alpha <= 1.0 + + def test_alpha_is_float(self, X): # noqa: N803 + """Returned alpha is a Python float.""" + alpha, _ = lw_alpha_and_target(X) + assert isinstance(alpha, float) + + def test_target_is_scaled_identity(self, X): # noqa: N803 + """Target is bar_lam * I: diagonal elements equal, off-diagonal zero.""" + _, target = lw_alpha_and_target(X) + n = X.shape[1] + assert target.shape == (n, n) + diag = np.diag(target) + np.testing.assert_allclose(diag, diag[0]) + np.testing.assert_allclose(target - np.diag(diag), 0.0, atol=1e-12) + + def test_target_bar_lam_equals_mean_eigenvalue(self, X): # noqa: N803 + """bar_lam equals the mean eigenvalue of the sample covariance.""" + T, n = X.shape # noqa: N806 + _, target = lw_alpha_and_target(X) + bar_lam = target[0, 0] + cov = (X.T @ X) / T + assert bar_lam == pytest.approx(np.trace(cov) / n, rel=1e-6) + + +# --------------------------------------------------------------------------- +# lw_alpha_and_target_hard +# --------------------------------------------------------------------------- + + +class TestLwAlphaAndTargetHard: + """Tests for lw_alpha_and_target_hard.""" + + def test_default_alpha_is_half(self, X): # noqa: N803 + """Default alpha argument is 0.5.""" + alpha, _ = lw_alpha_and_target_hard(X) + assert alpha == 0.5 + + def test_custom_alpha_passthrough(self, X): # noqa: N803 + """Supplied alpha is returned unchanged.""" + alpha, _ = lw_alpha_and_target_hard(X, alpha=0.3) + assert alpha == 0.3 + + def test_target_is_scaled_identity(self, X): # noqa: N803 + """Target is bar_lam * I regardless of alpha.""" + _, target = lw_alpha_and_target_hard(X) + n = X.shape[1] + assert target.shape == (n, n) + diag = np.diag(target) + np.testing.assert_allclose(diag, diag[0]) + np.testing.assert_allclose(target - np.diag(diag), 0.0, atol=1e-12) + + def test_same_target_as_lw(self, X): # noqa: N803 + """Target matches lw_alpha_and_target (same bar_lam * I formula).""" + _, lw_tgt = lw_alpha_and_target(X) + _, hard_tgt = lw_alpha_and_target_hard(X) + np.testing.assert_allclose(hard_tgt, lw_tgt, rtol=1e-10) + + +# --------------------------------------------------------------------------- +# oas_alpha_and_target +# --------------------------------------------------------------------------- + + +class TestOasAlphaAndTarget: + """Tests for oas_alpha_and_target.""" + + def test_alpha_in_unit_interval(self, X): # noqa: N803 + """OAS alpha is in [0, 1].""" + alpha, _ = oas_alpha_and_target(X) + assert 0.0 <= alpha <= 1.0 + + def test_alpha_is_float(self, X): # noqa: N803 + """Returned alpha is a Python float.""" + alpha, _ = oas_alpha_and_target(X) + assert isinstance(alpha, float) + + def test_target_is_scaled_identity(self, X): # noqa: N803 + """Target is bar_lam * I (same formula as LW).""" + _, target = oas_alpha_and_target(X) + n = X.shape[1] + assert target.shape == (n, n) + diag = np.diag(target) + np.testing.assert_allclose(target - np.diag(diag), 0.0, atol=1e-12) + + def test_same_target_as_lw(self, X): # noqa: N803 + """OAS and LW use the same bar_lam * I target.""" + _, lw_tgt = lw_alpha_and_target(X) + _, oas_tgt = oas_alpha_and_target(X) + np.testing.assert_allclose(oas_tgt, lw_tgt, rtol=1e-10) + + +# --------------------------------------------------------------------------- +# cc_target +# --------------------------------------------------------------------------- + + +class TestCcTarget: + """Tests for cc_target (constant-correlation shrinkage target).""" + + def test_shape(self, X): # noqa: N803 + """Target has shape (n, n).""" + target, _ = cc_target(X) + assert target.shape == (X.shape[1], X.shape[1]) + + def test_diagonal_equals_sample_variance(self, X): # noqa: N803 + """Diagonal entries equal the per-asset sample variances.""" + T, _ = X.shape # noqa: N806 + cov = (X.T @ X) / T + target, _ = cc_target(X) + np.testing.assert_allclose(np.diag(target), np.diag(cov), rtol=1e-10) + + def test_rho_bar_in_unit_interval(self, X): # noqa: N803 + """Mean correlation rho_bar is in (-1, 1).""" + _, rho_bar = cc_target(X) + assert -1.0 < rho_bar < 1.0 + + def test_target_is_symmetric(self, X): # noqa: N803 + """Target matrix is symmetric.""" + target, _ = cc_target(X) + np.testing.assert_allclose(target, target.T, atol=1e-12) + + def test_target_is_psd(self, X): # noqa: N803 + """Target matrix is positive semi-definite.""" + target, _ = cc_target(X) + eigs = np.linalg.eigvalsh(target) + assert np.all(eigs >= -1e-10) + + +# --------------------------------------------------------------------------- +# lw_alpha_for_target +# --------------------------------------------------------------------------- + + +class TestLwAlphaForTarget: + """Tests for lw_alpha_for_target.""" + + def test_alpha_in_unit_interval(self, X): # noqa: N803 + """Returned alpha is in [0, 1].""" + _, target = lw_alpha_and_target(X) + alpha = lw_alpha_for_target(X, target) + assert 0.0 <= alpha <= 1.0 + + def test_agrees_with_lw_alpha_and_target(self, X): # noqa: N803 + """Result matches the alpha from lw_alpha_and_target for the same target.""" + alpha_direct, target = lw_alpha_and_target(X) + alpha_via_fn = lw_alpha_for_target(X, target) + assert alpha_direct == pytest.approx(alpha_via_fn, rel=1e-6) + + def test_with_cc_target(self, X): # noqa: N803 + """Works with a constant-correlation target.""" + target, _ = cc_target(X) + alpha = lw_alpha_for_target(X, target) + assert 0.0 <= alpha <= 1.0 + + +# --------------------------------------------------------------------------- +# rmt_target_and_alpha +# --------------------------------------------------------------------------- + + +class TestRmtTargetAndAlpha: + """Tests for rmt_target_and_alpha (Marchenko-Pastur eigenvalue cleaning).""" + + def test_return_shapes(self, X): # noqa: N803 + """Return shapes are consistent with n assets and k signal factors.""" + target, lr_factors, k, _alpha = rmt_target_and_alpha(X) + n = X.shape[1] + _bar_lam, U_k, delta_k = lr_factors # noqa: N806 + assert target.shape == (n, n) + assert U_k.shape == (n, k) + assert delta_k.shape == (k,) + + def test_k_non_negative(self, X): # noqa: N803 + """Number of signal factors is non-negative.""" + _, _, k, _ = rmt_target_and_alpha(X) + assert k >= 0 + + def test_alpha_in_unit_interval(self, X): # noqa: N803 + """LW oracle alpha is in [0, 1].""" + _, _, _, alpha = rmt_target_and_alpha(X) + assert 0.0 <= alpha <= 1.0 + + def test_target_is_symmetric(self, X): # noqa: N803 + """RMT target is symmetric.""" + target, *_ = rmt_target_and_alpha(X) + np.testing.assert_allclose(target, target.T, atol=1e-10) + + def test_target_is_psd(self, X): # noqa: N803 + """RMT target is positive semi-definite.""" + target, *_ = rmt_target_and_alpha(X) + eigs = np.linalg.eigvalsh(target) + assert np.all(eigs >= -1e-10) + + def test_lr_factors_reconstruct_target(self, X): # noqa: N803 + """Low-rank factors reproduce the full target: bar_lam*I + U_k diag(delta_k) U_k^T.""" + target, lr_factors, _k, _ = rmt_target_and_alpha(X) + bar_lam, U_k, delta_k = lr_factors # noqa: N806 + n = X.shape[1] + reconstructed = bar_lam * np.eye(n) + U_k @ np.diag(delta_k) @ U_k.T + np.testing.assert_allclose(reconstructed, target, atol=1e-10) + + def test_rank_deficient(self): + """Works on rank-deficient X (n > T) where many eigenvalues are zero.""" + X = np.random.default_rng(7).standard_normal((15, 50)) # noqa: N806 + target, lr_factors, k, alpha = rmt_target_and_alpha(X) + bar_lam, U_k, delta_k = lr_factors # noqa: N806 + assert target.shape == (50, 50) + assert U_k.shape == (50, k) + assert 0.0 <= alpha <= 1.0 + reconstructed = bar_lam * np.eye(50) + U_k @ np.diag(delta_k) @ U_k.T + np.testing.assert_allclose(reconstructed, target, atol=1e-10) diff --git a/tests/test_small.py b/tests/test_small.py index e103433..c444d51 100644 --- a/tests/test_small.py +++ b/tests/test_small.py @@ -96,7 +96,111 @@ def solve_fn(active): return np.array([0.5, 0.5]), 1 # nu_2 = 0-1 = -1 → re-add return np.ones(active.sum()) / active.sum(), 1 - w, _ = p._constraint_active_set(solve_fn) + w, *_ = p._constraint_active_set(solve_fn) assert call_no[0] == 3 np.testing.assert_allclose(w, [1 / 3, 1 / 3, 1 / 3], atol=1e-10) + + +# --------------------------------------------------------------------------- +# _constraint_active_set_warm — primal drop, dual re-add, gradient branches +# --------------------------------------------------------------------------- + + +def test_warm_default_solve_fn(): + """Calling _constraint_active_set_warm with solve_fn=None defaults to _cg_step.""" + p = MinVarProblem(X3) + w, *_ = p._constraint_active_set_warm() + assert abs(w.sum() - 1.0) < 1e-4 + + +def test_warm_primal_drop_strong(): + """Strongly negative weights (< -10*tol) are all dropped at once.""" + p = MinVarProblem(X3) + call_no = [0] + + def solve_fn(active, x0=None): + """Return a solution with a strongly negative weight on the first call.""" + call_no[0] += 1 + n_a = int(active.sum()) + if call_no[0] == 1: + w = np.ones(n_a) / n_a + w[0] = -0.5 # strongly negative → triggers bulk drop + return w, 1 + return np.ones(n_a) / n_a, 1 + + w, *_ = p._constraint_active_set_warm(solve_fn=solve_fn) + assert np.all(w >= -1e-10) + + +def test_warm_primal_drop_weak(): + """A single mildly negative weight triggers the argmin-drop branch.""" + p = MinVarProblem(X3) + call_no = [0] + + def solve_fn(active, x0=None): + """Return a solution with a mildly negative weight on the first call.""" + call_no[0] += 1 + n_a = int(active.sum()) + if call_no[0] == 1: + w = np.ones(n_a) / n_a + w[-1] = -3e-6 # between -1e-5 and -1e-6: negative but not "strong" + return w, 1 + return np.ones(n_a) / n_a, 1 + + w, *_ = p._constraint_active_set_warm(solve_fn=solve_fn) + assert np.all(w >= -1e-10) + + +def test_warm_dual_readd(): + """Excluded asset with negative dual is re-added (dual step, lines 339-348).""" + p = MinVarProblem(X3) + # Analytic trace: same as test_dual_readd but via the warm interface. + call_no = [0] + + def solve_fn(active, x0=None): + """Return preset solutions mimicking the primal-dual trace.""" + call_no[0] += 1 + if call_no[0] == 1: + return np.array([2 / 3, 2 / 3, -1 / 3]), 1 # drop asset 2 + if call_no[0] == 2: + return np.array([0.5, 0.5]), 1 # nu[2]= 2-1=1 ≥ 0 → done + return np.ones(int(active.sum())) / int(active.sum()), 1 + + w, *_ = p._constraint_active_set_warm(solve_fn=solve_fn) + np.testing.assert_allclose(w[:2], [0.5, 0.5], atol=1e-10) + + +def test_warm_gradient_with_target(): + """Target (dense) gradient branch is exercised inside the warm loop.""" + rng = np.random.default_rng(0) + X = rng.standard_normal((30, 4)) # noqa: N806 + target = np.eye(4) + alpha = 0.3 + p = MinVarProblem(X, alpha=alpha, target=target) + w, *_ = p._constraint_active_set_warm() + assert abs(w.sum() - 1.0) < 1e-4 + + +def test_warm_gradient_with_target_lr(): + """target_lr gradient branch is exercised inside the warm loop.""" + rng = np.random.default_rng(1) + n = 4 + X = rng.standard_normal((30, n)) # noqa: N806 + U_k = rng.standard_normal((n, 2)) # noqa: N806 + delta_k = np.abs(rng.standard_normal(2)) + 0.1 + target_lr = (0.5, U_k, delta_k) + alpha = 0.3 + p = MinVarProblem(X, alpha=alpha, target_lr=target_lr) + w, *_ = p._constraint_active_set_warm() + assert abs(w.sum() - 1.0) < 1e-4 + + +def test_warm_gradient_with_rho(): + """Rho != 0 gradient branch is exercised inside the warm loop.""" + rng = np.random.default_rng(2) + X = rng.standard_normal((30, 4)) # noqa: N806 + mu = rng.standard_normal(4) + p = MinVarProblem(X, rho=0.5, mu=mu) + w, *_ = p._constraint_active_set_warm() + assert abs(w.sum() - 1.0) < 1e-4 diff --git a/uv.lock b/uv.lock index 816cb66..a976c3b 100644 --- a/uv.lock +++ b/uv.lock @@ -451,6 +451,7 @@ dev = [ { name = "pandas" }, { name = "pyarrow" }, { name = "requests" }, + { name = "scikit-learn" }, { name = "yfinance" }, ] test = [ @@ -474,6 +475,7 @@ dev = [ { name = "pandas", specifier = ">=3.0" }, { name = "pyarrow", specifier = ">=24.0.0" }, { name = "requests", specifier = ">=2.0" }, + { name = "scikit-learn", specifier = ">=1.5" }, { name = "yfinance", specifier = ">=0.2" }, ] lint = [] @@ -1810,6 +1812,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] +[[package]] +name = "scikit-learn" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "narwhals" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/6f/37092bdb25f712817231799fc5674d8e704066a8a70c1d2d40517e18b4ab/scikit_learn-1.9.0.tar.gz", hash = "sha256:8833266989d3a5110178a9fae30783675460724d0e1efb13b14901d2c660c557", size = 7750767, upload-time = "2026-06-02T11:54:32.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f5/be/e844fd9586e66540a15b71924d17a6cbc1bb749e81ddd0a796bcdba4c055/scikit_learn-1.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9db6f4d34e68c8899e4cab27fdf8eafe6ed21f2ba52ceb25ea250cd237f8e47b", size = 8789686, upload-time = "2026-06-02T11:53:05.439Z" }, + { url = "https://files.pythonhosted.org/packages/42/e2/ff880f62677a17d035817d543cb0fc8727d01eccbee81c5f7fc733a9d856/scikit_learn-1.9.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:f401448645a3e7bc115aa3c094097865155b34bff1cba8101857d9104e99074c", size = 8256782, upload-time = "2026-06-02T11:53:08.904Z" }, + { url = "https://files.pythonhosted.org/packages/25/64/eb40435e1a508ab1b4e284ce43ae80f6a162e5be5e38ed5a6fab467a9ea4/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd3a8ef0c758555a3b23c03adaa858af32f7736785ded50ad5991f59c4ed03fa", size = 8992419, upload-time = "2026-06-02T11:53:11.551Z" }, + { url = "https://files.pythonhosted.org/packages/8d/da/4810a28e473185429e45a57eebcc91fc991b33d889cc0676063e671db03d/scikit_learn-1.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f7e254636164090da847715a27f8e5478feb98c40a9e0ee90cbd277de9e5ceb8", size = 9281411, upload-time = "2026-06-02T11:53:15.063Z" }, + { url = "https://files.pythonhosted.org/packages/3b/67/be3d369f40d8178ba3bd86635d132e08cb5329b023e4669d9426d84bc007/scikit_learn-1.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:5dc1818c77575d149e25fce9ef82dd7b7263ae372f03494158668ad632a69759", size = 8272736, upload-time = "2026-06-02T11:53:18.108Z" }, + { url = "https://files.pythonhosted.org/packages/37/79/a733f02dc2118da7e77a134b34f39f40201a353311b011d20859d2db3556/scikit_learn-1.9.0-cp311-cp311-win_arm64.whl", hash = "sha256:366652351f092b219c248f1e72821e841960a63d8f358f1dcfd54dc1cbdbbc28", size = 7919564, upload-time = "2026-06-02T11:53:21.2Z" }, + { url = "https://files.pythonhosted.org/packages/ac/20/75f915ff375d6249e6550ac740fdbbd66159a068fd3af1400ff62036b07a/scikit_learn-1.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2bd41b0d201bc81575531b96b713d3eb5e5f50fb0b82101ff0f92294fdc236ac", size = 8741122, upload-time = "2026-06-02T11:53:24.08Z" }, + { url = "https://files.pythonhosted.org/packages/cc/d5/2b5148f2279196775e1db2aeb85d14b70ac80e7e32b3b28e7ebeafb0901d/scikit_learn-1.9.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:5be45aa4a42a68a533913a6ed736cf309de2226411c79ef8d609a5456f1939b1", size = 8261512, upload-time = "2026-06-02T11:53:27.183Z" }, + { url = "https://files.pythonhosted.org/packages/a0/ee/5adbc77656b71f9456a2f5a7a9fdb4bcf9207a6b962889f1c2f9323afa4e/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5e50ed4da51974e86e940690e9a3d82e729b62b5a49f7c9bac534d515d39d86f", size = 8837603, upload-time = "2026-06-02T11:53:30.328Z" }, + { url = "https://files.pythonhosted.org/packages/6c/c2/63fdda36c56437eeb44aaf9493c8bcd62ce230ab1598924fc626ffbfa943/scikit_learn-1.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:056c92bb67ad4c28463c2f2653d9701449201e7e7a9e94e321be0f71c4fef2b8", size = 9132097, upload-time = "2026-06-02T11:53:33.456Z" }, + { url = "https://files.pythonhosted.org/packages/83/a4/c8e67227c680e2259c8864ae72ff48b06e16a6f51253a22167aa02a8aa4e/scikit_learn-1.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:4306775fad04cc4b472a1b15af1ae9cede1540fbfcc17fbce3767cd8dc7ae283", size = 8211173, upload-time = "2026-06-02T11:53:36.602Z" }, + { url = "https://files.pythonhosted.org/packages/cf/fd/3c0863792e98e67e9184aa4029288a175935eb65443afcd30d4f143450cf/scikit_learn-1.9.0-cp312-cp312-win_arm64.whl", hash = "sha256:26e22435f63bcdcf396b574273f29f13dd531f5ea035801f5be10ba1540a4e60", size = 7867451, upload-time = "2026-06-02T11:53:39.075Z" }, + { url = "https://files.pythonhosted.org/packages/3c/01/cf3310626b6d48d3e9be69a1223f9180360b5e6edb045f50fade723ce494/scikit_learn-1.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:80746d63bd4b6eaca54d36fe5feaf4d28bb38dc6f9470f81c7cad7c40155f119", size = 8705188, upload-time = "2026-06-02T11:53:41.964Z" }, + { url = "https://files.pythonhosted.org/packages/3e/04/5acd7ae280c5f93b6ac5ef6cdec14eef4c8d1cd91d85b3292989c94d96b1/scikit_learn-1.9.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5b934c45c252844a91d69fda3a34cff5e7307e1db10d77cb10a3980312c74713", size = 8228299, upload-time = "2026-06-02T11:53:44.817Z" }, + { url = "https://files.pythonhosted.org/packages/0c/39/ffe829a5b8ecb40a518724a997794657fdc354ada5e8fe8e64d998c0bac9/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:38c3dcb9a1ffb85505ec53d54c7b4aea0cff70050425a7760c2af661ac85df05", size = 8789690, upload-time = "2026-06-02T11:53:47.461Z" }, + { url = "https://files.pythonhosted.org/packages/1f/88/8dab5de10c638c083772a6be83a3d8106ced492f74a928c8693638e5bb50/scikit_learn-1.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da76d09304a4706db7cc1e3ebaa3b6b98a67365cc11d2996c4f1e58ba47df714", size = 9087723, upload-time = "2026-06-02T11:53:50.702Z" }, + { url = "https://files.pythonhosted.org/packages/20/3f/7917ca72464038f6240ec70c29f94862d08a34a74291ae4d4ec5eb8186a0/scikit_learn-1.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5808d98f15c6bf6d9d96d2348c1997392a5888ce7097e664105f930c4bca1277", size = 8184330, upload-time = "2026-06-02T11:53:53.396Z" }, + { url = "https://files.pythonhosted.org/packages/78/c7/15739eb2f61fda3c54639e9942414e5a19ad8a8d1f5a3266afad7cb7df80/scikit_learn-1.9.0-cp313-cp313-win_arm64.whl", hash = "sha256:d77f54c017633791bc0225a43e2f8d03745fdcfe4880268fcc4df15f505dec2e", size = 7840653, upload-time = "2026-06-02T11:53:56.035Z" }, + { url = "https://files.pythonhosted.org/packages/f4/7d/c9a35cf59b20a86fec24d306f1547b78dec194b08d367ce2a3e4854169d9/scikit_learn-1.9.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9656acd4e93f74e0b66c8a36c88830a99252dfa900044d36bc2212ae89a47162", size = 8713289, upload-time = "2026-06-02T11:53:58.788Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a7/552a7821597c632b907f7bfe8f36f9f572777af8ef8a48353041cf8e091a/scikit_learn-1.9.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:24360002ae845e7866522b0a5bbf690802e7bc388cac8663502e78aa98598aa2", size = 8245141, upload-time = "2026-06-02T11:54:01.694Z" }, + { url = "https://files.pythonhosted.org/packages/7d/79/f4a0c4fe9711154cddabf913471153af79056382ddc612cfe5ee0ff4b72e/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5162ad10a418c8a282dde04c9aa06965de3e9a65f33c1440c0ae69bb1a09d913", size = 8847671, upload-time = "2026-06-02T11:54:04.448Z" }, + { url = "https://files.pythonhosted.org/packages/f0/af/4d72d9e475ac83719160c662619e4bf7b95c19507cd582e7d0167a3c3dae/scikit_learn-1.9.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fea2cc5677ab49d6f5bade978c866da44957b712d92e9635e8b4f723013c3cb", size = 9118104, upload-time = "2026-06-02T11:54:07.205Z" }, + { url = "https://files.pythonhosted.org/packages/a2/d5/6a58eea2cb9abbb9b3f2bb8b2cfb3243d1152d69f442d256c7af71304769/scikit_learn-1.9.0-cp314-cp314-win_amd64.whl", hash = "sha256:64fa347efc1c839c487433e40c5144d38c336e8a2b59c81aa8660373945c2673", size = 8290674, upload-time = "2026-06-02T11:54:10.087Z" }, + { url = "https://files.pythonhosted.org/packages/65/5b/d4c879cf358f1187141cf90ced473f087183489090244f50c124a2ee478b/scikit_learn-1.9.0-cp314-cp314-win_arm64.whl", hash = "sha256:1b944b6db288f6b926e3650026ddafb988929de95d11fc2cc5fa117773c9ba42", size = 7978807, upload-time = "2026-06-02T11:54:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/8a/43/bfae3121ec67ae09150d453c442c7c1cc166e9aefe056e6ab3b7728a5cfc/scikit_learn-1.9.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:4ccacf04ca5f4b492158a5f28afe0ace43f81b2571e4b9a66d34848b46128949", size = 9031941, upload-time = "2026-06-02T11:54:15.436Z" }, + { url = "https://files.pythonhosted.org/packages/75/b0/20a4546eb17f3b25d3c66df15810411c14ed5065bcfab50b53c96fb627b2/scikit_learn-1.9.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:ee1a8db2c18c08e34c7412d4b10be1cac214cd4ea7dc9715a6a327eb49a37c96", size = 8613528, upload-time = "2026-06-02T11:54:18.842Z" }, + { url = "https://files.pythonhosted.org/packages/18/3c/e440e039bb82cd19004edaaad00acbde0fb9b461083c3ecf37941c557312/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:147e9329ef0e39f75d4cffa02b2aa48d827832684926cd5210d9a2cb5c57246b", size = 8855050, upload-time = "2026-06-02T11:54:21.699Z" }, + { url = "https://files.pythonhosted.org/packages/43/26/b341b8dab5998da6270a3a42c2152c578501354d36f944b5856757035ef8/scikit_learn-1.9.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bad8f8b9950321b54c965fdcbac6c6c55e79e16646b49977bcf3668d3870a1a", size = 9097190, upload-time = "2026-06-02T11:54:24.454Z" }, + { url = "https://files.pythonhosted.org/packages/fb/de/b650b4d69b84468cfa2e28a3ff7b8103743029e6446ce1a97fe060ef688c/scikit_learn-1.9.0-cp314-cp314t-win_amd64.whl", hash = "sha256:78fc56eafd4edb9575d2d8950d1dd152061abb573341a1cb7e099fc40f6c6666", size = 8963204, upload-time = "2026-06-02T11:54:27.428Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f3/ff83d76d7418112e5a61326443cdda87be3545dd8d6599c95b2481a4419e/scikit_learn-1.9.0-cp314-cp314t-win_arm64.whl", hash = "sha256:051075bda8b7aab87b1906ab3d4740a1e1224a19d7b3781a576736edc94e76aa", size = 8222661, upload-time = "2026-06-02T11:54:30.192Z" }, +] + [[package]] name = "scipy" version = "1.17.1" @@ -1989,6 +2036,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274, upload-time = "2025-03-13T13:49:23.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, +] + [[package]] name = "tomlkit" version = "0.14.0"