From 1ff6172bc7506d0c6b208237845cfd8a22e5cf7d Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 11:31:40 -0500 Subject: [PATCH 1/6] Add volume factor family with factory pattern and crypto evaluation --- scripts/evaluate_volume_factors_crypto.py | 171 +++++++++++ src/factorlab/factors/__init__.py | 4 + src/factorlab/factors/volume/__init__.py | 41 +++ src/factorlab/factors/volume/base.py | 279 ++++++++++++++++++ .../volume/delta_negative_volume_indicator.py | 21 ++ .../factors/volume/delta_on_balance_volume.py | 21 ++ .../volume/delta_positive_volume_indicator.py | 21 ++ .../factors/volume/delta_price_volume_fit.py | 21 ++ .../volume/delta_product_price_volume.py | 35 +++ .../factors/volume/delta_sum_price_volume.py | 35 +++ .../factors/volume/delta_volume_momentum.py | 22 ++ .../factors/volume/diff_price_volume_fit.py | 22 ++ .../volume/diff_volume_weighted_ma_over_ma.py | 22 ++ .../volume/negative_volume_indicator.py | 19 ++ .../factors/volume/on_balance_volume.py | 19 ++ .../volume/positive_volume_indicator.py | 19 ++ .../factors/volume/price_volume_fit.py | 19 ++ .../factors/volume/product_price_volume.py | 32 ++ .../factors/volume/sum_price_volume.py | 32 ++ src/factorlab/factors/volume/volume.py | 132 +++++++++ .../factors/volume/volume_momentum.py | 20 ++ .../volume/volume_weighted_ma_over_ma.py | 19 ++ tests/conftest.py | 10 + tests/features/test_volume_factors.py | 127 ++++++++ 24 files changed, 1163 insertions(+) create mode 100644 scripts/evaluate_volume_factors_crypto.py create mode 100644 src/factorlab/factors/__init__.py create mode 100644 src/factorlab/factors/volume/__init__.py create mode 100644 src/factorlab/factors/volume/base.py create mode 100644 src/factorlab/factors/volume/delta_negative_volume_indicator.py create mode 100644 src/factorlab/factors/volume/delta_on_balance_volume.py create mode 100644 src/factorlab/factors/volume/delta_positive_volume_indicator.py create mode 100644 
src/factorlab/factors/volume/delta_price_volume_fit.py create mode 100644 src/factorlab/factors/volume/delta_product_price_volume.py create mode 100644 src/factorlab/factors/volume/delta_sum_price_volume.py create mode 100644 src/factorlab/factors/volume/delta_volume_momentum.py create mode 100644 src/factorlab/factors/volume/diff_price_volume_fit.py create mode 100644 src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py create mode 100644 src/factorlab/factors/volume/negative_volume_indicator.py create mode 100644 src/factorlab/factors/volume/on_balance_volume.py create mode 100644 src/factorlab/factors/volume/positive_volume_indicator.py create mode 100644 src/factorlab/factors/volume/price_volume_fit.py create mode 100644 src/factorlab/factors/volume/product_price_volume.py create mode 100644 src/factorlab/factors/volume/sum_price_volume.py create mode 100644 src/factorlab/factors/volume/volume.py create mode 100644 src/factorlab/factors/volume/volume_momentum.py create mode 100644 src/factorlab/factors/volume/volume_weighted_ma_over_ma.py create mode 100644 tests/conftest.py create mode 100644 tests/features/test_volume_factors.py diff --git a/scripts/evaluate_volume_factors_crypto.py b/scripts/evaluate_volume_factors_crypto.py new file mode 100644 index 0000000..9a41b2c --- /dev/null +++ b/scripts/evaluate_volume_factors_crypto.py @@ -0,0 +1,171 @@ +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import Dict, List, Tuple + +import numpy as np +import pandas as pd + + +ROOT = Path(__file__).resolve().parents[1] +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +from factorlab.factors.volume import Volume # noqa: E402 + + +FACTOR_SPECS: List[Tuple[str, Dict[str, int]]] = [ + ("volume_momentum", {"hist_length": 20, "multiplier": 4}), + ("delta_volume_momentum", {"hist_length": 20, "multiplier": 4, "delta_len": 100}), + ("volume_weighted_ma_over_ma", 
def load_crypto_ohlcv(data_dir: Path, max_symbols: int, min_history: int = 365) -> pd.DataFrame:
    """Load per-symbol OHLCV CSV files into one ``(date, ticker)``-indexed frame.

    Args:
        data_dir: Directory scanned (non-recursively) for ``*.csv`` files.
        max_symbols: Only the first ``max_symbols`` files, in sorted name
            order, are read.
        min_history: Minimum number of rows a ticker must have to be kept.
            Defaults to 365, the previously hard-coded threshold.

    Returns:
        DataFrame with columns ``open, high, low, close, volume`` and a sorted
        MultiIndex of ``(date, ticker)``.

    Raises:
        FileNotFoundError: If ``data_dir`` contains no CSV files.
        RuntimeError: If none of the files yielded usable rows.
    """
    files = sorted(data_dir.glob("*.csv"))[:max_symbols]
    if not files:
        raise FileNotFoundError(f"No CSV files found in: {data_dir}")

    wanted = ["open_time", "open", "high", "low", "close", "volume", "ticker"]
    parts = []
    for path in files:
        try:
            tmp = pd.read_csv(path, usecols=wanted)
        except (OSError, ValueError) as exc:
            # Loading stays best-effort, but a skipped file is reported instead
            # of vanishing silently. pandas' parser/empty-data/usecols errors
            # are all ValueError subclasses.
            print(f"Skipping {path.name}: {exc}", file=sys.stderr)
            continue

        if tmp.empty:
            continue

        # NOTE(review): to_datetime runs with default units; if open_time holds
        # epoch integers (e.g. milliseconds) they would be read as nanoseconds.
        # Confirm the CSV format.
        tmp["date"] = pd.to_datetime(tmp["open_time"], errors="coerce")
        tmp = tmp.dropna(subset=["date", "ticker"])
        tmp = tmp.set_index(["date", "ticker"]).sort_index()
        parts.append(tmp[["open", "high", "low", "close", "volume"]])

    if not parts:
        raise RuntimeError(f"Could not load usable OHLCV data from: {data_dir}")

    df = pd.concat(parts, axis=0).sort_index()
    counts = df.groupby(level=1).size()
    keep = counts[counts >= min_history].index
    return df[df.index.get_level_values(1).isin(keep)]


def _long_short_weights(factor: pd.Series) -> pd.Series:
    """Return per-date rank weights that sum to zero with unit gross exposure.

    Percentile ranks are demeaned within each date. Subtracting a fixed 0.5
    instead leaves a net-long tilt, because ``rank(pct=True)`` averages
    ``(n + 1) / (2n)`` rather than 0.5. Dates whose gross exposure is zero
    (for example a single asset) get NaN weights.
    """
    ranks = factor.groupby(level=0).rank(pct=True)
    centered = ranks - ranks.groupby(level=0).transform("mean")
    gross = centered.abs().groupby(level=0).transform("sum")
    return centered / gross.replace(0, np.nan)


def evaluate(df: pd.DataFrame, top_n: int, ann_factor: int = 365) -> pd.DataFrame:
    """Score every factor in ``FACTOR_SPECS`` on a liquidity-filtered universe.

    For each factor the function computes the daily Spearman IC against the
    next-bar return and the return of a dollar-neutral rank-weighted
    long/short portfolio, restricted each date to the ``top_n`` assets by
    20-bar average ``close * volume``.

    Args:
        df: ``(date, ticker)``-indexed frame with ``close`` and ``volume``.
        top_n: Number of most liquid assets eligible on each date.
        ann_factor: Periods per year used to annualise return and volatility.

    Returns:
        One row per factor method, indexed by ``method`` and sorted by
        ``mean_ic`` descending.

    Raises:
        RuntimeError: If no factor produced an evaluation row.
    """
    close = df["close"]
    volume = df["volume"]
    fwd_ret = close.groupby(level=1).shift(-1).div(close) - 1.0

    notional = close * volume
    liquidity = (
        notional.groupby(level=1)
        .rolling(window=20, min_periods=20)
        .mean()
        .droplevel(0)
        .sort_index()
    )
    eligible = liquidity.groupby(level=0).rank(ascending=False, method="first") <= top_n

    rows = []
    for method, kwargs in FACTOR_SPECS:
        factor = Volume(method=method, **kwargs)
        out = factor.compute(df)
        new_cols = [c for c in out.columns if c not in df.columns]
        if not new_cols:
            # Without a new column there is nothing to score; report it rather
            # than failing with a bare IndexError.
            print(f"Skipping {method}: no new factor column produced", file=sys.stderr)
            continue
        new_col = new_cols[0]

        panel = pd.concat(
            [
                out[new_col].rename("factor"),
                fwd_ret.rename("fwd_ret"),
                eligible.rename("eligible"),
            ],
            axis=1,
        )
        panel = panel[panel["eligible"]].dropna()
        if panel.empty:
            continue

        # Cross-sections with fewer than 10 names give too noisy a rank
        # correlation, so they contribute NaN.
        daily_ic = panel.groupby(level=0).apply(
            lambda g: g["factor"].corr(g["fwd_ret"], method="spearman") if g.shape[0] >= 10 else np.nan
        )

        weights = _long_short_weights(panel["factor"])
        ls_ret = (weights * panel["fwd_ret"]).groupby(level=0).sum()

        mean_ic = float(daily_ic.mean()) if daily_ic.notna().any() else np.nan
        std_ic = float(daily_ic.std()) if daily_ic.notna().sum() > 1 else np.nan
        ic_ir = mean_ic / std_ic if std_ic and np.isfinite(std_ic) else np.nan

        ann_ret = float(ls_ret.mean() * ann_factor) if ls_ret.notna().any() else np.nan
        ann_vol = float(ls_ret.std() * np.sqrt(ann_factor)) if ls_ret.notna().sum() > 1 else np.nan
        sharpe_365 = ann_ret / ann_vol if ann_vol and np.isfinite(ann_vol) else np.nan

        rows.append(
            {
                "method": method,
                "n_ic_obs": int(daily_ic.notna().sum()),
                "mean_ic": mean_ic,
                "ic_ir": ic_ir,
                "ann_ret_365": ann_ret,
                "ann_vol_365": ann_vol,
                "sharpe_365": sharpe_365,
            }
        )

    if not rows:
        raise RuntimeError("No factors produced valid evaluation rows.")

    return pd.DataFrame(rows).set_index("method").sort_values("mean_ic", ascending=False)


def main() -> None:
    """Parse CLI arguments, run the evaluation and print or save the summary."""
    parser = argparse.ArgumentParser(description="Evaluate volume factors on crypto universe.")
    # NOTE(review): the default below is a developer-local absolute path; pass
    # --data-dir explicitly on any other machine.
    parser.add_argument(
        "--data-dir",
        type=Path,
        default=Path("/Users/mikuts/astrofactor/astroblade/data/systamental/crypto/survivorship/binance_klines_history/daily/futures"),
        help="Directory containing per-symbol daily OHLCV CSV files.",
    )
    parser.add_argument("--max-symbols", type=int, default=120, help="Maximum number of symbol CSVs to load.")
    parser.add_argument("--top-n", type=int, default=60, help="Top-N liquid assets used each day.")
    parser.add_argument("--min-history", type=int, default=365, help="Minimum rows a ticker needs to be kept.")
    parser.add_argument("--output", type=Path, default=None, help="Optional output CSV for summary.")
    args = parser.parse_args()

    df = load_crypto_ohlcv(args.data_dir, args.max_symbols, min_history=args.min_history)
    summary = evaluate(df, top_n=args.top_n, ann_factor=365)

    pd.set_option("display.width", 200)
    pd.set_option("display.max_columns", 20)
    print(summary.round(4))

    if args.output is not None:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        summary.to_csv(args.output)
        print(f"\nSaved: {args.output}")


if __name__ == "__main__":
    main()
factorlab.factors.volume.volume_weighted_ma_over_ma import VolumeWeightedMAOverMA +from factorlab.factors.volume.diff_volume_weighted_ma_over_ma import DiffVolumeWeightedMAOverMA +from factorlab.factors.volume.price_volume_fit import PriceVolumeFit +from factorlab.factors.volume.diff_price_volume_fit import DiffPriceVolumeFit +from factorlab.factors.volume.delta_price_volume_fit import DeltaPriceVolumeFit +from factorlab.factors.volume.on_balance_volume import OnBalanceVolume +from factorlab.factors.volume.delta_on_balance_volume import DeltaOnBalanceVolume +from factorlab.factors.volume.positive_volume_indicator import PositiveVolumeIndicator +from factorlab.factors.volume.delta_positive_volume_indicator import DeltaPositiveVolumeIndicator +from factorlab.factors.volume.negative_volume_indicator import NegativeVolumeIndicator +from factorlab.factors.volume.delta_negative_volume_indicator import DeltaNegativeVolumeIndicator +from factorlab.factors.volume.product_price_volume import ProductPriceVolume +from factorlab.factors.volume.sum_price_volume import SumPriceVolume +from factorlab.factors.volume.delta_product_price_volume import DeltaProductPriceVolume +from factorlab.factors.volume.delta_sum_price_volume import DeltaSumPriceVolume + +__all__ = [ + "VolumeFactor", + "Volume", + "VolumeMomentum", + "DeltaVolumeMomentum", + "VolumeWeightedMAOverMA", + "DiffVolumeWeightedMAOverMA", + "PriceVolumeFit", + "DiffPriceVolumeFit", + "DeltaPriceVolumeFit", + "OnBalanceVolume", + "DeltaOnBalanceVolume", + "PositiveVolumeIndicator", + "DeltaPositiveVolumeIndicator", + "NegativeVolumeIndicator", + "DeltaNegativeVolumeIndicator", + "ProductPriceVolume", + "SumPriceVolume", + "DeltaProductPriceVolume", + "DeltaSumPriceVolume", +] diff --git a/src/factorlab/factors/volume/base.py b/src/factorlab/factors/volume/base.py new file mode 100644 index 0000000..bf9b068 --- /dev/null +++ b/src/factorlab/factors/volume/base.py @@ -0,0 +1,279 @@ +from __future__ import annotations + 
class VolumeFactor(Factor, ABC):
    """Base class for volume/price interaction factors.

    Subclasses implement :meth:`_compute_volume`, usually by combining the
    shared ``_raw_*`` components defined here. :meth:`transform` optionally
    compresses the raw signal with :meth:`_compress` and clips it to
    ``[-50, 50]``.

    For MultiIndex inputs every per-asset operation groups on index level 1,
    so level 1 is assumed to identify the asset.
    """

    def __init__(
        self,
        price_col: str = "close",
        volume_col: str = "volume",
        output_col: Optional[str] = None,
        compress: bool = True,
        compression_window: int = 250,
        compression_min_periods: int = 30,
        compression_strength: float = 1.0,
        **kwargs: Any,
    ):
        """Store column names and compression settings.

        Args:
            price_col: Name of the price column.
            volume_col: Name of the volume column.
            output_col: Explicit output column name; when falsy the name from
                :meth:`_generate_name` is used.
            compress: Whether :meth:`transform` applies :meth:`_compress`.
            compression_window: Rolling window for the robust scale.
            compression_min_periods: Minimum observations for that window.
            compression_strength: Multiplier applied before ``tanh``.
            **kwargs: Extra options, stored on ``self.kwargs`` unchanged.
        """
        super().__init__(
            name=self.__class__.__name__,
            description="Base class for volume factors.",
            category="Volume",
            tags=["volume", "flow", "microstructure"],
        )
        self.price_col = price_col
        self.volume_col = volume_col
        self.output_col = output_col
        self.compress = compress
        self.compression_window = compression_window
        self.compression_min_periods = compression_min_periods
        self.compression_strength = compression_strength
        self.kwargs = kwargs

    @property
    def inputs(self) -> List[str]:
        """Columns the factor reads: price first, then volume."""
        return [self.price_col, self.volume_col]

    def fit(
        self,
        X: Union[pd.Series, pd.DataFrame],
        y: Optional[Union[pd.Series, pd.DataFrame]] = None,
    ) -> "VolumeFactor":
        """Validate the input columns and mark the factor as fitted."""
        df_input = to_dataframe(X)
        self.validate_inputs(df_input)
        self._is_fitted = True
        return self

    def transform(self, X: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame:
        """Return a sorted copy of ``X`` with the factor column appended.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if not self._is_fitted:
            raise RuntimeError(f"Transform '{self.name}' must be fitted before calling transform()")

        df = to_dataframe(X).copy(deep=True)
        self.validate_inputs(df)
        df = df.sort_index()

        factor = self._compute_volume(df)
        if self.compress:
            factor = self._compress(factor)

        df[self._generate_name()] = factor.clip(-50, 50)
        return df

    @abstractmethod
    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Return the raw (uncompressed) factor aligned to ``df.index``."""
        raise NotImplementedError

    def _generate_name(self) -> str:
        """Return the output column name."""
        return self.output_col or self.name

    def _is_multiindex(self, series: pd.Series) -> bool:
        """Return True when ``series`` has a MultiIndex."""
        return isinstance(series.index, pd.MultiIndex)

    def _safe_log(self, series: pd.Series) -> pd.Series:
        """Natural log with non-positive values mapped to NaN."""
        return np.log(series.where(series > 0, np.nan))

    def _shift_by_asset(self, series: pd.Series, periods: int) -> pd.Series:
        """Shift ``series`` by ``periods`` rows, per asset for a MultiIndex."""
        if self._is_multiindex(series):
            return series.groupby(level=1).shift(periods)
        return series.shift(periods)

    def _pct_change_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series:
        """Percentage change without forward-filling gaps, per asset."""
        if self._is_multiindex(series):
            return series.groupby(level=1).pct_change(periods=periods, fill_method=None)
        return series.pct_change(periods=periods, fill_method=None)

    def _diff_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series:
        """Difference over ``periods`` rows, per asset for a MultiIndex."""
        if self._is_multiindex(series):
            return series.groupby(level=1).diff(periods=periods)
        return series.diff(periods=periods)

    def _rolling_stat(
        self,
        series: pd.Series,
        window: int,
        stat: str,
        min_periods: Optional[int] = None,
        *stat_args: Any,
        **kwargs: Any,
    ) -> pd.Series:
        """Apply the rolling method named ``stat``, per asset for a MultiIndex.

        Args:
            series: Input series.
            window: Rolling window length.
            stat: Name of the rolling method, e.g. ``"mean"`` or ``"quantile"``.
            min_periods: Minimum observations; defaults to ``window``.
            *stat_args: Positional arguments forwarded to the rolling method.
            **kwargs: Keyword arguments forwarded to the rolling method.

        Returns:
            The rolling statistic. For MultiIndex input the group level added
            by ``groupby`` is dropped and the result is re-sorted by index.
        """
        min_periods = window if min_periods is None else min_periods

        if self._is_multiindex(series):
            roller = series.groupby(level=1).rolling(window=window, min_periods=min_periods)
            rolled = getattr(roller, stat)(*stat_args, **kwargs)
            return rolled.droplevel(0).sort_index()

        roller = series.rolling(window=window, min_periods=min_periods)
        return getattr(roller, stat)(*stat_args, **kwargs)

    def _compress(self, raw: pd.Series) -> pd.Series:
        """Scale ``raw`` by its rolling median absolute value and squash it.

        The result is ``50 * tanh(strength * raw / scale)``; a zero scale
        produces NaN.
        """
        robust_scale = self._rolling_stat(
            raw.abs(),
            window=self.compression_window,
            stat="median",
            min_periods=self.compression_min_periods,
        ).replace(0, np.nan)

        normalized = raw / robust_scale
        return 50.0 * np.tanh(self.compression_strength * normalized)

    # Shared raw components
    def _raw_volume_momentum(self, df: pd.DataFrame, hist_length: int, multiplier: int) -> pd.Series:
        """Log ratio of the short volume mean to the long volume mean."""
        volume = df[self.volume_col]
        short_ma = self._rolling_stat(volume, window=hist_length, stat="mean")
        long_ma = self._rolling_stat(volume, window=hist_length * multiplier, stat="mean")
        return self._safe_log(short_ma / long_ma.replace(0, np.nan))

    def _raw_volume_weighted_ma_over_ma(self, df: pd.DataFrame, hist_length: int) -> pd.Series:
        """Log ratio of the volume-weighted mean price to the simple mean price."""
        close = df[self.price_col]
        volume = df[self.volume_col]
        pv = close * volume

        vwma = self._rolling_stat(pv, window=hist_length, stat="sum") / self._rolling_stat(
            volume, window=hist_length, stat="sum"
        ).replace(0, np.nan)
        ma = self._rolling_stat(close, window=hist_length, stat="mean")

        return self._safe_log(vwma / ma.replace(0, np.nan))

    def _raw_price_volume_fit(self, df: pd.DataFrame, hist_length: int) -> pd.Series:
        """Rolling slope of log(price) regressed on log(volume).

        The slope is ``cov(x, y) / var(x)`` built from rolling moments.
        """
        close = df[self.price_col]
        volume = df[self.volume_col]

        x = self._safe_log(volume)
        y = self._safe_log(close)

        mean_x = self._rolling_stat(x, window=hist_length, stat="mean")
        mean_y = self._rolling_stat(y, window=hist_length, stat="mean")
        mean_xy = self._rolling_stat(x * y, window=hist_length, stat="mean")
        mean_x2 = self._rolling_stat(x * x, window=hist_length, stat="mean")

        cov_xy = mean_xy - (mean_x * mean_y)
        var_x = mean_x2 - (mean_x * mean_x)

        # E[x^2] - E[x]^2 cancels catastrophically when volume is (nearly)
        # constant: the result is rounding noise that may be tiny or even
        # negative, and dividing by it yields huge or sign-flipped slopes.
        # Treat anything at or below the rounding-noise floor as "no variance".
        noise_floor = np.finfo(float).eps * hist_length * mean_x2.abs()
        var_x = var_x.where(var_x > noise_floor)
        return cov_xy / var_x

    def _raw_on_balance_volume(self, df: pd.DataFrame, hist_length: int) -> pd.Series:
        """Rolling sum of sign(price change) * volume over rolling total volume."""
        close = df[self.price_col]
        volume = df[self.volume_col]

        close_diff = self._diff_by_asset(close, 1)
        signed_volume = volume * np.sign(close_diff)

        signed_sum = self._rolling_stat(signed_volume, window=hist_length, stat="sum")
        total_sum = self._rolling_stat(volume, window=hist_length, stat="sum")
        return signed_sum / total_sum.replace(0, np.nan)

    def _raw_volume_conditioned_return(self, df: pd.DataFrame, hist_length: int, rising: bool) -> pd.Series:
        """Mean return on rising- or falling-volume bars, scaled by return std.

        Bars on the other side of the volume comparison contribute a zero
        return. Bars where the return or either volume is missing stay
        missing, so absent data is not scored as a zero return.
        """
        close = df[self.price_col]
        volume = df[self.volume_col]

        rel_change = self._pct_change_by_asset(close, periods=1)
        prev_volume = self._shift_by_asset(volume, 1)
        selected = (volume > prev_volume) if rising else (volume < prev_volume)
        observed = rel_change.notna() & volume.notna() & prev_volume.notna()
        filtered = rel_change.where(selected, 0.0).where(observed)

        avg_change = self._rolling_stat(filtered, window=hist_length, stat="mean")
        norm_window = max(2 * hist_length, 250)
        std_change = self._rolling_stat(
            rel_change,
            window=norm_window,
            stat="std",
            min_periods=hist_length,
        ).replace(0, np.nan)
        return avg_change / std_change

    def _raw_positive_volume_indicator(self, df: pd.DataFrame, hist_length: int) -> pd.Series:
        """Normalized average return on bars whose volume rose."""
        return self._raw_volume_conditioned_return(df, hist_length, rising=True)

    def _raw_negative_volume_indicator(self, df: pd.DataFrame, hist_length: int) -> pd.Series:
        """Normalized average return on bars whose volume fell."""
        return self._raw_volume_conditioned_return(df, hist_length, rising=False)

    def _normalized_volume_and_price_change(
        self,
        df: pd.DataFrame,
        norm_lookback: int,
        norm_min_periods: int,
    ) -> tuple[pd.Series, pd.Series]:
        """Return volume and log-price change scaled by trailing robust stats.

        Volume is divided by the rolling median of prior volume. The log
        price change is centred on the rolling median and divided by the
        rolling inter-quartile range of prior changes. All reference
        statistics use values shifted by one bar, so the current bar never
        normalizes itself.
        """
        close = df[self.price_col]
        volume = df[self.volume_col]

        prior_volume = self._shift_by_asset(volume, 1)
        median_volume = self._rolling_stat(
            prior_volume,
            window=norm_lookback,
            stat="median",
            min_periods=norm_min_periods,
        ).replace(0, np.nan)
        normalized_volume = volume / median_volume

        log_close = self._safe_log(close)
        price_change = self._diff_by_asset(log_close, 1)
        prior_change = self._shift_by_asset(price_change, 1)

        median_change = self._rolling_stat(
            prior_change,
            window=norm_lookback,
            stat="median",
            min_periods=norm_min_periods,
        )
        # The quantile level is passed positionally: the keyword is ``q`` only
        # from pandas 2.1 (``quantile`` before), so a keyword call is
        # version-dependent.
        q75 = self._rolling_stat(prior_change, norm_lookback, "quantile", norm_min_periods, 0.75)
        q25 = self._rolling_stat(prior_change, norm_lookback, "quantile", norm_min_periods, 0.25)
        iqr = (q75 - q25).replace(0, np.nan)

        normalized_change = (price_change - median_change) / iqr
        return normalized_volume, normalized_change

    def _raw_product_price_volume(
        self,
        df: pd.DataFrame,
        hist_length: int,
        norm_lookback: int = 250,
        norm_min_periods: int = 50,
    ) -> pd.Series:
        """Rolling mean of normalized volume times normalized price change."""
        norm_vol, norm_change = self._normalized_volume_and_price_change(df, norm_lookback, norm_min_periods)
        precursor = norm_vol * norm_change
        return self._rolling_stat(precursor, window=hist_length, stat="mean")

    def _raw_sum_price_volume(
        self,
        df: pd.DataFrame,
        hist_length: int,
        norm_lookback: int = 250,
        norm_min_periods: int = 50,
    ) -> pd.Series:
        """Rolling mean of ``norm_vol + |norm_change|`` signed by the change."""
        norm_vol, norm_change = self._normalized_volume_and_price_change(df, norm_lookback, norm_min_periods)
        precursor = norm_vol + norm_change.abs()
        # The magnitude takes the sign of the price change.
        precursor = precursor.where(norm_change >= 0, -precursor)
        return self._rolling_stat(precursor, window=hist_length, stat="mean")
class DeltaOnBalanceVolume(VolumeFactor):
    """Change in the on-balance-volume ratio over ``delta_dist`` bars."""

    def __init__(self, hist_length: int = 50, delta_dist: int = 45, **kwargs) -> None:
        """Record the rolling window and the lag used for the difference."""
        super().__init__(**kwargs)
        self.name = "DeltaOnBalanceVolume"
        self.description = "Current minus lagged on-balance-volume signal."
        self.hist_length = hist_length
        self.delta_dist = delta_dist

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name plus both parameters."""
        if self.output_col:
            return self.output_col
        return f"{self.name}_{self.hist_length}_{self.delta_dist}"

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Subtract the per-asset lagged OBV ratio from the current one."""
        current = self._raw_on_balance_volume(df, self.hist_length)
        lagged = self._shift_by_asset(current, self.delta_dist)
        return current - lagged
class DeltaPriceVolumeFit(VolumeFactor):
    """Change in the price-volume fit slope over ``delta_dist`` bars."""

    def __init__(self, hist_length: int = 20, delta_dist: int = 30, **kwargs) -> None:
        """Record the regression window and the lag used for the difference."""
        super().__init__(**kwargs)
        self.hist_length, self.delta_dist = hist_length, delta_dist
        self.name = "DeltaPriceVolumeFit"
        self.description = "Current minus lagged price-volume fit slope."

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name plus both parameters."""
        default_name = f"{self.name}_{self.hist_length}_{self.delta_dist}"
        return self.output_col or default_name

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Subtract the per-asset lagged slope from the current slope."""
        slope = self._raw_price_volume_fit(df, self.hist_length)
        slope_lagged = self._shift_by_asset(slope, self.delta_dist)
        return slope - slope_lagged
class DeltaSumPriceVolume(VolumeFactor):
    """Change in the sum-price-volume signal over ``delta_dist`` bars."""

    def __init__(
        self,
        hist_length: int = 40,
        delta_dist: int = 35,
        norm_lookback: int = 250,
        norm_min_periods: int = 50,
        **kwargs,
    ) -> None:
        """Record the smoothing window, the lag and the normalization settings."""
        super().__init__(**kwargs)
        self.name = "DeltaSumPriceVolume"
        self.description = "Current minus lagged sum-price-volume signal."
        self.hist_length = hist_length
        self.delta_dist = delta_dist
        self.norm_lookback = norm_lookback
        self.norm_min_periods = norm_min_periods

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name, window and lag."""
        if self.output_col:
            return self.output_col
        return f"{self.name}_{self.hist_length}_{self.delta_dist}"

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Subtract the per-asset lagged signal from the current signal."""
        settings = {
            "hist_length": self.hist_length,
            "norm_lookback": self.norm_lookback,
            "norm_min_periods": self.norm_min_periods,
        }
        signal = self._raw_sum_price_volume(df, **settings)
        return signal - self._shift_by_asset(signal, self.delta_dist)
class DiffPriceVolumeFit(VolumeFactor):
    """Short-window price-volume fit slope minus the long-window slope."""

    def __init__(self, short_dist: int = 20, long_dist: int = 100, **kwargs) -> None:
        """Record the short and long regression windows."""
        super().__init__(**kwargs)
        self.name = "DiffPriceVolumeFit"
        self.description = "Short minus long price-volume fit slope."
        self.short_dist = short_dist
        self.long_dist = long_dist

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name plus both windows."""
        if self.output_col:
            return self.output_col
        return f"{self.name}_{self.short_dist}_{self.long_dist}"

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Compute both slopes, short first, and return their difference."""
        fast_slope = self._raw_price_volume_fit(df, self.short_dist)
        slow_slope = self._raw_price_volume_fit(df, self.long_dist)
        return fast_slope - slow_slope
class NegativeVolumeIndicator(VolumeFactor):
    """Normalized average return measured on falling-volume bars."""

    def __init__(self, hist_length: int = 40, **kwargs) -> None:
        """Record the averaging window."""
        super().__init__(**kwargs)
        self.name = "NegativeVolumeIndicator"
        self.description = "Normalized average return on falling-volume bars."
        self.hist_length = hist_length

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name plus the window."""
        if self.output_col:
            return self.output_col
        return f"{self.name}_{self.hist_length}"

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Delegate to the shared negative-volume component."""
        window = self.hist_length
        return self._raw_negative_volume_indicator(df, window)
class PositiveVolumeIndicator(VolumeFactor):
    """Normalized average return measured on rising-volume bars."""

    def __init__(self, hist_length: int = 40, **kwargs) -> None:
        """Record the averaging window."""
        super().__init__(**kwargs)
        self.name = "PositiveVolumeIndicator"
        self.description = "Normalized average return on rising-volume bars."
        self.hist_length = hist_length

    def _generate_name(self) -> str:
        """Use ``output_col`` when set, else name plus the window."""
        fallback = f"{self.name}_{self.hist_length}"
        return self.output_col or fallback

    def _compute_volume(self, df: pd.DataFrame) -> pd.Series:
        """Delegate to the shared positive-volume component."""
        indicator = self._raw_positive_volume_indicator(df, self.hist_length)
        return indicator
+ + def _generate_name(self) -> str: + return self.output_col or f"{self.name}_{self.hist_length}" + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + return self._raw_price_volume_fit(df, self.hist_length) diff --git a/src/factorlab/factors/volume/product_price_volume.py b/src/factorlab/factors/volume/product_price_volume.py new file mode 100644 index 0000000..86a165a --- /dev/null +++ b/src/factorlab/factors/volume/product_price_volume.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import pandas as pd + +from factorlab.factors.volume.base import VolumeFactor + + +class ProductPriceVolume(VolumeFactor): + def __init__( + self, + hist_length: int = 25, + norm_lookback: int = 250, + norm_min_periods: int = 50, + **kwargs, + ): + super().__init__(**kwargs) + self.hist_length = hist_length + self.norm_lookback = norm_lookback + self.norm_min_periods = norm_min_periods + self.name = "ProductPriceVolume" + self.description = "Smoothed product of normalized price and volume shocks." 
+ + def _generate_name(self) -> str: + return self.output_col or f"{self.name}_{self.hist_length}" + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + return self._raw_product_price_volume( + df, + hist_length=self.hist_length, + norm_lookback=self.norm_lookback, + norm_min_periods=self.norm_min_periods, + ) diff --git a/src/factorlab/factors/volume/sum_price_volume.py b/src/factorlab/factors/volume/sum_price_volume.py new file mode 100644 index 0000000..da77c04 --- /dev/null +++ b/src/factorlab/factors/volume/sum_price_volume.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +import pandas as pd + +from factorlab.factors.volume.base import VolumeFactor + + +class SumPriceVolume(VolumeFactor): + def __init__( + self, + hist_length: int = 25, + norm_lookback: int = 250, + norm_min_periods: int = 50, + **kwargs, + ): + super().__init__(**kwargs) + self.hist_length = hist_length + self.norm_lookback = norm_lookback + self.norm_min_periods = norm_min_periods + self.name = "SumPriceVolume" + self.description = "Smoothed signed sum of normalized price/volume shocks." 
+ + def _generate_name(self) -> str: + return self.output_col or f"{self.name}_{self.hist_length}" + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + return self._raw_sum_price_volume( + df, + hist_length=self.hist_length, + norm_lookback=self.norm_lookback, + norm_min_periods=self.norm_min_periods, + ) diff --git a/src/factorlab/factors/volume/volume.py b/src/factorlab/factors/volume/volume.py new file mode 100644 index 0000000..f3b3381 --- /dev/null +++ b/src/factorlab/factors/volume/volume.py @@ -0,0 +1,132 @@ +import pandas as pd +from typing import ClassVar, Dict, Optional, Type, Union + +from factorlab.core.base_transform import BaseTransform +from factorlab.factors.base import Factor +from factorlab.factors.volume.volume_momentum import VolumeMomentum +from factorlab.factors.volume.delta_volume_momentum import DeltaVolumeMomentum +from factorlab.factors.volume.volume_weighted_ma_over_ma import VolumeWeightedMAOverMA +from factorlab.factors.volume.diff_volume_weighted_ma_over_ma import DiffVolumeWeightedMAOverMA +from factorlab.factors.volume.price_volume_fit import PriceVolumeFit +from factorlab.factors.volume.diff_price_volume_fit import DiffPriceVolumeFit +from factorlab.factors.volume.delta_price_volume_fit import DeltaPriceVolumeFit +from factorlab.factors.volume.on_balance_volume import OnBalanceVolume +from factorlab.factors.volume.delta_on_balance_volume import DeltaOnBalanceVolume +from factorlab.factors.volume.positive_volume_indicator import PositiveVolumeIndicator +from factorlab.factors.volume.delta_positive_volume_indicator import DeltaPositiveVolumeIndicator +from factorlab.factors.volume.negative_volume_indicator import NegativeVolumeIndicator +from factorlab.factors.volume.delta_negative_volume_indicator import DeltaNegativeVolumeIndicator +from factorlab.factors.volume.product_price_volume import ProductPriceVolume +from factorlab.factors.volume.sum_price_volume import SumPriceVolume +from 
factorlab.factors.volume.delta_product_price_volume import DeltaProductPriceVolume +from factorlab.factors.volume.delta_sum_price_volume import DeltaSumPriceVolume +from factorlab.utils import to_dataframe + + +class Volume(Factor): + """Factory class for volume factors.""" + + _METHOD_MAP: ClassVar[Dict[str, Type[BaseTransform]]] = { + "volume_momentum": VolumeMomentum, + "delta_volume_momentum": DeltaVolumeMomentum, + "volume_weighted_ma_over_ma": VolumeWeightedMAOverMA, + "diff_volume_weighted_ma_over_ma": DiffVolumeWeightedMAOverMA, + "price_volume_fit": PriceVolumeFit, + "diff_price_volume_fit": DiffPriceVolumeFit, + "delta_price_volume_fit": DeltaPriceVolumeFit, + "on_balance_volume": OnBalanceVolume, + "delta_on_balance_volume": DeltaOnBalanceVolume, + "positive_volume_indicator": PositiveVolumeIndicator, + "delta_positive_volume_indicator": DeltaPositiveVolumeIndicator, + "negative_volume_indicator": NegativeVolumeIndicator, + "delta_negative_volume_indicator": DeltaNegativeVolumeIndicator, + "product_price_volume": ProductPriceVolume, + "sum_price_volume": SumPriceVolume, + "delta_product_price_volume": DeltaProductPriceVolume, + "delta_sum_price_volume": DeltaSumPriceVolume, + } + + _ALIASES: ClassVar[Dict[str, str]] = { + "vmom": "volume_momentum", + "dvmom": "delta_volume_momentum", + "vwmama": "volume_weighted_ma_over_ma", + "dvwmama": "diff_volume_weighted_ma_over_ma", + "pvf": "price_volume_fit", + "difpvf": "diff_price_volume_fit", + "dpvf": "delta_price_volume_fit", + "obv": "on_balance_volume", + "dobv": "delta_on_balance_volume", + "pvi": "positive_volume_indicator", + "dpvi": "delta_positive_volume_indicator", + "nvi": "negative_volume_indicator", + "dnvi": "delta_negative_volume_indicator", + "ppv": "product_price_volume", + "spv": "sum_price_volume", + "dppv": "delta_product_price_volume", + "dspv": "delta_sum_price_volume", + } + + @classmethod + def get_factor_metadata(cls) -> pd.DataFrame: + data = [] + for alias, factor_class in 
cls._METHOD_MAP.items(): + try: + factor_instance = factor_class() + data.append( + { + "Alias": alias, + "Class": factor_class.__name__, + "Description": factor_instance.description, + } + ) + except Exception as exc: + data.append( + { + "Alias": alias, + "Class": factor_class.__name__, + "Description": f"Instantiation Failed: {exc}", + } + ) + + return pd.DataFrame(data).set_index("Alias") + + def __init__(self, method: str = "volume_momentum", **kwargs): + super().__init__( + name="Volume", + description="A factory for volume-based factors.", + category="Volume", + ) + + method = method.lower().strip() + self.method = self._ALIASES.get(method, method) + self.kwargs = kwargs + + if self.method not in self._METHOD_MAP: + raise ValueError( + f"Invalid volume factor method '{self.method}'. " + f"Method must be one of: {list(self._METHOD_MAP.keys())}" + ) + + factor_class = self._METHOD_MAP[self.method] + self._factor: Factor = factor_class(**self.kwargs) + + @property + def inputs(self) -> list[str]: + return self._factor.inputs + + def fit( + self, + X: Union[pd.Series, pd.DataFrame], + y: Optional[Union[pd.Series, pd.DataFrame]] = None, + ) -> "Volume": + df_input = to_dataframe(X) + self.validate_inputs(df_input) + self._factor.fit(df_input) + self._is_fitted = True + return self + + def transform(self, data: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame: + if not self._is_fitted: + raise RuntimeError("Volume transform must be fitted before calling transform().") + + return self._factor.transform(data) diff --git a/src/factorlab/factors/volume/volume_momentum.py b/src/factorlab/factors/volume/volume_momentum.py new file mode 100644 index 0000000..8aeebc7 --- /dev/null +++ b/src/factorlab/factors/volume/volume_momentum.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import pandas as pd + +from factorlab.factors.volume.base import VolumeFactor + + +class VolumeMomentum(VolumeFactor): + def __init__(self, hist_length: int = 20, multiplier: int = 4, 
**kwargs): + super().__init__(**kwargs) + self.hist_length = hist_length + self.multiplier = multiplier + self.name = "VolumeMomentum" + self.description = "Short-vs-long volume momentum ratio." + + def _generate_name(self) -> str: + return self.output_col or f"{self.name}_{self.hist_length}_{self.multiplier}" + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + return self._raw_volume_momentum(df, self.hist_length, self.multiplier) diff --git a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py new file mode 100644 index 0000000..3800a69 --- /dev/null +++ b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import pandas as pd + +from factorlab.factors.volume.base import VolumeFactor + + +class VolumeWeightedMAOverMA(VolumeFactor): + def __init__(self, hist_length: int = 50, **kwargs): + super().__init__(**kwargs) + self.hist_length = hist_length + self.name = "VolumeWeightedMAOverMA" + self.description = "Log ratio of VWMA over MA." 
+ + def _generate_name(self) -> str: + return self.output_col or f"{self.name}_{self.hist_length}" + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + return self._raw_volume_weighted_ma_over_ma(df, self.hist_length) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..2bbf859 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,10 @@ +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[1] +SRC = ROOT / "src" + +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + diff --git a/tests/features/test_volume_factors.py b/tests/features/test_volume_factors.py new file mode 100644 index 0000000..3817558 --- /dev/null +++ b/tests/features/test_volume_factors.py @@ -0,0 +1,127 @@ +import numpy as np +import pandas as pd +import pytest +from pathlib import Path + +from factorlab.factors.volume import Volume + + +FACTOR_SPECS = [ + ("volume_momentum", {"hist_length": 20, "multiplier": 4}), + ("delta_volume_momentum", {"hist_length": 20, "multiplier": 4, "delta_len": 100}), + ("volume_weighted_ma_over_ma", {"hist_length": 50}), + ("diff_volume_weighted_ma_over_ma", {"short_dist": 20, "long_dist": 100}), + ("price_volume_fit", {"hist_length": 50}), + ("diff_price_volume_fit", {"short_dist": 20, "long_dist": 100}), + ("delta_price_volume_fit", {"hist_length": 20, "delta_dist": 30}), + ("on_balance_volume", {"hist_length": 50}), + ("delta_on_balance_volume", {"hist_length": 50, "delta_dist": 45}), + ("positive_volume_indicator", {"hist_length": 40}), + ("delta_positive_volume_indicator", {"hist_length": 40, "delta_dist": 35}), + ("negative_volume_indicator", {"hist_length": 40}), + ("delta_negative_volume_indicator", {"hist_length": 40, "delta_dist": 35}), + ("product_price_volume", {"hist_length": 25}), + ("sum_price_volume", {"hist_length": 25}), + ("delta_product_price_volume", {"hist_length": 40, "delta_dist": 35}), + ("delta_sum_price_volume", {"hist_length": 40, "delta_dist": 35}), +] + + 
+@pytest.fixture(scope="module") +def crypto_universe() -> pd.DataFrame: + data_path = Path(__file__).resolve().parents[1] / "datasets" / "data" / "binance_spot_prices.csv" + df = pd.read_csv( + data_path, + index_col=["date", "ticker"], + parse_dates=["date"], + ) + df = df.sort_index() + + # keep symbols with at least 300 daily bars + counts = df.groupby(level=1).size() + keep = counts[counts >= 300].index + df = df[df.index.get_level_values(1).isin(keep)] + + # keep a liquid subset to keep tests fast and stable + avg_notional = (df["close"] * df["volume"]).groupby(level=1).mean() + top_symbols = avg_notional.nlargest(60).index + df = df[df.index.get_level_values(1).isin(top_symbols)] + + return df[["open", "high", "low", "close", "volume"]] + + +@pytest.mark.parametrize("method,kwargs", FACTOR_SPECS) +def test_volume_factor_methods_smoke(crypto_universe: pd.DataFrame, method: str, kwargs: dict) -> None: + factor = Volume(method=method, **kwargs) + out = factor.compute(crypto_universe) + + created_cols = [col for col in out.columns if col not in crypto_universe.columns] + assert len(created_cols) == 1 + + factor_col = created_cols[0] + values = out[factor_col].dropna() + + assert len(values) > 0 + assert (values <= 50).all() + assert (values >= -50).all() + + pd.testing.assert_frame_equal(out[crypto_universe.columns], crypto_universe) + assert out.index.equals(crypto_universe.index) + + +def test_volume_factor_crypto_rank_ic_smoke(crypto_universe: pd.DataFrame) -> None: + close = crypto_universe["close"] + volume = crypto_universe["volume"] + + fwd_ret = close.groupby(level=1).shift(-1).div(close) - 1.0 + + # daily tradable universe proxy: top 40 by 20-day average notional + notional = close * volume + liquidity = ( + notional.groupby(level=1) + .rolling(window=20, min_periods=20) + .mean() + .droplevel(0) + .sort_index() + ) + eligible = liquidity.groupby(level=0).rank(ascending=False, method="first") <= 40 + + rows = [] + for method, kwargs in FACTOR_SPECS: + 
factor = Volume(method=method, **kwargs) + out = factor.compute(crypto_universe) + factor_col = [col for col in out.columns if col not in crypto_universe.columns][0] + + panel = pd.concat( + [ + out[factor_col].rename("factor"), + fwd_ret.rename("fwd_ret"), + eligible.rename("eligible"), + ], + axis=1, + ) + panel = panel[panel["eligible"]].dropna() + + daily_ic = panel.groupby(level=0).apply( + lambda g: g["factor"].corr(g["fwd_ret"], method="spearman") if g.shape[0] >= 12 else np.nan + ) + + n_obs = int(daily_ic.notna().sum()) + mean_ic = float(daily_ic.mean()) if n_obs > 0 else np.nan + std_ic = float(daily_ic.std()) if n_obs > 1 else np.nan + + rows.append( + { + "method": method, + "n_obs": n_obs, + "mean_ic": mean_ic, + "std_ic": std_ic, + } + ) + + summary = pd.DataFrame(rows).set_index("method") + + assert summary.shape[0] == len(FACTOR_SPECS) + assert (summary["n_obs"] >= 30).sum() >= 12 + assert np.isfinite(summary["mean_ic"].dropna()).all() + assert (summary["mean_ic"].dropna().abs() <= 1).all() From cb5d0955480ec8bac28fae6964f599ca1fef799f Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 11:45:20 -0500 Subject: [PATCH 2/6] Refactor volume factors to per-indicator readable implementations --- src/factorlab/factors/volume/base.py | 152 ------------------ .../volume/delta_negative_volume_indicator.py | 19 ++- .../factors/volume/delta_on_balance_volume.py | 12 +- .../volume/delta_positive_volume_indicator.py | 19 ++- .../factors/volume/delta_price_volume_fit.py | 17 +- .../volume/delta_product_price_volume.py | 52 +++++- .../factors/volume/delta_sum_price_volume.py | 53 +++++- .../factors/volume/delta_volume_momentum.py | 6 +- .../factors/volume/diff_price_volume_fit.py | 21 ++- .../volume/diff_volume_weighted_ma_over_ma.py | 17 +- .../volume/negative_volume_indicator.py | 19 ++- .../factors/volume/on_balance_volume.py | 12 +- .../volume/positive_volume_indicator.py | 19 ++- .../factors/volume/price_volume_fit.py | 16 +- 
.../factors/volume/product_price_volume.py | 52 +++++- .../factors/volume/sum_price_volume.py | 53 +++++- .../factors/volume/volume_momentum.py | 7 +- .../volume/volume_weighted_ma_over_ma.py | 12 +- 18 files changed, 367 insertions(+), 191 deletions(-) diff --git a/src/factorlab/factors/volume/base.py b/src/factorlab/factors/volume/base.py index bf9b068..f10c159 100644 --- a/src/factorlab/factors/volume/base.py +++ b/src/factorlab/factors/volume/base.py @@ -125,155 +125,3 @@ def _compress(self, raw: pd.Series) -> pd.Series: normalized = raw / robust_scale return 50.0 * np.tanh(self.compression_strength * normalized) - - # Shared raw components - def _raw_volume_momentum(self, df: pd.DataFrame, hist_length: int, multiplier: int) -> pd.Series: - volume = df[self.volume_col] - short_ma = self._rolling_stat(volume, window=hist_length, stat="mean") - long_ma = self._rolling_stat(volume, window=hist_length * multiplier, stat="mean") - return self._safe_log(short_ma / long_ma.replace(0, np.nan)) - - def _raw_volume_weighted_ma_over_ma(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - pv = close * volume - - vwma = self._rolling_stat(pv, window=hist_length, stat="sum") / self._rolling_stat( - volume, window=hist_length, stat="sum" - ).replace(0, np.nan) - ma = self._rolling_stat(close, window=hist_length, stat="mean") - - return self._safe_log(vwma / ma.replace(0, np.nan)) - - def _raw_price_volume_fit(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - x = self._safe_log(volume) - y = self._safe_log(close) - - mean_x = self._rolling_stat(x, window=hist_length, stat="mean") - mean_y = self._rolling_stat(y, window=hist_length, stat="mean") - mean_xy = self._rolling_stat(x * y, window=hist_length, stat="mean") - mean_x2 = self._rolling_stat(x * x, window=hist_length, stat="mean") - - cov_xy = mean_xy - (mean_x * mean_y) - var_x = mean_x2 - 
(mean_x * mean_x) - return cov_xy / var_x.replace(0, np.nan) - - def _raw_on_balance_volume(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - close_diff = self._diff_by_asset(close, 1) - signed_volume = volume * np.sign(close_diff) - - signed_sum = self._rolling_stat(signed_volume, window=hist_length, stat="sum") - total_sum = self._rolling_stat(volume, window=hist_length, stat="sum") - return signed_sum / total_sum.replace(0, np.nan) - - def _raw_positive_volume_indicator(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume > prev_volume, 0.0) - - avg_change = self._rolling_stat(filtered, window=hist_length, stat="mean") - norm_window = max(2 * hist_length, 250) - std_change = self._rolling_stat( - rel_change, - window=norm_window, - stat="std", - min_periods=hist_length, - ).replace(0, np.nan) - return avg_change / std_change - - def _raw_negative_volume_indicator(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume < prev_volume, 0.0) - - avg_change = self._rolling_stat(filtered, window=hist_length, stat="mean") - norm_window = max(2 * hist_length, 250) - std_change = self._rolling_stat( - rel_change, - window=norm_window, - stat="std", - min_periods=hist_length, - ).replace(0, np.nan) - return avg_change / std_change - - def _normalized_volume_and_price_change( - self, - df: pd.DataFrame, - norm_lookback: int, - norm_min_periods: int, - ) -> tuple[pd.Series, pd.Series]: - close = df[self.price_col] - volume = df[self.volume_col] - - prior_volume = self._shift_by_asset(volume, 1) - 
median_volume = self._rolling_stat( - prior_volume, - window=norm_lookback, - stat="median", - min_periods=norm_min_periods, - ).replace(0, np.nan) - normalized_volume = volume / median_volume - - log_close = self._safe_log(close) - price_change = self._diff_by_asset(log_close, 1) - prior_change = self._shift_by_asset(price_change, 1) - - median_change = self._rolling_stat( - prior_change, - window=norm_lookback, - stat="median", - min_periods=norm_min_periods, - ) - q75 = self._rolling_stat( - prior_change, - window=norm_lookback, - stat="quantile", - min_periods=norm_min_periods, - q=0.75, - ) - q25 = self._rolling_stat( - prior_change, - window=norm_lookback, - stat="quantile", - min_periods=norm_min_periods, - q=0.25, - ) - iqr = (q75 - q25).replace(0, np.nan) - - normalized_change = (price_change - median_change) / iqr - return normalized_volume, normalized_change - - def _raw_product_price_volume( - self, - df: pd.DataFrame, - hist_length: int, - norm_lookback: int = 250, - norm_min_periods: int = 50, - ) -> pd.Series: - norm_vol, norm_change = self._normalized_volume_and_price_change(df, norm_lookback, norm_min_periods) - precursor = norm_vol * norm_change - return self._rolling_stat(precursor, window=hist_length, stat="mean") - - def _raw_sum_price_volume( - self, - df: pd.DataFrame, - hist_length: int, - norm_lookback: int = 250, - norm_min_periods: int = 50, - ) -> pd.Series: - norm_vol, norm_change = self._normalized_volume_and_price_change(df, norm_lookback, norm_min_periods) - precursor = norm_vol + norm_change.abs() - precursor = precursor.where(norm_change >= 0, -precursor) - return self._rolling_stat(precursor, window=hist_length, stat="mean") diff --git a/src/factorlab/factors/volume/delta_negative_volume_indicator.py b/src/factorlab/factors/volume/delta_negative_volume_indicator.py index 53ad774..447db2d 100644 --- a/src/factorlab/factors/volume/delta_negative_volume_indicator.py +++ 
b/src/factorlab/factors/volume/delta_negative_volume_indicator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -17,5 +18,21 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - nvi = self._raw_negative_volume_indicator(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + rel_change = self._pct_change_by_asset(close, periods=1) + prev_volume = self._shift_by_asset(volume, 1) + filtered = rel_change.where(volume < prev_volume, 0.0) + + avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + norm_window = max(2 * self.hist_length, 250) + std_change = self._rolling_stat( + rel_change, + window=norm_window, + stat="std", + min_periods=self.hist_length, + ).replace(0, np.nan) + nvi = avg_change / std_change + return nvi - self._shift_by_asset(nvi, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_on_balance_volume.py b/src/factorlab/factors/volume/delta_on_balance_volume.py index c0e6b90..1543a2d 100644 --- a/src/factorlab/factors/volume/delta_on_balance_volume.py +++ b/src/factorlab/factors/volume/delta_on_balance_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -17,5 +18,14 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - obv = self._raw_on_balance_volume(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + close_diff = self._diff_by_asset(close, 1) + signed_volume = volume * np.sign(close_diff) + + signed_sum = self._rolling_stat(signed_volume, window=self.hist_length, stat="sum") + total_sum = 
self._rolling_stat(volume, window=self.hist_length, stat="sum") + obv = signed_sum / total_sum.replace(0, np.nan) + return obv - self._shift_by_asset(obv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_positive_volume_indicator.py b/src/factorlab/factors/volume/delta_positive_volume_indicator.py index 6b73849..90b6d94 100644 --- a/src/factorlab/factors/volume/delta_positive_volume_indicator.py +++ b/src/factorlab/factors/volume/delta_positive_volume_indicator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -17,5 +18,21 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - pvi = self._raw_positive_volume_indicator(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + rel_change = self._pct_change_by_asset(close, periods=1) + prev_volume = self._shift_by_asset(volume, 1) + filtered = rel_change.where(volume > prev_volume, 0.0) + + avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + norm_window = max(2 * self.hist_length, 250) + std_change = self._rolling_stat( + rel_change, + window=norm_window, + stat="std", + min_periods=self.hist_length, + ).replace(0, np.nan) + pvi = avg_change / std_change + return pvi - self._shift_by_asset(pvi, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_price_volume_fit.py b/src/factorlab/factors/volume/delta_price_volume_fit.py index c87d65b..16cccb0 100644 --- a/src/factorlab/factors/volume/delta_price_volume_fit.py +++ b/src/factorlab/factors/volume/delta_price_volume_fit.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -17,5 +18,19 @@ def _generate_name(self) -> str: return self.output_col or 
f"{self.name}_{self.hist_length}_{self.delta_dist}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - pvf = self._raw_price_volume_fit(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + x = self._safe_log(volume) + y = self._safe_log(close) + + mean_x = self._rolling_stat(x, window=self.hist_length, stat="mean") + mean_y = self._rolling_stat(y, window=self.hist_length, stat="mean") + mean_xy = self._rolling_stat(x * y, window=self.hist_length, stat="mean") + mean_x2 = self._rolling_stat(x * x, window=self.hist_length, stat="mean") + + cov_xy = mean_xy - (mean_x * mean_y) + var_x = mean_x2 - (mean_x * mean_x) + pvf = cov_xy / var_x.replace(0, np.nan) + return pvf - self._shift_by_asset(pvf, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_product_price_volume.py b/src/factorlab/factors/volume/delta_product_price_volume.py index 7fb057a..cc63b33 100644 --- a/src/factorlab/factors/volume/delta_product_price_volume.py +++ b/src/factorlab/factors/volume/delta_product_price_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -25,11 +26,50 @@ def __init__( def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - ppv = self._raw_product_price_volume( - df, - hist_length=self.hist_length, - norm_lookback=self.norm_lookback, - norm_min_periods=self.norm_min_periods, + def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: + close = df[self.price_col] + volume = df[self.volume_col] + + prior_volume = self._shift_by_asset(volume, 1) + median_volume = self._rolling_stat( + prior_volume, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ).replace(0, np.nan) + normalized_volume = volume / median_volume + + log_close = 
self._safe_log(close) + price_change = self._diff_by_asset(log_close, 1) + prior_change = self._shift_by_asset(price_change, 1) + + median_change = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ) + q75 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.75, ) + q25 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.25, + ) + iqr = (q75 - q25).replace(0, np.nan) + + normalized_change = (price_change - median_change) / iqr + return normalized_volume, normalized_change + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) + precursor = normalized_volume * normalized_change + ppv = self._rolling_stat(precursor, window=self.hist_length, stat="mean") return ppv - self._shift_by_asset(ppv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_sum_price_volume.py b/src/factorlab/factors/volume/delta_sum_price_volume.py index bdd0fa6..3f1e64e 100644 --- a/src/factorlab/factors/volume/delta_sum_price_volume.py +++ b/src/factorlab/factors/volume/delta_sum_price_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -25,11 +26,51 @@ def __init__( def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - spv = self._raw_sum_price_volume( - df, - hist_length=self.hist_length, - norm_lookback=self.norm_lookback, - norm_min_periods=self.norm_min_periods, + def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: + close = df[self.price_col] + volume = df[self.volume_col] + + prior_volume = 
self._shift_by_asset(volume, 1) + median_volume = self._rolling_stat( + prior_volume, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ).replace(0, np.nan) + normalized_volume = volume / median_volume + + log_close = self._safe_log(close) + price_change = self._diff_by_asset(log_close, 1) + prior_change = self._shift_by_asset(price_change, 1) + + median_change = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ) + q75 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.75, ) + q25 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.25, + ) + iqr = (q75 - q25).replace(0, np.nan) + + normalized_change = (price_change - median_change) / iqr + return normalized_volume, normalized_change + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) + precursor = normalized_volume + normalized_change.abs() + precursor = precursor.where(normalized_change >= 0, -precursor) + spv = self._rolling_stat(precursor, window=self.hist_length, stat="mean") return spv - self._shift_by_asset(spv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_volume_momentum.py b/src/factorlab/factors/volume/delta_volume_momentum.py index b37b94b..231c3b3 100644 --- a/src/factorlab/factors/volume/delta_volume_momentum.py +++ b/src/factorlab/factors/volume/delta_volume_momentum.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -18,5 +19,8 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.multiplier}_{self.delta_len}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - vmom = 
self._raw_volume_momentum(df, self.hist_length, self.multiplier) + volume = df[self.volume_col] + short_ma = self._rolling_stat(volume, window=self.hist_length, stat="mean") + long_ma = self._rolling_stat(volume, window=self.hist_length * self.multiplier, stat="mean") + vmom = self._safe_log(short_ma / long_ma.replace(0, np.nan)) return vmom - self._shift_by_asset(vmom, self.delta_len) diff --git a/src/factorlab/factors/volume/diff_price_volume_fit.py b/src/factorlab/factors/volume/diff_price_volume_fit.py index c68d59f..621b63c 100644 --- a/src/factorlab/factors/volume/diff_price_volume_fit.py +++ b/src/factorlab/factors/volume/diff_price_volume_fit.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,7 +17,23 @@ def __init__(self, short_dist: int = 20, long_dist: int = 100, **kwargs): def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.short_dist}_{self.long_dist}" + def _pv_fit(self, df: pd.DataFrame, hist_length: int) -> pd.Series: + close = df[self.price_col] + volume = df[self.volume_col] + + x = self._safe_log(volume) + y = self._safe_log(close) + + mean_x = self._rolling_stat(x, window=hist_length, stat="mean") + mean_y = self._rolling_stat(y, window=hist_length, stat="mean") + mean_xy = self._rolling_stat(x * y, window=hist_length, stat="mean") + mean_x2 = self._rolling_stat(x * x, window=hist_length, stat="mean") + + cov_xy = mean_xy - (mean_x * mean_y) + var_x = mean_x2 - (mean_x * mean_x) + return cov_xy / var_x.replace(0, np.nan) + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - short = self._raw_price_volume_fit(df, self.short_dist) - long = self._raw_price_volume_fit(df, self.long_dist) + short = self._pv_fit(df, self.short_dist) + long = self._pv_fit(df, self.long_dist) return short - long diff --git a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py 
b/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py index 0c4e57a..5f34833 100644 --- a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py +++ b/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,7 +17,19 @@ def __init__(self, short_dist: int = 20, long_dist: int = 100, **kwargs): def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.short_dist}_{self.long_dist}" + def _vwma_over_ma(self, df: pd.DataFrame, hist_length: int) -> pd.Series: + close = df[self.price_col] + volume = df[self.volume_col] + + pv = close * volume + vwma = self._rolling_stat(pv, window=hist_length, stat="sum") / self._rolling_stat( + volume, window=hist_length, stat="sum" + ).replace(0, np.nan) + ma = self._rolling_stat(close, window=hist_length, stat="mean") + + return self._safe_log(vwma / ma.replace(0, np.nan)) + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - short = self._raw_volume_weighted_ma_over_ma(df, self.short_dist) - long = self._raw_volume_weighted_ma_over_ma(df, self.long_dist) + short = self._vwma_over_ma(df, self.short_dist) + long = self._vwma_over_ma(df, self.long_dist) return short - long diff --git a/src/factorlab/factors/volume/negative_volume_indicator.py b/src/factorlab/factors/volume/negative_volume_indicator.py index 320809b..abc5731 100644 --- a/src/factorlab/factors/volume/negative_volume_indicator.py +++ b/src/factorlab/factors/volume/negative_volume_indicator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,4 +17,20 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_negative_volume_indicator(df, self.hist_length) 
+ close = df[self.price_col] + volume = df[self.volume_col] + + rel_change = self._pct_change_by_asset(close, periods=1) + prev_volume = self._shift_by_asset(volume, 1) + filtered = rel_change.where(volume < prev_volume, 0.0) + + avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + norm_window = max(2 * self.hist_length, 250) + std_change = self._rolling_stat( + rel_change, + window=norm_window, + stat="std", + min_periods=self.hist_length, + ).replace(0, np.nan) + + return avg_change / std_change diff --git a/src/factorlab/factors/volume/on_balance_volume.py b/src/factorlab/factors/volume/on_balance_volume.py index f7505cf..fad29da 100644 --- a/src/factorlab/factors/volume/on_balance_volume.py +++ b/src/factorlab/factors/volume/on_balance_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,4 +17,13 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_on_balance_volume(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + close_diff = self._diff_by_asset(close, 1) + signed_volume = volume * np.sign(close_diff) + + signed_sum = self._rolling_stat(signed_volume, window=self.hist_length, stat="sum") + total_sum = self._rolling_stat(volume, window=self.hist_length, stat="sum") + + return signed_sum / total_sum.replace(0, np.nan) diff --git a/src/factorlab/factors/volume/positive_volume_indicator.py b/src/factorlab/factors/volume/positive_volume_indicator.py index fc89a9e..f378427 100644 --- a/src/factorlab/factors/volume/positive_volume_indicator.py +++ b/src/factorlab/factors/volume/positive_volume_indicator.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,4 +17,20 @@ def 
_generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_positive_volume_indicator(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + rel_change = self._pct_change_by_asset(close, periods=1) + prev_volume = self._shift_by_asset(volume, 1) + filtered = rel_change.where(volume > prev_volume, 0.0) + + avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + norm_window = max(2 * self.hist_length, 250) + std_change = self._rolling_stat( + rel_change, + window=norm_window, + stat="std", + min_periods=self.hist_length, + ).replace(0, np.nan) + + return avg_change / std_change diff --git a/src/factorlab/factors/volume/price_volume_fit.py b/src/factorlab/factors/volume/price_volume_fit.py index c86cd22..1681dde 100644 --- a/src/factorlab/factors/volume/price_volume_fit.py +++ b/src/factorlab/factors/volume/price_volume_fit.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,4 +17,17 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_price_volume_fit(df, self.hist_length) + close = df[self.price_col] + volume = df[self.volume_col] + + x = self._safe_log(volume) + y = self._safe_log(close) + + mean_x = self._rolling_stat(x, window=self.hist_length, stat="mean") + mean_y = self._rolling_stat(y, window=self.hist_length, stat="mean") + mean_xy = self._rolling_stat(x * y, window=self.hist_length, stat="mean") + mean_x2 = self._rolling_stat(x * x, window=self.hist_length, stat="mean") + + cov_xy = mean_xy - (mean_x * mean_y) + var_x = mean_x2 - (mean_x * mean_x) + return cov_xy / var_x.replace(0, np.nan) diff --git a/src/factorlab/factors/volume/product_price_volume.py 
b/src/factorlab/factors/volume/product_price_volume.py index 86a165a..f463094 100644 --- a/src/factorlab/factors/volume/product_price_volume.py +++ b/src/factorlab/factors/volume/product_price_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -23,10 +24,49 @@ def __init__( def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_product_price_volume( - df, - hist_length=self.hist_length, - norm_lookback=self.norm_lookback, - norm_min_periods=self.norm_min_periods, + def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: + close = df[self.price_col] + volume = df[self.volume_col] + + prior_volume = self._shift_by_asset(volume, 1) + median_volume = self._rolling_stat( + prior_volume, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ).replace(0, np.nan) + normalized_volume = volume / median_volume + + log_close = self._safe_log(close) + price_change = self._diff_by_asset(log_close, 1) + prior_change = self._shift_by_asset(price_change, 1) + + median_change = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ) + q75 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.75, ) + q25 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.25, + ) + iqr = (q75 - q25).replace(0, np.nan) + + normalized_change = (price_change - median_change) / iqr + return normalized_volume, normalized_change + + def _compute_volume(self, df: pd.DataFrame) -> pd.Series: + normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) + precursor = 
normalized_volume * normalized_change + return self._rolling_stat(precursor, window=self.hist_length, stat="mean") diff --git a/src/factorlab/factors/volume/sum_price_volume.py b/src/factorlab/factors/volume/sum_price_volume.py index da77c04..a7e2cc5 100644 --- a/src/factorlab/factors/volume/sum_price_volume.py +++ b/src/factorlab/factors/volume/sum_price_volume.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -23,10 +24,50 @@ def __init__( def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_sum_price_volume( - df, - hist_length=self.hist_length, - norm_lookback=self.norm_lookback, - norm_min_periods=self.norm_min_periods, + def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: + close = df[self.price_col] + volume = df[self.volume_col] + + prior_volume = self._shift_by_asset(volume, 1) + median_volume = self._rolling_stat( + prior_volume, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ).replace(0, np.nan) + normalized_volume = volume / median_volume + + log_close = self._safe_log(close) + price_change = self._diff_by_asset(log_close, 1) + prior_change = self._shift_by_asset(price_change, 1) + + median_change = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="median", + min_periods=self.norm_min_periods, + ) + q75 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.75, ) + q25 = self._rolling_stat( + prior_change, + window=self.norm_lookback, + stat="quantile", + min_periods=self.norm_min_periods, + q=0.25, + ) + iqr = (q75 - q25).replace(0, np.nan) + + normalized_change = (price_change - median_change) / iqr + return normalized_volume, normalized_change + + def 
_compute_volume(self, df: pd.DataFrame) -> pd.Series: + normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) + precursor = normalized_volume + normalized_change.abs() + precursor = precursor.where(normalized_change >= 0, -precursor) + return self._rolling_stat(precursor, window=self.hist_length, stat="mean") diff --git a/src/factorlab/factors/volume/volume_momentum.py b/src/factorlab/factors/volume/volume_momentum.py index 8aeebc7..44f43e0 100644 --- a/src/factorlab/factors/volume/volume_momentum.py +++ b/src/factorlab/factors/volume/volume_momentum.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -17,4 +18,8 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}_{self.multiplier}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_volume_momentum(df, self.hist_length, self.multiplier) + volume = df[self.volume_col] + short_ma = self._rolling_stat(volume, window=self.hist_length, stat="mean") + long_ma = self._rolling_stat(volume, window=self.hist_length * self.multiplier, stat="mean") + ratio = short_ma / long_ma.replace(0, np.nan) + return self._safe_log(ratio) diff --git a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py index 3800a69..6077919 100644 --- a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py +++ b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py @@ -1,5 +1,6 @@ from __future__ import annotations +import numpy as np import pandas as pd from factorlab.factors.volume.base import VolumeFactor @@ -16,4 +17,13 @@ def _generate_name(self) -> str: return self.output_col or f"{self.name}_{self.hist_length}" def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - return self._raw_volume_weighted_ma_over_ma(df, self.hist_length) + close = df[self.price_col] + volume = 
df[self.volume_col] + + pv = close * volume + vwma = self._rolling_stat(pv, window=self.hist_length, stat="sum") / self._rolling_stat( + volume, window=self.hist_length, stat="sum" + ).replace(0, np.nan) + ma = self._rolling_stat(close, window=self.hist_length, stat="mean") + + return self._safe_log(vwma / ma.replace(0, np.nan)) From cf1d39c51a0500cf39538396546c051299ba93f7 Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 11:47:18 -0500 Subject: [PATCH 3/6] Remove standalone scripts entrypoint from volume factor PR --- scripts/evaluate_volume_factors_crypto.py | 171 ---------------------- 1 file changed, 171 deletions(-) delete mode 100644 scripts/evaluate_volume_factors_crypto.py diff --git a/scripts/evaluate_volume_factors_crypto.py b/scripts/evaluate_volume_factors_crypto.py deleted file mode 100644 index 9a41b2c..0000000 --- a/scripts/evaluate_volume_factors_crypto.py +++ /dev/null @@ -1,171 +0,0 @@ -from __future__ import annotations - -import argparse -import sys -from pathlib import Path -from typing import Dict, List, Tuple - -import numpy as np -import pandas as pd - - -ROOT = Path(__file__).resolve().parents[1] -SRC = ROOT / "src" -if str(SRC) not in sys.path: - sys.path.insert(0, str(SRC)) - -from factorlab.factors.volume import Volume # noqa: E402 - - -FACTOR_SPECS: List[Tuple[str, Dict[str, int]]] = [ - ("volume_momentum", {"hist_length": 20, "multiplier": 4}), - ("delta_volume_momentum", {"hist_length": 20, "multiplier": 4, "delta_len": 100}), - ("volume_weighted_ma_over_ma", {"hist_length": 50}), - ("diff_volume_weighted_ma_over_ma", {"short_dist": 20, "long_dist": 100}), - ("price_volume_fit", {"hist_length": 50}), - ("diff_price_volume_fit", {"short_dist": 20, "long_dist": 100}), - ("delta_price_volume_fit", {"hist_length": 20, "delta_dist": 30}), - ("on_balance_volume", {"hist_length": 50}), - ("delta_on_balance_volume", {"hist_length": 50, "delta_dist": 45}), - ("positive_volume_indicator", {"hist_length": 40}), - 
("delta_positive_volume_indicator", {"hist_length": 40, "delta_dist": 35}), - ("negative_volume_indicator", {"hist_length": 40}), - ("delta_negative_volume_indicator", {"hist_length": 40, "delta_dist": 35}), - ("product_price_volume", {"hist_length": 25}), - ("sum_price_volume", {"hist_length": 25}), - ("delta_product_price_volume", {"hist_length": 40, "delta_dist": 35}), - ("delta_sum_price_volume", {"hist_length": 40, "delta_dist": 35}), -] - - -def load_crypto_ohlcv(data_dir: Path, max_symbols: int) -> pd.DataFrame: - files = sorted(data_dir.glob("*.csv"))[:max_symbols] - if not files: - raise FileNotFoundError(f"No CSV files found in: {data_dir}") - - parts = [] - for path in files: - try: - tmp = pd.read_csv( - path, - usecols=["open_time", "open", "high", "low", "close", "volume", "ticker"], - ) - except Exception: - continue - - if tmp.empty: - continue - - tmp["date"] = pd.to_datetime(tmp["open_time"], errors="coerce") - tmp = tmp.dropna(subset=["date", "ticker"]) - tmp = tmp.set_index(["date", "ticker"]).sort_index() - parts.append(tmp[["open", "high", "low", "close", "volume"]]) - - if not parts: - raise RuntimeError(f"Could not load usable OHLCV data from: {data_dir}") - - df = pd.concat(parts, axis=0).sort_index() - counts = df.groupby(level=1).size() - keep = counts[counts >= 365].index - df = df[df.index.get_level_values(1).isin(keep)] - return df - - -def evaluate(df: pd.DataFrame, top_n: int, ann_factor: int = 365) -> pd.DataFrame: - close = df["close"] - volume = df["volume"] - fwd_ret = close.groupby(level=1).shift(-1).div(close) - 1.0 - - notional = close * volume - liquidity = ( - notional.groupby(level=1) - .rolling(window=20, min_periods=20) - .mean() - .droplevel(0) - .sort_index() - ) - eligible = liquidity.groupby(level=0).rank(ascending=False, method="first") <= top_n - - rows = [] - for method, kwargs in FACTOR_SPECS: - factor = Volume(method=method, **kwargs) - out = factor.compute(df) - new_col = [c for c in out.columns if c not in 
df.columns][0] - - panel = pd.concat( - [ - out[new_col].rename("factor"), - fwd_ret.rename("fwd_ret"), - eligible.rename("eligible"), - ], - axis=1, - ) - panel = panel[panel["eligible"]].dropna() - if panel.empty: - continue - - daily_ic = panel.groupby(level=0).apply( - lambda g: g["factor"].corr(g["fwd_ret"], method="spearman") if g.shape[0] >= 10 else np.nan - ) - - ranked = panel.copy() - ranked["weight"] = ranked.groupby(level=0)["factor"].rank(pct=True) - 0.5 - gross = ranked.groupby(level=0)["weight"].transform(lambda s: s.abs().sum()) - ranked["weight"] = ranked["weight"] / gross.replace(0, np.nan) - ls_ret = (ranked["weight"] * ranked["fwd_ret"]).groupby(level=0).sum() - - mean_ic = float(daily_ic.mean()) if daily_ic.notna().any() else np.nan - std_ic = float(daily_ic.std()) if daily_ic.notna().sum() > 1 else np.nan - ic_ir = mean_ic / std_ic if std_ic and np.isfinite(std_ic) else np.nan - - ann_ret = float(ls_ret.mean() * ann_factor) if ls_ret.notna().any() else np.nan - ann_vol = float(ls_ret.std() * np.sqrt(ann_factor)) if ls_ret.notna().sum() > 1 else np.nan - sharpe_365 = ann_ret / ann_vol if ann_vol and np.isfinite(ann_vol) else np.nan - - rows.append( - { - "method": method, - "n_ic_obs": int(daily_ic.notna().sum()), - "mean_ic": mean_ic, - "ic_ir": ic_ir, - "ann_ret_365": ann_ret, - "ann_vol_365": ann_vol, - "sharpe_365": sharpe_365, - } - ) - - if not rows: - raise RuntimeError("No factors produced valid evaluation rows.") - - return pd.DataFrame(rows).set_index("method").sort_values("mean_ic", ascending=False) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Evaluate volume factors on crypto universe.") - parser.add_argument( - "--data-dir", - type=Path, - default=Path("/Users/mikuts/astrofactor/astroblade/data/systamental/crypto/survivorship/binance_klines_history/daily/futures"), - help="Directory containing per-symbol daily OHLCV CSV files.", - ) - parser.add_argument("--max-symbols", type=int, default=120, 
help="Maximum number of symbol CSVs to load.") - parser.add_argument("--top-n", type=int, default=60, help="Top-N liquid assets used each day.") - parser.add_argument("--output", type=Path, default=None, help="Optional output CSV for summary.") - args = parser.parse_args() - - df = load_crypto_ohlcv(args.data_dir, args.max_symbols) - summary = evaluate(df, top_n=args.top_n, ann_factor=365) - - pd.set_option("display.width", 200) - pd.set_option("display.max_columns", 20) - print(summary.round(4)) - - if args.output is not None: - args.output.parent.mkdir(parents=True, exist_ok=True) - summary.to_csv(args.output) - print(f"\nSaved: {args.output}") - - -if __name__ == "__main__": - main() - From 0e24078acc20c9696b164769904ed76195cc791e Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 11:54:06 -0500 Subject: [PATCH 4/6] refactor(volume): align factor ops with feature transforms --- src/factorlab/factors/volume/base.py | 76 +++++++++++++++++-- .../volume/delta_negative_volume_indicator.py | 5 +- .../volume/delta_positive_volume_indicator.py | 5 +- .../factors/volume/delta_price_volume_fit.py | 8 +- .../volume/delta_product_price_volume.py | 8 +- .../factors/volume/delta_sum_price_volume.py | 8 +- .../factors/volume/delta_volume_momentum.py | 4 +- .../factors/volume/diff_price_volume_fit.py | 8 +- .../volume/diff_volume_weighted_ma_over_ma.py | 2 +- .../volume/negative_volume_indicator.py | 5 +- .../volume/positive_volume_indicator.py | 5 +- .../factors/volume/price_volume_fit.py | 8 +- .../factors/volume/product_price_volume.py | 8 +- .../factors/volume/sum_price_volume.py | 8 +- .../factors/volume/volume_momentum.py | 4 +- .../volume/volume_weighted_ma_over_ma.py | 2 +- 16 files changed, 106 insertions(+), 58 deletions(-) diff --git a/src/factorlab/factors/volume/base.py b/src/factorlab/factors/volume/base.py index f10c159..e898e4f 100644 --- a/src/factorlab/factors/volume/base.py +++ b/src/factorlab/factors/volume/base.py @@ -7,6 +7,9 @@ import 
pandas as pd from factorlab.factors.base import Factor +from factorlab.features.transforms.dispersion import Dispersion +from factorlab.features.transforms.returns import Difference, Returns +from factorlab.features.transforms.smoothing import WindowSmoother from factorlab.utils import to_dataframe @@ -81,20 +84,78 @@ def _is_multiindex(self, series: pd.Series) -> bool: def _safe_log(self, series: pd.Series) -> pd.Series: return np.log(series.where(series > 0, np.nan)) + def _series_frame(self, series: pd.Series, col: str = "value") -> pd.DataFrame: + return series.astype("float64").to_frame(col) + def _shift_by_asset(self, series: pd.Series, periods: int) -> pd.Series: if self._is_multiindex(series): return series.groupby(level=1).shift(periods) return series.shift(periods) def _pct_change_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series: - if self._is_multiindex(series): - return series.groupby(level=1).pct_change(periods=periods, fill_method=None) - return series.pct_change(periods=periods, fill_method=None) + df = self._series_frame(series) + ret = Returns(method="pct", input_col="value", output_col="ret", lags=periods).compute(df) + return ret["ret"] def _diff_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series: - if self._is_multiindex(series): - return series.groupby(level=1).diff(periods=periods) - return series.diff(periods=periods) + df = self._series_frame(series) + diff = Difference(input_col="value", output_col="diff", lags=periods).compute(df) + return diff["diff"] + + def _rolling_mean( + self, + series: pd.Series, + window: int, + min_periods: Optional[int] = None, + ) -> pd.Series: + min_periods = window if min_periods is None else min_periods + df = self._series_frame(series) + smoothed = WindowSmoother( + input_cols="value", + output_cols="mean", + window_type="rolling", + window_size=window, + central_tendency="mean", + min_periods=min_periods, + ).compute(df) + return smoothed["mean"] + + def _rolling_median( + 
self, + series: pd.Series, + window: int, + min_periods: Optional[int] = None, + ) -> pd.Series: + min_periods = window if min_periods is None else min_periods + df = self._series_frame(series) + smoothed = WindowSmoother( + input_cols="value", + output_cols="median", + window_type="rolling", + window_size=window, + central_tendency="median", + min_periods=min_periods, + ).compute(df) + return smoothed["median"] + + def _rolling_std( + self, + series: pd.Series, + window: int, + min_periods: Optional[int] = None, + ) -> pd.Series: + min_periods = 2 if min_periods is None else min_periods + df = self._series_frame(series) + dispersion = Dispersion( + method="std", + input_col="value", + output_col="std", + axis="ts", + window_type="rolling", + window_size=window, + min_periods=min_periods, + ).compute(df) + return dispersion["std"] def _rolling_stat( self, @@ -116,10 +177,9 @@ def _rolling_stat( return getattr(series.rolling(window=window, min_periods=min_periods), stat)(**kwargs) def _compress(self, raw: pd.Series) -> pd.Series: - robust_scale = self._rolling_stat( + robust_scale = self._rolling_median( raw.abs(), window=self.compression_window, - stat="median", min_periods=self.compression_min_periods, ).replace(0, np.nan) diff --git a/src/factorlab/factors/volume/delta_negative_volume_indicator.py b/src/factorlab/factors/volume/delta_negative_volume_indicator.py index 447db2d..3207e47 100644 --- a/src/factorlab/factors/volume/delta_negative_volume_indicator.py +++ b/src/factorlab/factors/volume/delta_negative_volume_indicator.py @@ -25,12 +25,11 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: prev_volume = self._shift_by_asset(volume, 1) filtered = rel_change.where(volume < prev_volume, 0.0) - avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + avg_change = self._rolling_mean(filtered, window=self.hist_length) norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_stat( + std_change = 
self._rolling_std( rel_change, window=norm_window, - stat="std", min_periods=self.hist_length, ).replace(0, np.nan) nvi = avg_change / std_change diff --git a/src/factorlab/factors/volume/delta_positive_volume_indicator.py b/src/factorlab/factors/volume/delta_positive_volume_indicator.py index 90b6d94..31b8889 100644 --- a/src/factorlab/factors/volume/delta_positive_volume_indicator.py +++ b/src/factorlab/factors/volume/delta_positive_volume_indicator.py @@ -25,12 +25,11 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: prev_volume = self._shift_by_asset(volume, 1) filtered = rel_change.where(volume > prev_volume, 0.0) - avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + avg_change = self._rolling_mean(filtered, window=self.hist_length) norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_stat( + std_change = self._rolling_std( rel_change, window=norm_window, - stat="std", min_periods=self.hist_length, ).replace(0, np.nan) pvi = avg_change / std_change diff --git a/src/factorlab/factors/volume/delta_price_volume_fit.py b/src/factorlab/factors/volume/delta_price_volume_fit.py index 16cccb0..a44300c 100644 --- a/src/factorlab/factors/volume/delta_price_volume_fit.py +++ b/src/factorlab/factors/volume/delta_price_volume_fit.py @@ -24,10 +24,10 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: x = self._safe_log(volume) y = self._safe_log(close) - mean_x = self._rolling_stat(x, window=self.hist_length, stat="mean") - mean_y = self._rolling_stat(y, window=self.hist_length, stat="mean") - mean_xy = self._rolling_stat(x * y, window=self.hist_length, stat="mean") - mean_x2 = self._rolling_stat(x * x, window=self.hist_length, stat="mean") + mean_x = self._rolling_mean(x, window=self.hist_length) + mean_y = self._rolling_mean(y, window=self.hist_length) + mean_xy = self._rolling_mean(x * y, window=self.hist_length) + mean_x2 = self._rolling_mean(x * x, window=self.hist_length) cov_xy = mean_xy - 
(mean_x * mean_y) var_x = mean_x2 - (mean_x * mean_x) diff --git a/src/factorlab/factors/volume/delta_product_price_volume.py b/src/factorlab/factors/volume/delta_product_price_volume.py index cc63b33..68da9a5 100644 --- a/src/factorlab/factors/volume/delta_product_price_volume.py +++ b/src/factorlab/factors/volume/delta_product_price_volume.py @@ -31,10 +31,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri volume = df[self.volume_col] prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_stat( + median_volume = self._rolling_median( prior_volume, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ).replace(0, np.nan) normalized_volume = volume / median_volume @@ -43,10 +42,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri price_change = self._diff_by_asset(log_close, 1) prior_change = self._shift_by_asset(price_change, 1) - median_change = self._rolling_stat( + median_change = self._rolling_median( prior_change, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ) q75 = self._rolling_stat( @@ -71,5 +69,5 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri def _compute_volume(self, df: pd.DataFrame) -> pd.Series: normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) precursor = normalized_volume * normalized_change - ppv = self._rolling_stat(precursor, window=self.hist_length, stat="mean") + ppv = self._rolling_mean(precursor, window=self.hist_length) return ppv - self._shift_by_asset(ppv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_sum_price_volume.py b/src/factorlab/factors/volume/delta_sum_price_volume.py index 3f1e64e..3822e57 100644 --- a/src/factorlab/factors/volume/delta_sum_price_volume.py +++ b/src/factorlab/factors/volume/delta_sum_price_volume.py @@ -31,10 +31,9 @@ def _normalized_volume_and_price_change(self, df: 
pd.DataFrame) -> tuple[pd.Seri volume = df[self.volume_col] prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_stat( + median_volume = self._rolling_median( prior_volume, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ).replace(0, np.nan) normalized_volume = volume / median_volume @@ -43,10 +42,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri price_change = self._diff_by_asset(log_close, 1) prior_change = self._shift_by_asset(price_change, 1) - median_change = self._rolling_stat( + median_change = self._rolling_median( prior_change, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ) q75 = self._rolling_stat( @@ -72,5 +70,5 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) precursor = normalized_volume + normalized_change.abs() precursor = precursor.where(normalized_change >= 0, -precursor) - spv = self._rolling_stat(precursor, window=self.hist_length, stat="mean") + spv = self._rolling_mean(precursor, window=self.hist_length) return spv - self._shift_by_asset(spv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_volume_momentum.py b/src/factorlab/factors/volume/delta_volume_momentum.py index 231c3b3..3a5e71a 100644 --- a/src/factorlab/factors/volume/delta_volume_momentum.py +++ b/src/factorlab/factors/volume/delta_volume_momentum.py @@ -20,7 +20,7 @@ def _generate_name(self) -> str: def _compute_volume(self, df: pd.DataFrame) -> pd.Series: volume = df[self.volume_col] - short_ma = self._rolling_stat(volume, window=self.hist_length, stat="mean") - long_ma = self._rolling_stat(volume, window=self.hist_length * self.multiplier, stat="mean") + short_ma = self._rolling_mean(volume, window=self.hist_length) + long_ma = self._rolling_mean(volume, window=self.hist_length * self.multiplier) vmom = self._safe_log(short_ma / 
long_ma.replace(0, np.nan)) return vmom - self._shift_by_asset(vmom, self.delta_len) diff --git a/src/factorlab/factors/volume/diff_price_volume_fit.py b/src/factorlab/factors/volume/diff_price_volume_fit.py index 621b63c..e68fc7f 100644 --- a/src/factorlab/factors/volume/diff_price_volume_fit.py +++ b/src/factorlab/factors/volume/diff_price_volume_fit.py @@ -24,10 +24,10 @@ def _pv_fit(self, df: pd.DataFrame, hist_length: int) -> pd.Series: x = self._safe_log(volume) y = self._safe_log(close) - mean_x = self._rolling_stat(x, window=hist_length, stat="mean") - mean_y = self._rolling_stat(y, window=hist_length, stat="mean") - mean_xy = self._rolling_stat(x * y, window=hist_length, stat="mean") - mean_x2 = self._rolling_stat(x * x, window=hist_length, stat="mean") + mean_x = self._rolling_mean(x, window=hist_length) + mean_y = self._rolling_mean(y, window=hist_length) + mean_xy = self._rolling_mean(x * y, window=hist_length) + mean_x2 = self._rolling_mean(x * x, window=hist_length) cov_xy = mean_xy - (mean_x * mean_y) var_x = mean_x2 - (mean_x * mean_x) diff --git a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py index 5f34833..3ce49d5 100644 --- a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py +++ b/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py @@ -25,7 +25,7 @@ def _vwma_over_ma(self, df: pd.DataFrame, hist_length: int) -> pd.Series: vwma = self._rolling_stat(pv, window=hist_length, stat="sum") / self._rolling_stat( volume, window=hist_length, stat="sum" ).replace(0, np.nan) - ma = self._rolling_stat(close, window=hist_length, stat="mean") + ma = self._rolling_mean(close, window=hist_length) return self._safe_log(vwma / ma.replace(0, np.nan)) diff --git a/src/factorlab/factors/volume/negative_volume_indicator.py b/src/factorlab/factors/volume/negative_volume_indicator.py index abc5731..5e4a1db 100644 --- 
a/src/factorlab/factors/volume/negative_volume_indicator.py +++ b/src/factorlab/factors/volume/negative_volume_indicator.py @@ -24,12 +24,11 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: prev_volume = self._shift_by_asset(volume, 1) filtered = rel_change.where(volume < prev_volume, 0.0) - avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + avg_change = self._rolling_mean(filtered, window=self.hist_length) norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_stat( + std_change = self._rolling_std( rel_change, window=norm_window, - stat="std", min_periods=self.hist_length, ).replace(0, np.nan) diff --git a/src/factorlab/factors/volume/positive_volume_indicator.py b/src/factorlab/factors/volume/positive_volume_indicator.py index f378427..4db2308 100644 --- a/src/factorlab/factors/volume/positive_volume_indicator.py +++ b/src/factorlab/factors/volume/positive_volume_indicator.py @@ -24,12 +24,11 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: prev_volume = self._shift_by_asset(volume, 1) filtered = rel_change.where(volume > prev_volume, 0.0) - avg_change = self._rolling_stat(filtered, window=self.hist_length, stat="mean") + avg_change = self._rolling_mean(filtered, window=self.hist_length) norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_stat( + std_change = self._rolling_std( rel_change, window=norm_window, - stat="std", min_periods=self.hist_length, ).replace(0, np.nan) diff --git a/src/factorlab/factors/volume/price_volume_fit.py b/src/factorlab/factors/volume/price_volume_fit.py index 1681dde..39d7885 100644 --- a/src/factorlab/factors/volume/price_volume_fit.py +++ b/src/factorlab/factors/volume/price_volume_fit.py @@ -23,10 +23,10 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: x = self._safe_log(volume) y = self._safe_log(close) - mean_x = self._rolling_stat(x, window=self.hist_length, stat="mean") - mean_y = self._rolling_stat(y, 
window=self.hist_length, stat="mean") - mean_xy = self._rolling_stat(x * y, window=self.hist_length, stat="mean") - mean_x2 = self._rolling_stat(x * x, window=self.hist_length, stat="mean") + mean_x = self._rolling_mean(x, window=self.hist_length) + mean_y = self._rolling_mean(y, window=self.hist_length) + mean_xy = self._rolling_mean(x * y, window=self.hist_length) + mean_x2 = self._rolling_mean(x * x, window=self.hist_length) cov_xy = mean_xy - (mean_x * mean_y) var_x = mean_x2 - (mean_x * mean_x) diff --git a/src/factorlab/factors/volume/product_price_volume.py b/src/factorlab/factors/volume/product_price_volume.py index f463094..962d4c8 100644 --- a/src/factorlab/factors/volume/product_price_volume.py +++ b/src/factorlab/factors/volume/product_price_volume.py @@ -29,10 +29,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri volume = df[self.volume_col] prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_stat( + median_volume = self._rolling_median( prior_volume, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ).replace(0, np.nan) normalized_volume = volume / median_volume @@ -41,10 +40,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri price_change = self._diff_by_asset(log_close, 1) prior_change = self._shift_by_asset(price_change, 1) - median_change = self._rolling_stat( + median_change = self._rolling_median( prior_change, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ) q75 = self._rolling_stat( @@ -69,4 +67,4 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri def _compute_volume(self, df: pd.DataFrame) -> pd.Series: normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) precursor = normalized_volume * normalized_change - return self._rolling_stat(precursor, window=self.hist_length, stat="mean") + return self._rolling_mean(precursor, 
window=self.hist_length) diff --git a/src/factorlab/factors/volume/sum_price_volume.py b/src/factorlab/factors/volume/sum_price_volume.py index a7e2cc5..d8d60e2 100644 --- a/src/factorlab/factors/volume/sum_price_volume.py +++ b/src/factorlab/factors/volume/sum_price_volume.py @@ -29,10 +29,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri volume = df[self.volume_col] prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_stat( + median_volume = self._rolling_median( prior_volume, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ).replace(0, np.nan) normalized_volume = volume / median_volume @@ -41,10 +40,9 @@ def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Seri price_change = self._diff_by_asset(log_close, 1) prior_change = self._shift_by_asset(price_change, 1) - median_change = self._rolling_stat( + median_change = self._rolling_median( prior_change, window=self.norm_lookback, - stat="median", min_periods=self.norm_min_periods, ) q75 = self._rolling_stat( @@ -70,4 +68,4 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) precursor = normalized_volume + normalized_change.abs() precursor = precursor.where(normalized_change >= 0, -precursor) - return self._rolling_stat(precursor, window=self.hist_length, stat="mean") + return self._rolling_mean(precursor, window=self.hist_length) diff --git a/src/factorlab/factors/volume/volume_momentum.py b/src/factorlab/factors/volume/volume_momentum.py index 44f43e0..b0a362e 100644 --- a/src/factorlab/factors/volume/volume_momentum.py +++ b/src/factorlab/factors/volume/volume_momentum.py @@ -19,7 +19,7 @@ def _generate_name(self) -> str: def _compute_volume(self, df: pd.DataFrame) -> pd.Series: volume = df[self.volume_col] - short_ma = self._rolling_stat(volume, window=self.hist_length, stat="mean") - long_ma = 
self._rolling_stat(volume, window=self.hist_length * self.multiplier, stat="mean") + short_ma = self._rolling_mean(volume, window=self.hist_length) + long_ma = self._rolling_mean(volume, window=self.hist_length * self.multiplier) ratio = short_ma / long_ma.replace(0, np.nan) return self._safe_log(ratio) diff --git a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py index 6077919..ee080a8 100644 --- a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py +++ b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py @@ -24,6 +24,6 @@ def _compute_volume(self, df: pd.DataFrame) -> pd.Series: vwma = self._rolling_stat(pv, window=self.hist_length, stat="sum") / self._rolling_stat( volume, window=self.hist_length, stat="sum" ).replace(0, np.nan) - ma = self._rolling_stat(close, window=self.hist_length, stat="mean") + ma = self._rolling_mean(close, window=self.hist_length) return self._safe_log(vwma / ma.replace(0, np.nan)) From 1181d04f8cfc4008954e5670eb8cd76cd53db679 Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 19:35:57 -0500 Subject: [PATCH 5/6] astrology --- src/factorlab/factors/__init__.py | 1 - src/factorlab/factors/astrology/__init__.py | 50 +++ .../factors/astrology/all_features.py | 119 +++++++ .../factors/astrology/aspect_dynamics.py | 65 ++++ src/factorlab/factors/astrology/astrology.py | 145 ++++++++ src/factorlab/factors/astrology/base.py | 101 ++++++ .../factors/astrology/bradley_siderograph.py | 76 ++++ .../astrology/commodity_natal_transits.py | 69 ++++ src/factorlab/factors/astrology/common.py | 329 ++++++++++++++++++ .../factors/astrology/cyclical_encoding.py | 52 +++ .../factors/astrology/declination_aspects.py | 48 +++ .../factors/astrology/dewey_oscillators.py | 74 ++++ .../factors/astrology/eclipse_score.py | 61 ++++ .../factors/astrology/essential_dignity.py | 41 +++ .../factors/astrology/gann_square_of_nine.py | 76 ++++ 
.../astrology/heliocentric_features.py | 89 +++++ .../factors/astrology/lunar_features.py | 45 +++ .../astrology/mcwhirter_nodal_cycle.py | 34 ++ .../factors/astrology/midpoint_activations.py | 72 ++++ .../astrology/natal_transit_aspects.py | 71 ++++ .../factors/astrology/planetary_aspects.py | 73 ++++ .../factors/astrology/planetary_ingress.py | 45 +++ .../astrology/planetary_speed_features.py | 57 +++ .../astrology/price_longitude_angles.py | 74 ++++ .../factors/astrology/retrograde_indicator.py | 46 +++ .../factors/astrology/synodic_cycle_phase.py | 47 +++ 26 files changed, 1959 insertions(+), 1 deletion(-) create mode 100644 src/factorlab/factors/astrology/__init__.py create mode 100644 src/factorlab/factors/astrology/all_features.py create mode 100644 src/factorlab/factors/astrology/aspect_dynamics.py create mode 100644 src/factorlab/factors/astrology/astrology.py create mode 100644 src/factorlab/factors/astrology/base.py create mode 100644 src/factorlab/factors/astrology/bradley_siderograph.py create mode 100644 src/factorlab/factors/astrology/commodity_natal_transits.py create mode 100644 src/factorlab/factors/astrology/common.py create mode 100644 src/factorlab/factors/astrology/cyclical_encoding.py create mode 100644 src/factorlab/factors/astrology/declination_aspects.py create mode 100644 src/factorlab/factors/astrology/dewey_oscillators.py create mode 100644 src/factorlab/factors/astrology/eclipse_score.py create mode 100644 src/factorlab/factors/astrology/essential_dignity.py create mode 100644 src/factorlab/factors/astrology/gann_square_of_nine.py create mode 100644 src/factorlab/factors/astrology/heliocentric_features.py create mode 100644 src/factorlab/factors/astrology/lunar_features.py create mode 100644 src/factorlab/factors/astrology/mcwhirter_nodal_cycle.py create mode 100644 src/factorlab/factors/astrology/midpoint_activations.py create mode 100644 src/factorlab/factors/astrology/natal_transit_aspects.py create mode 100644 
src/factorlab/factors/astrology/planetary_aspects.py create mode 100644 src/factorlab/factors/astrology/planetary_ingress.py create mode 100644 src/factorlab/factors/astrology/planetary_speed_features.py create mode 100644 src/factorlab/factors/astrology/price_longitude_angles.py create mode 100644 src/factorlab/factors/astrology/retrograde_indicator.py create mode 100644 src/factorlab/factors/astrology/synodic_cycle_phase.py diff --git a/src/factorlab/factors/__init__.py b/src/factorlab/factors/__init__.py index 3068a35..3f05dfd 100644 --- a/src/factorlab/factors/__init__.py +++ b/src/factorlab/factors/__init__.py @@ -1,4 +1,3 @@ from factorlab.factors.base import Factor __all__ = ["Factor"] - diff --git a/src/factorlab/factors/astrology/__init__.py b/src/factorlab/factors/astrology/__init__.py new file mode 100644 index 0000000..ca3e168 --- /dev/null +++ b/src/factorlab/factors/astrology/__init__.py @@ -0,0 +1,50 @@ +from factorlab.factors.astrology.all_features import AllAstrologyFeatures +from factorlab.factors.astrology.aspect_dynamics import AspectDynamics +from factorlab.factors.astrology.astrology import Astrology +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.bradley_siderograph import BradleySiderograph +from factorlab.factors.astrology.commodity_natal_transits import CommodityNatalTransits +from factorlab.factors.astrology.cyclical_encoding import CyclicalEncoding +from factorlab.factors.astrology.declination_aspects import DeclinationAspects +from factorlab.factors.astrology.dewey_oscillators import DeweyOscillators +from factorlab.factors.astrology.eclipse_score import EclipseScore +from factorlab.factors.astrology.essential_dignity import EssentialDignity +from factorlab.factors.astrology.gann_square_of_nine import GannSquareOfNine +from factorlab.factors.astrology.heliocentric_features import HeliocentricFeatures +from factorlab.factors.astrology.lunar_features import LunarFeatures +from 
factorlab.factors.astrology.mcwhirter_nodal_cycle import McWhirterNodalCycle +from factorlab.factors.astrology.midpoint_activations import MidpointActivations +from factorlab.factors.astrology.natal_transit_aspects import NatalTransitAspects +from factorlab.factors.astrology.planetary_aspects import PlanetaryAspects +from factorlab.factors.astrology.planetary_ingress import PlanetaryIngress +from factorlab.factors.astrology.planetary_speed_features import PlanetarySpeedFeatures +from factorlab.factors.astrology.price_longitude_angles import PriceLongitudeAngles +from factorlab.factors.astrology.retrograde_indicator import RetrogradeIndicator +from factorlab.factors.astrology.synodic_cycle_phase import SynodicCyclePhase + + +__all__ = [ + "Astrology", + "AstrologyFactor", + "AllAstrologyFeatures", + "PlanetaryAspects", + "BradleySiderograph", + "RetrogradeIndicator", + "PlanetaryIngress", + "NatalTransitAspects", + "LunarFeatures", + "CyclicalEncoding", + "PriceLongitudeAngles", + "SynodicCyclePhase", + "McWhirterNodalCycle", + "DeweyOscillators", + "EssentialDignity", + "DeclinationAspects", + "EclipseScore", + "PlanetarySpeedFeatures", + "HeliocentricFeatures", + "AspectDynamics", + "GannSquareOfNine", + "CommodityNatalTransits", + "MidpointActivations", +] diff --git a/src/factorlab/factors/astrology/all_features.py b/src/factorlab/factors/astrology/all_features.py new file mode 100644 index 0000000..17e6123 --- /dev/null +++ b/src/factorlab/factors/astrology/all_features.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +from typing import Optional + +import pandas as pd + +from factorlab.factors.astrology.aspect_dynamics import AspectDynamics +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.bradley_siderograph import BradleySiderograph +from factorlab.factors.astrology.commodity_natal_transits import CommodityNatalTransits +from factorlab.factors.astrology.cyclical_encoding import CyclicalEncoding +from 
factorlab.factors.astrology.declination_aspects import DeclinationAspects +from factorlab.factors.astrology.dewey_oscillators import DeweyOscillators +from factorlab.factors.astrology.eclipse_score import EclipseScore +from factorlab.factors.astrology.essential_dignity import EssentialDignity +from factorlab.factors.astrology.gann_square_of_nine import GannSquareOfNine +from factorlab.factors.astrology.heliocentric_features import HeliocentricFeatures +from factorlab.factors.astrology.lunar_features import LunarFeatures +from factorlab.factors.astrology.mcwhirter_nodal_cycle import McWhirterNodalCycle +from factorlab.factors.astrology.midpoint_activations import MidpointActivations +from factorlab.factors.astrology.natal_transit_aspects import NatalTransitAspects +from factorlab.factors.astrology.planetary_aspects import PlanetaryAspects +from factorlab.factors.astrology.planetary_ingress import PlanetaryIngress +from factorlab.factors.astrology.planetary_speed_features import PlanetarySpeedFeatures +from factorlab.factors.astrology.price_longitude_angles import PriceLongitudeAngles +from factorlab.factors.astrology.retrograde_indicator import RetrogradeIndicator +from factorlab.factors.astrology.synodic_cycle_phase import SynodicCyclePhase +from factorlab.factors.astrology.common import get_dates_planets + + +class AllAstrologyFeatures(AstrologyFactor): + """Composite factor that returns the full astrology feature set.""" + + def __init__( + self, + include_natal: bool = True, + include_price_angles: bool = False, + include_heliocentric: bool = False, + include_commodity_natal: bool = False, + include_gann_so9: bool = False, + anchor_price: float = 1.0, + scale: float = 1.0, + helio_ephemeris_df: Optional[pd.DataFrame] = None, + target_series: Optional[pd.Series] = None, + **kwargs, + ): + super().__init__( + description="Composite astrology factor bundle.", + tags=["astrology", "composite", "feature_bundle"], + **kwargs, + ) + self.include_natal = include_natal + 
self.include_price_angles = include_price_angles + self.include_heliocentric = include_heliocentric + self.include_commodity_natal = include_commodity_natal + self.include_gann_so9 = include_gann_so9 + self.anchor_price = anchor_price + self.scale = scale + self.helio_ephemeris_df = helio_ephemeris_df + self.target_series = target_series + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, planets = get_dates_planets(ephemeris_df) + features = [] + + def _append(df: pd.DataFrame) -> None: + if df is not None and not df.empty: + features.append(df) + + _append(PlanetaryAspects().compute(ephemeris_df)) + _append(BradleySiderograph().compute(ephemeris_df)) + _append(RetrogradeIndicator().compute(ephemeris_df)) + _append(PlanetaryIngress().compute(ephemeris_df)) + _append(LunarFeatures().compute(ephemeris_df)) + _append(CyclicalEncoding().compute(ephemeris_df)) + + if self.include_natal: + _append(NatalTransitAspects(natal_date=self.natal_date).compute(ephemeris_df)) + + if self.include_price_angles: + for planet in ["sun", "jupiter", "saturn"]: + if planet in planets: + _append( + PriceLongitudeAngles( + planet=planet, + anchor_price=self.anchor_price, + scale=self.scale, + ).compute(ephemeris_df) + ) + + _append(SynodicCyclePhase().compute(ephemeris_df)) + _append(McWhirterNodalCycle().compute(ephemeris_df)) + _append( + DeweyOscillators( + data_driven=self.target_series is not None, + target_series=self.target_series, + ).compute(ephemeris_df) + ) + _append(EssentialDignity().compute(ephemeris_df)) + _append(DeclinationAspects().compute(ephemeris_df)) + _append(EclipseScore().compute(ephemeris_df)) + _append(PlanetarySpeedFeatures().compute(ephemeris_df)) + _append(AspectDynamics().compute(ephemeris_df)) + _append(MidpointActivations().compute(ephemeris_df)) + + if self.include_heliocentric and self.helio_ephemeris_df is not None: + _append(HeliocentricFeatures(helio_ephemeris_df=self.helio_ephemeris_df).compute(ephemeris_df)) + + 
if self.include_commodity_natal: + _append(CommodityNatalTransits().compute(ephemeris_df)) + + if self.include_gann_so9 and self.price_df is not None: + _append(GannSquareOfNine(price_df=self.price_df).compute(ephemeris_df)) + + if not features: + return pd.DataFrame(index=dates) + + result = pd.concat(features, axis=1) + return result.loc[:, ~result.columns.duplicated()] diff --git a/src/factorlab/factors/astrology/aspect_dynamics.py b/src/factorlab/factors/astrology/aspect_dynamics.py new file mode 100644 index 0000000..43c2eb0 --- /dev/null +++ b/src/factorlab/factors/astrology/aspect_dynamics.py @@ -0,0 +1,65 @@ +from __future__ import annotations + +from typing import List, Optional + +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import ( + ASPECT_ANGLES, + DEFAULT_ORBS, + compute_aspect_distance, + get_dates_planets, + get_planet_longitude, +) + + +class AspectDynamics(AstrologyFactor): + """Applying vs separating aspect-state flags.""" + + def __init__( + self, + planets: Optional[List[str]] = None, + aspect_types: Optional[List[str]] = None, + orb: float = 10.0, + **kwargs, + ): + super().__init__( + description="Applying/separating state for planetary aspects.", + tags=["astrology", "aspects", "dynamics"], + **kwargs, + ) + self.planets = planets + self.aspect_types = aspect_types + self.orb = orb + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, available_planets = get_dates_planets(ephemeris_df) + planets = self.planets or available_planets + aspect_types = self.aspect_types or ["conjunction", "square", "trine", "opposition"] + + results = {} + for i, p1 in enumerate(planets): + lon1 = get_planet_longitude(ephemeris_df, p1).reindex(dates) + if lon1.empty: + continue + for p2 in planets[i + 1 :]: + lon2 = get_planet_longitude(ephemeris_df, p2).reindex(dates) + if lon2.empty: + continue + + for aspect_name in aspect_types: + angle = 
ASPECT_ANGLES[aspect_name] + aspect_orb = DEFAULT_ORBS.get(aspect_name, self.orb) + dist = compute_aspect_distance(lon1, lon2, angle) + in_orb = dist <= aspect_orb + + dist_change = dist.diff() + applying = (dist_change < 0).astype(int) + separating = (dist_change > 0).astype(int) + + prefix = f"{p1}_{p2}_{aspect_name}" + results[f"{prefix}_applying"] = applying * in_orb.astype(int) + results[f"{prefix}_separating"] = separating * in_orb.astype(int) + + return pd.DataFrame(results) if results else pd.DataFrame(index=dates) diff --git a/src/factorlab/factors/astrology/astrology.py b/src/factorlab/factors/astrology/astrology.py new file mode 100644 index 0000000..b59733b --- /dev/null +++ b/src/factorlab/factors/astrology/astrology.py @@ -0,0 +1,145 @@ +from __future__ import annotations + +from typing import ClassVar, Dict, Optional, Type, Union + +import pandas as pd + +from factorlab.core.base_transform import BaseTransform +from factorlab.factors.base import Factor +from factorlab.factors.astrology.all_features import AllAstrologyFeatures +from factorlab.factors.astrology.aspect_dynamics import AspectDynamics +from factorlab.factors.astrology.bradley_siderograph import BradleySiderograph +from factorlab.factors.astrology.commodity_natal_transits import CommodityNatalTransits +from factorlab.factors.astrology.cyclical_encoding import CyclicalEncoding +from factorlab.factors.astrology.declination_aspects import DeclinationAspects +from factorlab.factors.astrology.dewey_oscillators import DeweyOscillators +from factorlab.factors.astrology.eclipse_score import EclipseScore +from factorlab.factors.astrology.essential_dignity import EssentialDignity +from factorlab.factors.astrology.gann_square_of_nine import GannSquareOfNine +from factorlab.factors.astrology.heliocentric_features import HeliocentricFeatures +from factorlab.factors.astrology.lunar_features import LunarFeatures +from factorlab.factors.astrology.mcwhirter_nodal_cycle import McWhirterNodalCycle +from 
factorlab.factors.astrology.midpoint_activations import MidpointActivations +from factorlab.factors.astrology.natal_transit_aspects import NatalTransitAspects +from factorlab.factors.astrology.planetary_aspects import PlanetaryAspects +from factorlab.factors.astrology.planetary_ingress import PlanetaryIngress +from factorlab.factors.astrology.planetary_speed_features import PlanetarySpeedFeatures +from factorlab.factors.astrology.price_longitude_angles import PriceLongitudeAngles +from factorlab.factors.astrology.retrograde_indicator import RetrogradeIndicator +from factorlab.factors.astrology.synodic_cycle_phase import SynodicCyclePhase + + +class Astrology(Factor): + """Factory class for astrology factor indicators.""" + + _METHOD_MAP: ClassVar[Dict[str, Type[BaseTransform]]] = { + "planetary_aspects": PlanetaryAspects, + "bradley_siderograph": BradleySiderograph, + "retrograde_indicator": RetrogradeIndicator, + "planetary_ingress": PlanetaryIngress, + "natal_transit_aspects": NatalTransitAspects, + "lunar_features": LunarFeatures, + "cyclical_encoding": CyclicalEncoding, + "price_longitude_angles": PriceLongitudeAngles, + "synodic_cycle_phase": SynodicCyclePhase, + "mcwhirter_nodal_cycle": McWhirterNodalCycle, + "dewey_oscillators": DeweyOscillators, + "essential_dignity": EssentialDignity, + "declination_aspects": DeclinationAspects, + "eclipse_score": EclipseScore, + "planetary_speed_features": PlanetarySpeedFeatures, + "heliocentric_features": HeliocentricFeatures, + "aspect_dynamics": AspectDynamics, + "gann_square_of_nine": GannSquareOfNine, + "commodity_natal_transits": CommodityNatalTransits, + "midpoint_activations": MidpointActivations, + "all_features": AllAstrologyFeatures, + } + + _ALIASES: ClassVar[Dict[str, str]] = { + "aspects": "planetary_aspects", + "bradley": "bradley_siderograph", + "retrograde": "retrograde_indicator", + "ingress": "planetary_ingress", + "natal_transits": "natal_transit_aspects", + "lunar": "lunar_features", + "cyclical": 
"cyclical_encoding", + "price_angles": "price_longitude_angles", + "synodic": "synodic_cycle_phase", + "mcwhirter": "mcwhirter_nodal_cycle", + "dewey": "dewey_oscillators", + "dignity": "essential_dignity", + "declination": "declination_aspects", + "eclipse": "eclipse_score", + "speed": "planetary_speed_features", + "helio": "heliocentric_features", + "dynamics": "aspect_dynamics", + "so9": "gann_square_of_nine", + "commodity_natal": "commodity_natal_transits", + "midpoints": "midpoint_activations", + "all": "all_features", + } + + @classmethod + def get_factor_metadata(cls) -> pd.DataFrame: + data = [] + for alias, factor_class in cls._METHOD_MAP.items(): + try: + instance = factor_class() + data.append( + { + "Alias": alias, + "Class": factor_class.__name__, + "Description": instance.description, + } + ) + except Exception as exc: + data.append( + { + "Alias": alias, + "Class": factor_class.__name__, + "Description": f"Instantiation Failed: {exc}", + } + ) + + return pd.DataFrame(data).set_index("Alias") + + def __init__(self, method: str = "all_features", **kwargs): + super().__init__( + name="Astrology", + description="A factory for astrology factors.", + category="Astrology", + tags=["astrology", "ephemeris", "cycles"], + ) + + method = method.lower().strip() + self.method = self._ALIASES.get(method, method) + self.kwargs = kwargs + + if self.method not in self._METHOD_MAP: + raise ValueError( + f"Invalid astrology factor method '{self.method}'. 
" + f"Method must be one of: {list(self._METHOD_MAP.keys())}" + ) + + factor_class = self._METHOD_MAP[self.method] + self._factor: Factor = factor_class(**self.kwargs) + + @property + def inputs(self) -> list[str]: + return self._factor.inputs + + def fit( + self, + X: Union[pd.Series, pd.DataFrame], + y: Optional[Union[pd.Series, pd.DataFrame]] = None, + ) -> "Astrology": + self._factor.fit(X, y) + self._is_fitted = True + return self + + def transform(self, data: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame: + if not self._is_fitted: + raise RuntimeError("Astrology transform must be fitted before calling transform().") + + return self._factor.transform(data) diff --git a/src/factorlab/factors/astrology/base.py b/src/factorlab/factors/astrology/base.py new file mode 100644 index 0000000..3c34cf6 --- /dev/null +++ b/src/factorlab/factors/astrology/base.py @@ -0,0 +1,101 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import List, Optional, Union + +import pandas as pd + +from factorlab.factors.base import Factor +from factorlab.factors.astrology.common import BTC_NATAL_DATE +from factorlab.utils import to_dataframe + + +class AstrologyFactor(Factor, ABC): + """Base class for astrology factor indicators built on ephemeris data.""" + + def __init__( + self, + name: Optional[str] = None, + description: Optional[str] = None, + tags: Optional[List[str]] = None, + price_df: Optional[pd.DataFrame] = None, + natal_date: Optional[Union[str, pd.Timestamp]] = None, + broadcast_to_assets: bool = False, + output_prefix: Optional[str] = None, + ): + super().__init__( + name=name or self.__class__.__name__, + description=description or "Astrology factor.", + category="Astrology", + tags=tags or ["astrology", "ephemeris", "cycles"], + ) + self.price_df = price_df + self.natal_date = pd.Timestamp(natal_date) if natal_date else pd.Timestamp(BTC_NATAL_DATE) + self.broadcast_to_assets = broadcast_to_assets + self.output_prefix = 
output_prefix + + @property + def inputs(self) -> List[str]: + return [] + + def _validate_ephemeris(self, ephemeris_df: pd.DataFrame) -> None: + if not isinstance(ephemeris_df.index, pd.MultiIndex): + raise ValueError("Expected ephemeris input indexed by MultiIndex(date, ticker).") + + required_index_levels = {"date", "ticker"} + if not required_index_levels.issubset(set(ephemeris_df.index.names)): + raise ValueError( + "Ephemeris index must contain levels named 'date' and 'ticker'." + ) + + required_cols = {"longitude"} + missing = required_cols - set(ephemeris_df.columns) + if missing: + raise ValueError(f"Missing required ephemeris columns: {missing}") + + def fit( + self, + X: Union[pd.Series, pd.DataFrame], + y: Optional[Union[pd.Series, pd.DataFrame]] = None, + ) -> "AstrologyFactor": + ephemeris_df = to_dataframe(X) + self._validate_ephemeris(ephemeris_df) + self._is_fitted = True + return self + + @abstractmethod + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + raise NotImplementedError + + def _apply_output_prefix(self, features: pd.DataFrame) -> pd.DataFrame: + if not self.output_prefix or features.empty: + return features + out = features.copy() + out.columns = [f"{self.output_prefix}{c}" for c in out.columns] + return out + + def _broadcast_by_asset( + self, + features: pd.DataFrame, + ephemeris_df: pd.DataFrame, + ) -> pd.DataFrame: + if features.empty: + return pd.DataFrame(index=ephemeris_df.index) + + date_index = ephemeris_df.index.get_level_values("date") + expanded = features.reindex(date_index) + expanded.index = ephemeris_df.index + return expanded + + def transform(self, X: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame: + if not self._is_fitted: + raise RuntimeError(f"Transform '{self.name}' must be fitted before calling transform().") + + ephemeris_df = to_dataframe(X).sort_index() + self._validate_ephemeris(ephemeris_df) + features = self._compute_astrology(ephemeris_df) + features = 
self._apply_output_prefix(features) + + if self.broadcast_to_assets: + return self._broadcast_by_asset(features, ephemeris_df) + return features diff --git a/src/factorlab/factors/astrology/bradley_siderograph.py b/src/factorlab/factors/astrology/bradley_siderograph.py new file mode 100644 index 0000000..3954b43 --- /dev/null +++ b/src/factorlab/factors/astrology/bradley_siderograph.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import ( + ASPECT_ANGLES, + BRADLEY_MIDTERM_PAIRS, + BRADLEY_VALENCY, + aspect_weight, + compute_aspect_distance, + get_dates_planets, + get_planet_field, + get_planet_longitude, +) + + +class BradleySiderograph(AstrologyFactor): + """Bradley siderograph potential and component terms.""" + + def __init__(self, multiplier: float = 1.0, **kwargs): + super().__init__( + description="Bradley siderograph composite potential.", + tags=["astrology", "bradley", "cycles"], + **kwargs, + ) + self.multiplier = multiplier + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + long_terms = pd.Series(0.0, index=dates, name="long_terms") + mid_terms = pd.Series(0.0, index=dates, name="mid_terms") + + for (p1, p2), valency_map in BRADLEY_VALENCY.items(): + lon1 = get_planet_longitude(ephemeris_df, p1) + lon2 = get_planet_longitude(ephemeris_df, p2) + if lon1.empty or lon2.empty: + continue + + lon1 = lon1.reindex(dates) + lon2 = lon2.reindex(dates) + for aspect_name, valency in valency_map.items(): + angle = ASPECT_ANGLES[aspect_name] + dist = compute_aspect_distance(lon1, lon2, angle) + weight = aspect_weight(dist, orb=15.0) + long_terms = long_terms + weight * valency + + for p1, p2 in BRADLEY_MIDTERM_PAIRS: + lon1 = get_planet_longitude(ephemeris_df, p1) + lon2 = get_planet_longitude(ephemeris_df, p2) + if lon1.empty or lon2.empty: + continue + + 
lon1 = lon1.reindex(dates) + lon2 = lon2.reindex(dates) + for aspect_name in ["conjunction", "sextile", "square", "trine", "opposition"]: + angle = ASPECT_ANGLES[aspect_name] + dist = compute_aspect_distance(lon1, lon2, angle) + weight = aspect_weight(dist, orb=15.0) + valency = 1 if aspect_name in ("sextile", "trine", "conjunction") else -1 + mid_terms = mid_terms + weight * valency + + mars_decl = get_planet_field(ephemeris_df, "mars", "declination").reindex(dates) + venus_decl = get_planet_field(ephemeris_df, "venus", "declination").reindex(dates) + declination_factor = (mars_decl.fillna(0) + venus_decl.fillna(0)) / 2 + + sidereal_potential = self.multiplier * (long_terms + declination_factor) + mid_terms + + return pd.DataFrame( + { + "sidereal_potential": sidereal_potential, + "long_terms": long_terms, + "mid_terms": mid_terms, + "declination_factor": declination_factor, + } + ) diff --git a/src/factorlab/factors/astrology/commodity_natal_transits.py b/src/factorlab/factors/astrology/commodity_natal_transits.py new file mode 100644 index 0000000..ce3a1ee --- /dev/null +++ b/src/factorlab/factors/astrology/commodity_natal_transits.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from typing import Dict, List, Optional + +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import ( + ASPECT_ANGLES, + COMMODITY_NATAL_DATES, + aspect_weight, + compute_aspect_distance, + get_dates_planets, + get_natal_positions, + get_planet_longitude, +) + + +class CommodityNatalTransits(AstrologyFactor): + """Ticker-specific transit scores using first-trade natal dates.""" + + def __init__( + self, + ticker_natal_dates: Optional[Dict[str, str]] = None, + planets: Optional[List[str]] = None, + aspect_types: Optional[List[str]] = None, + orb: float = 8.0, + **kwargs, + ): + super().__init__( + description="Commodity natal transit activation scores.", + tags=["astrology", "natal", "commodities"], + 
**kwargs, + ) + self.ticker_natal_dates = ticker_natal_dates + self.planets = planets + self.aspect_types = aspect_types + self.orb = orb + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + ticker_natal_dates = self.ticker_natal_dates or COMMODITY_NATAL_DATES + planets = self.planets or ["jupiter", "saturn", "uranus", "neptune", "pluto"] + aspect_types = self.aspect_types or ["conjunction", "square", "opposition"] + + all_results = {} + for ticker, natal_str in ticker_natal_dates.items(): + natal_positions = get_natal_positions(pd.Timestamp(natal_str), planets) + if not natal_positions: + continue + + ticker_score = pd.Series(0.0, index=dates) + for planet in planets: + if planet not in natal_positions: + continue + + transit_lon = get_planet_longitude(ephemeris_df, planet).reindex(dates) + if transit_lon.empty: + continue + + natal_lon = natal_positions[planet] + for aspect_name in aspect_types: + angle = ASPECT_ANGLES[aspect_name] + dist = compute_aspect_distance(transit_lon, natal_lon, angle) + ticker_score = ticker_score + aspect_weight(dist, self.orb) + + all_results[f"natal_transit_{ticker}"] = ticker_score + + return pd.DataFrame(all_results) if all_results else pd.DataFrame(index=dates) diff --git a/src/factorlab/factors/astrology/common.py b/src/factorlab/factors/astrology/common.py new file mode 100644 index 0000000..83eddf0 --- /dev/null +++ b/src/factorlab/factors/astrology/common.py @@ -0,0 +1,329 @@ +from __future__ import annotations + +import logging +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import pandas as pd + +logger = logging.getLogger(__name__) + +# Standard aspect angles and their names (including minor aspects) +ASPECT_ANGLES = { + "conjunction": 0, + "semi_sextile": 30, + "semi_square": 45, + "sextile": 60, + "square": 90, + "trine": 120, + "sesquiquadrate": 135, + "quincunx": 150, + "opposition": 180, +} + +# Default orbs for each 
aspect type (degrees) +DEFAULT_ORBS = { + "conjunction": 10, + "semi_sextile": 3, + "semi_square": 3, + "sextile": 8, + "square": 10, + "trine": 10, + "sesquiquadrate": 3, + "quincunx": 5, + "opposition": 10, +} + +# BTC genesis natal date +BTC_NATAL_DATE = "2009-01-03 18:15:00" + +# Bradley Siderograph valency table: +1 = harmonious, -1 = challenging +BRADLEY_VALENCY = { + ("jupiter", "saturn"): { + "conjunction": -1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, + ("jupiter", "neptune"): { + "conjunction": 1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, + ("jupiter", "uranus"): { + "conjunction": 1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, + ("saturn", "neptune"): { + "conjunction": -1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, + ("saturn", "uranus"): { + "conjunction": -1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, + ("neptune", "uranus"): { + "conjunction": 1, + "sextile": 1, + "square": -1, + "trine": 1, + "opposition": -1, + }, +} + +# Mid-term planet pairs for Bradley +BRADLEY_MIDTERM_PAIRS = [ + ("sun", "mercury"), + ("sun", "venus"), + ("sun", "mars"), + ("mercury", "venus"), + ("mercury", "mars"), + ("venus", "mars"), +] + +# First-trade dates for commodities/indices (from astro_dates.jpg) +COMMODITY_NATAL_DATES = { + "Wheat": "1884-05-01", + "Corn": "1888-07-14", + "Oats": "1888-07-13", + "Soybeans": "1936-10-05", + "Soybean_Oil": "1950-07-17", + "Soybean_Meal": "1951-08-17", + "Gold": "1974-12-31", + "Silver": "1931-06-15", + "Copper": "1933-07-05", + "Platinum": "1956-03-04", + "Palladium": "1968-01-02", + "Coffee": "1882-03-07", + "Cocoa": "1925-10-01", + "Sugar": "1914-12-16", + "Cotton": "1870-09-01", + "Orange_Juice": "1966-02-01", + "Lumber": "1969-10-01", + "Crude_Oil": "1983-03-30", + "Natural_Gas": "1990-04-03", + "Heating_Oil": "1978-11-14", + "Treasury_Bonds": "1977-08-22", + "SP500": "1982-04-21", + 
"Live_Cattle": "1964-11-30", + "Lean_Hogs": "1966-02-28", + "Feeder_Cattle": "1971-11-30", + "Currencies": "1972-05-16", + "BTC": "2009-01-03", + "ETH": "2015-07-30", +} + +# Essential dignity table (Compendium 2.3) +DIGNITY_TABLE = { + "sun": {4: 5, 0: 4, 10: -5, 6: -4}, + "moon": {3: 5, 1: 4, 9: -5, 7: -4}, + "mercury": {2: 5, 5: 5, 8: -5, 11: -5}, + "venus": {1: 5, 6: 5, 11: 4, 7: -5, 0: -5, 5: -4}, + "mars": {0: 5, 7: 5, 9: 4, 6: -5, 1: -5, 3: -4}, + "jupiter": {8: 5, 11: 5, 3: 4, 2: -5, 5: -5, 9: -4}, + "saturn": {9: 5, 10: 5, 6: 4, 3: -5, 4: -5, 0: -4}, + "uranus": {10: 5, 7: 4, 4: -5, 1: -4}, + "neptune": {11: 5, 3: 4, 5: -5, 9: -4}, + "pluto": {7: 5, 0: 4, 1: -5, 6: -4}, +} + +DEWEY_CYCLES = { + "41_month": {"period_days": 41 * 30.44, "ref_trough": "1932-06-01"}, + "9_2_year": {"period_days": 9.2 * 365.25, "ref_trough": "1932-06-01"}, + "18_6_year": {"period_days": 18.6 * 365.25, "ref_trough": "1932-06-01"}, + "54_year": {"period_days": 54 * 365.25, "ref_trough": "1932-06-01"}, +} + +SYNODIC_PAIRS = { + "jupiter_saturn": ("jupiter", "saturn"), + "jupiter_uranus": ("jupiter", "uranus"), + "saturn_neptune": ("saturn", "neptune"), + "saturn_pluto": ("saturn", "pluto"), + "jupiter_neptune": ("jupiter", "neptune"), + "jupiter_pluto": ("jupiter", "pluto"), +} + +MCWHIRTER_BULLISH_SIGNS = {11, 0, 1, 2, 3, 4} + + +def get_dates_planets(ephemeris_df: pd.DataFrame) -> Tuple[pd.Index, List[str]]: + """Extract sorted date index and available planets from ephemeris input.""" + dates = ephemeris_df.index.get_level_values("date").unique() + planets = ephemeris_df.index.get_level_values("ticker").unique().tolist() + return dates, planets + + +def deg_to_lowest_180(angle: Union[float, np.ndarray, pd.Series]) -> Union[float, np.ndarray, pd.Series]: + """Reduce an angle to the range -180 to +180 degrees.""" + a = angle % 360 + if isinstance(a, (pd.Series, np.ndarray)): + mask = a > 180 + if isinstance(a, pd.Series): + a = a.copy() + a[mask] = a[mask] - 360 + else: + a = 
def compute_aspect_distance(
    lon1: Union[float, pd.Series],
    lon2: Union[float, pd.Series],
    aspect_angle: float,
) -> Union[float, pd.Series]:
    """Compute distance from exact aspect angle between two longitudes.

    Args:
        lon1: First longitude in degrees (scalar or Series).
        lon2: Second longitude in degrees (scalar or Series).
        aspect_angle: Exact aspect angle in degrees, e.g. 0, 60, 90, 120, 180.

    Returns:
        ``| |fold(lon1 - lon2)| - aspect_angle |`` where ``fold`` is
        ``deg_to_lowest_180``.  NaN inputs propagate as NaN.
    """
    # abs() works for both scalars and Series, so no type branching is needed.
    separation = abs(deg_to_lowest_180(lon1 - lon2))
    return abs(separation - aspect_angle)


def aspect_weight(
    distance: Union[float, np.ndarray, pd.Series],
    orb: float = 15.0,
) -> Union[float, np.ndarray, pd.Series]:
    """Step weight based on distance to exact aspect (0-10 scale).

    The steps are: ``distance <= 0`` -> 10.0, ``<= 5`` -> 7.5, ``<= 10`` -> 2.5,
    anything larger -> 0.0.  A distance beyond ``orb`` is always 0.0, even when
    ``orb`` is smaller than one of the step boundaries.

    Args:
        distance: Distance from exactness in degrees (scalar, ndarray or Series).
        orb: Maximum distance that can receive a non-zero weight.

    Returns:
        Weight(s) with the same shape/index as ``distance``.  Missing (NaN)
        distances get weight 0.0 in every code path.
    """
    if isinstance(distance, (pd.Series, np.ndarray)):
        values = np.asarray(distance, dtype=float)
        # First matching condition wins; NaN fails every comparison and so
        # falls through to the 0.0 default.
        weights = np.select(
            [values > orb, values <= 0, values <= 5, values <= 10],
            [0.0, 10.0, 7.5, 2.5],
            default=0.0,
        )
        if isinstance(distance, pd.Series):
            return pd.Series(weights, index=distance.index)
        return weights

    # Without this guard a NaN scalar fails every ``>`` test below and would be
    # reported as an exact aspect (10.0), contradicting the Series behaviour.
    if pd.isna(distance):
        return 0.0
    if distance > orb or distance > 10:
        return 0.0
    if distance > 5:
        return 2.5
    if distance > 0:
        return 7.5
    return 10.0


def get_zodiac_sign(longitude: Union[float, pd.Series]) -> Union[int, pd.Series]:
    """Convert ecliptic longitude to zodiac sign index (0=Aries..11=Pisces).

    Args:
        longitude: Longitude in degrees; values outside [0, 360) wrap around.

    Returns:
        For a scalar, an ``int``.  For a Series without missing values, an
        integer Series.  If the Series contains NaN (e.g. after reindexing onto
        dates the ephemeris does not cover) the result stays float so the gaps
        remain NaN instead of raising on the integer cast.
    """
    if isinstance(longitude, pd.Series):
        sign = (longitude // 30) % 12
        if sign.isna().any():
            return sign
        return sign.astype(int)
    return int(longitude // 30) % 12


def get_planet_longitude(ephemeris_df: pd.DataFrame, planet: str) -> pd.Series:
    """Extract longitude series for one planet from ephemeris.

    Args:
        ephemeris_df: Frame whose index has a ``"ticker"`` level holding planet
            names and which has a ``"longitude"`` column.
        planet: Planet name to select on the ``"ticker"`` level.

    Returns:
        The planet's longitude Series, or an empty float64 Series (after
        logging a warning) when the lookup raises ``KeyError``.
    """
    try:
        return ephemeris_df.xs(planet, level="ticker")["longitude"]
    except KeyError:
        # NOTE: a missing "longitude" column also lands here, although the
        # message only mentions the planet.
        logger.warning("Planet '%s' not found in ephemeris data.", planet)
        return pd.Series(dtype="float64")
def event_impact_kernel(
    binary_signal: pd.Series,
    halflife_forward: int = 7,
    halflife_backward: int = 3,
    max_multiples: int = 4,
) -> pd.Series:
    """Convert a binary event flag into a smooth anticipation/decay kernel.

    Every positive entry is treated as an event.  From the event row onwards
    the output decays exponentially with ``halflife_forward`` (value 1.0 on the
    event row itself); the rows before the event ramp up with
    ``halflife_backward``.  Overlapping events are combined with ``max``.

    Args:
        binary_signal: Event flags; any value > 0 counts as an event.
        halflife_forward: Half-life, in rows, of the decay after an event.
        halflife_backward: Half-life, in rows, of the ramp before an event.
        max_multiples: Each kernel is truncated after this many half-lives.

    Returns:
        Float Series with the same index and name as ``binary_signal``.
    """
    if binary_signal.empty:
        return binary_signal.copy()

    n = len(binary_signal)
    values = binary_signal.values.astype(float)
    result = np.zeros(n)

    # Forward kernel starts at offset 0 (the event row), backward at offset 1
    # (the row immediately before the event).
    fwd_len = halflife_forward * max_multiples
    fwd_kernel = np.exp(-np.arange(fwd_len) * np.log(2) / halflife_forward)

    bwd_len = halflife_backward * max_multiples
    bwd_kernel = np.exp(-np.arange(1, bwd_len + 1) * np.log(2) / halflife_backward)

    for idx in np.flatnonzero(values > 0):
        fwd_end = min(idx + fwd_len, n)
        fwd_slice = slice(idx, fwd_end)
        result[fwd_slice] = np.maximum(result[fwd_slice], fwd_kernel[: fwd_end - idx])

        bwd_start = max(idx - bwd_len, 0)
        bwd_slice = slice(bwd_start, idx)
        # Reverse so the largest backward weight sits right before the event.
        bwd_vals = bwd_kernel[: idx - bwd_start][::-1]
        result[bwd_slice] = np.maximum(result[bwd_slice], bwd_vals)

    return pd.Series(result, index=binary_signal.index, name=binary_signal.name)


def smooth_binary_features(
    binary_df: pd.DataFrame,
    halflives: Optional[List[int]] = None,
    include_density: bool = True,
    density_windows: Optional[List[int]] = None,
) -> pd.DataFrame:
    """Apply multi-scale smoothing to binary event features.

    Args:
        binary_df: Frame of binary event columns.
        halflives: Forward half-lives; defaults to ``[3, 7, 14, 30, 60, 90]``.
            The backward half-life is a third of the forward one (at least 1).
        include_density: Whether to add rolling event counts.
        density_windows: Rolling windows for the counts; defaults to
            ``[30, 90]``.

    Returns:
        Frame with ``<col>_smooth_<hl>d`` columns followed, when requested, by
        ``<col>_density_<window>d`` columns.
    """
    if halflives is None:
        halflives = [3, 7, 14, 30, 60, 90]
    if density_windows is None:
        density_windows = [30, 90]

    results = {}
    for col in binary_df.columns:
        series = binary_df[col]
        for hl in halflives:
            bwd_hl = max(1, hl // 3)
            results[f"{col}_smooth_{hl}d"] = event_impact_kernel(
                series, halflife_forward=hl, halflife_backward=bwd_hl
            )

    if include_density:
        for window in density_windows:
            # One rolling pass per window covers every column at once.
            density = binary_df.rolling(window, min_periods=1).sum()
            for col in binary_df.columns:
                results[f"{col}_density_{window}d"] = density[col]

    return pd.DataFrame(results)


def get_natal_positions(natal_date: Union[str, pd.Timestamp], planets: List[str]) -> Dict[str, float]:
    """Compute natal planet longitudes using cryptodatapy Ephemeris when available.

    Args:
        natal_date: Date for which the positions are requested.
        planets: Planet names to look up.

    Returns:
        Mapping of planet name to longitude.  Planets whose lookup fails or
        returns no rows are omitted; the mapping is empty when the optional
        ``cryptodatapy`` ephemeris cannot be imported.
    """
    try:
        from cryptodatapy.extract.libraries.ephemeris import Ephemeris
    except Exception:
        # Kept broad so that any import-time failure of the optional
        # dependency degrades to "no natal data" rather than an exception.
        logger.warning(
            "cryptodatapy ephemeris dependency is unavailable; natal transit features will be empty."
        )
        return {}

    natal_eph = Ephemeris(start_date=natal_date, end_date=natal_date, freq="d")
    positions: Dict[str, float] = {}
    for planet in planets:
        try:
            lon = natal_eph.get_planet_longitude(planet)
            if not lon.empty:
                positions[planet] = float(lon.iloc[0])
        except Exception:
            # Still best-effort per planet, but leave a trace instead of
            # dropping the planet without any indication.
            logger.debug(
                "Natal longitude lookup failed for planet '%s'; skipping.",
                planet,
                exc_info=True,
            )
            continue
    return positions
class DeclinationAspects(AstrologyFactor):
    """Declination parallel and contra-parallel aspect flags.

    For every unordered planet pair two 0/1 columns are produced:
    ``<p1>_<p2>_parallel`` when ``|decl1 - decl2| <= orb`` and
    ``<p1>_<p2>_contra_parallel`` when ``|decl1 + decl2| <= orb``.
    """

    def __init__(
        self,
        planets: Optional[List[str]] = None,
        orb: float = 1.0,
        **kwargs,
    ):
        """Store the configuration.

        Args:
            planets: Planets to pair up. Falls back to every planet in the
                ephemeris except ``"north_node"``.
            orb: Maximum declination difference (degrees) for a flag to be set.
            **kwargs: Forwarded unchanged to ``AstrologyFactor.__init__``.
        """
        super().__init__(
            description="Declination parallel and contra-parallel aspects.",
            tags=["astrology", "declination", "jensen"],
            **kwargs,
        )
        self.planets = planets
        self.orb = orb

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return parallel / contra-parallel flags for all planet pairs.

        Planets without declination data are skipped.  If nothing can be
        computed, an empty frame indexed by the ephemeris dates is returned.
        """
        dates, available_planets = get_dates_planets(ephemeris_df)
        planets = self.planets or [p for p in available_planets if p != "north_node"]

        # Extract each declination once.  Emptiness must be checked before
        # reindexing because a reindexed series is never ``.empty``.
        declinations = {}
        for planet in planets:
            if planet in declinations:
                continue
            decl = get_planet_field(ephemeris_df, planet, "declination")
            if not decl.empty:
                declinations[planet] = decl.reindex(dates)

        results = {}
        for i, p1 in enumerate(planets):
            decl1 = declinations.get(p1)
            if decl1 is None:
                continue
            for p2 in planets[i + 1 :]:
                decl2 = declinations.get(p2)
                if decl2 is None:
                    continue

                parallel_dist = (decl1 - decl2).abs()
                contra_dist = (decl1 + decl2).abs()

                # NaN distances compare False, so gaps become 0 flags.
                results[f"{p1}_{p2}_parallel"] = (parallel_dist <= self.orb).astype(int)
                results[f"{p1}_{p2}_contra_parallel"] = (contra_dist <= self.orb).astype(int)

        return pd.DataFrame(results) if results else pd.DataFrame(index=dates)
class EclipseScore(AstrologyFactor):
    """Lunation/eclipse detection and decayed impact score.

    A row is flagged as a solar (lunar) eclipse when the Moon-Sun elongation is
    within 12 degrees of 0 (180) AND the Sun is within ``eclipse_orb`` degrees
    of the north node or of the point opposite to it.
    """

    def __init__(self, decay_window: int = 14, eclipse_orb: float = 1.5, **kwargs):
        """Store the configuration.

        Args:
            decay_window: Number of days over which the score decays linearly
                from 1 to 0 after an eclipse row.
            eclipse_orb: Maximum Sun-to-node distance (degrees) for a lunation
                to count as an eclipse.
            **kwargs: Forwarded unchanged to ``AstrologyFactor.__init__``.
        """
        super().__init__(
            description="Solar/lunar eclipse proximity and weighted impact score.",
            tags=["astrology", "eclipse", "lunation"],
            **kwargs,
        )
        self.decay_window = decay_window
        self.eclipse_orb = eclipse_orb

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return eclipse flags plus plain and solar-weighted decay scores.

        Returns an empty frame indexed by the ephemeris dates when the sun,
        moon or north node is missing from the ephemeris.
        """
        dates, _ = get_dates_planets(ephemeris_df)
        sun_lon = get_planet_longitude(ephemeris_df, "sun")
        moon_lon = get_planet_longitude(ephemeris_df, "moon")
        node_lon = get_planet_longitude(ephemeris_df, "north_node")

        # Test for missing bodies before reindexing; afterwards every series
        # has len(dates) rows and ``.empty`` can no longer detect the problem.
        if sun_lon.empty or moon_lon.empty or node_lon.empty:
            return pd.DataFrame(index=dates)

        sun_lon = sun_lon.reindex(dates)
        moon_lon = moon_lon.reindex(dates)
        node_lon = node_lon.reindex(dates)

        elongation = (moon_lon - sun_lon) % 360
        lunation_orb = 12.0
        new_moon = (elongation < lunation_orb) | (elongation > (360 - lunation_orb))
        full_moon = (elongation > (180 - lunation_orb)) & (elongation < (180 + lunation_orb))

        sun_node_dist = compute_aspect_distance(sun_lon, node_lon, 0)
        sun_south_node_dist = compute_aspect_distance(sun_lon, node_lon, 180)
        near_node = (sun_node_dist <= self.eclipse_orb) | (sun_south_node_dist <= self.eclipse_orb)

        solar_eclipse = (new_moon & near_node).astype(int)
        lunar_eclipse = (full_moon & near_node).astype(int)
        event_mask = ((solar_eclipse | lunar_eclipse) > 0).to_numpy()

        score = pd.Series(0.0, index=dates)
        if event_mask.any():
            date_ordinals = np.array([d.toordinal() for d in dates])
            window = self.decay_window
            for event_ordinal in date_ordinals[event_mask]:
                delta = date_ordinals - event_ordinal
                mask = (delta >= 0) & (delta <= window)
                if window:
                    decay = 1.0 - delta[mask] / window
                else:
                    # A zero window selects only the event day itself; score it
                    # 1.0 instead of evaluating 0/0 (which would yield NaN).
                    decay = np.ones(int(mask.sum()))
                score.iloc[mask] = np.maximum(score.iloc[mask].values, decay)

        return pd.DataFrame(
            {
                "solar_eclipse": solar_eclipse,
                "lunar_eclipse": lunar_eclipse,
                "eclipse_score": score,
                # Rows that are themselves solar eclipses count double.
                "eclipse_weighted_score": score * (1 + solar_eclipse),
            }
        )
class EssentialDignity(AstrologyFactor):
    """Essential dignity scores by planet and aggregate.

    Each planet's zodiac sign is looked up in ``DIGNITY_TABLE``; signs without
    an entry score 0.  ``dignity_aggregate`` is the row-wise sum over planets.
    """

    def __init__(self, planets: Optional[List[str]] = None, **kwargs):
        """Store the configuration.

        Args:
            planets: Planets to score. Falls back to every ephemeris planet
                that has an entry in ``DIGNITY_TABLE``.
            **kwargs: Forwarded unchanged to ``AstrologyFactor.__init__``.
        """
        super().__init__(
            description="Planetary essential dignity and aggregate strength scores.",
            tags=["astrology", "dignity", "classical"],
            **kwargs,
        )
        self.planets = planets

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return ``<planet>_dignity`` columns plus ``dignity_aggregate``.

        Planets that are unknown to ``DIGNITY_TABLE`` or absent from the
        ephemeris are skipped.  Dates on which a planet has no longitude are
        left as NaN in that planet's column.
        """
        dates, available_planets = get_dates_planets(ephemeris_df)
        planets = self.planets or [p for p in available_planets if p in DIGNITY_TABLE]

        results = {}
        for planet in planets:
            table = DIGNITY_TABLE.get(planet)
            if table is None:
                continue

            lon = get_planet_longitude(ephemeris_df, planet)
            if lon.empty:
                continue

            # Score only rows that actually have a longitude: the sign
            # conversion casts to int and cannot represent NaN.
            valid_lon = lon.reindex(dates).dropna()
            if valid_lon.empty:
                continue

            sign = get_zodiac_sign(valid_lon)
            score = sign.map(lambda s, table=table: table.get(s, 0))
            results[f"{planet}_dignity"] = score.reindex(dates)

        df = pd.DataFrame(results)
        if not df.empty:
            df["dignity_aggregate"] = df.sum(axis=1)
        return df
and alignment features.", + tags=["astrology", "gann", "square_of_nine"], + **kwargs, + ) + self.price_col = price_col + self.anchor_planet = anchor_planet + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + if self.price_df is None: + logger.warning("No price data provided for Gann So9.") + return pd.DataFrame(index=dates) + + if isinstance(self.price_df.index, pd.MultiIndex): + try: + price = self.price_df[self.price_col].groupby("date").mean().reindex(dates) + except Exception: + price = self.price_df[self.price_col].reindex(dates) + else: + price = self.price_df[self.price_col].reindex(dates) + + if price.empty or price.isna().all(): + return pd.DataFrame(index=dates) + + sqrt_price = np.sqrt(price.abs()) + price_degree = (sqrt_price % 1) * 360 + + results = {"so9_price_degree": price_degree} + + cardinal_angles = [0, 45, 90, 135, 180, 225, 270, 315] + min_dist = pd.Series(180.0, index=dates) + for angle in cardinal_angles: + dist = (price_degree - angle).abs() + dist = dist.where(dist <= 180, 360 - dist) + min_dist = min_dist.where(min_dist < dist, dist) + results["so9_cardinal_dist"] = min_dist + + for rotation, label in [(1, "90"), (2, "180"), (3, "270"), (4, "360")]: + sr_up = (sqrt_price + rotation * 0.5) ** 2 + sr_down = (sqrt_price - rotation * 0.5).clip(lower=0) ** 2 + results[f"so9_sr_up_{label}"] = sr_up + results[f"so9_sr_down_{label}"] = sr_down + + sr_up_180 = (sqrt_price + 1) ** 2 + sr_down_180 = (sqrt_price - 1).clip(lower=0) ** 2 + results["so9_dist_up_pct"] = (sr_up_180 - price) / price + results["so9_dist_down_pct"] = (price - sr_down_180) / price + + planet_lon = get_planet_longitude(ephemeris_df, self.anchor_planet).reindex(dates) + if not planet_lon.empty: + alignment_dist = (price_degree - planet_lon).abs() + alignment_dist = alignment_dist.where(alignment_dist <= 180, 360 - alignment_dist) + results[f"so9_{self.anchor_planet}_alignment"] = alignment_dist + 
class HeliocentricFeatures(AstrologyFactor):
    """Heliocentric signs, phases, and in-orb aspect flags.

    Works on a separate heliocentric ephemeris supplied at construction time;
    the regular ephemeris passed to ``_compute_astrology`` only provides the
    date index the features are aligned to.
    """

    def __init__(
        self,
        helio_ephemeris_df: Optional[pd.DataFrame] = None,
        planets: Optional[List[str]] = None,
        aspect_types: Optional[List[str]] = None,
        orb: float = 10.0,
        **kwargs,
    ):
        """Store the configuration.

        Args:
            helio_ephemeris_df: Heliocentric ephemeris with a ``"ticker"``
                index level and a ``"longitude"`` column.  Without it no
                features are produced.
            planets: Planets for the per-planet features. Falls back to every
                planet in the heliocentric ephemeris except sun, moon and
                north node.
            aspect_types: Keys of ``ASPECT_ANGLES`` to flag. Falls back to the
                five major aspects.
            orb: Fallback orb, used only for aspect names that have no entry
                in ``DEFAULT_ORBS``.
            **kwargs: Forwarded unchanged to ``AstrologyFactor.__init__``.
        """
        super().__init__(
            description="Heliocentric feature set for lead-lag testing.",
            tags=["astrology", "heliocentric", "jensen"],
            **kwargs,
        )
        self.helio_ephemeris_df = helio_ephemeris_df
        self.planets = planets
        self.aspect_types = aspect_types
        self.orb = orb

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return heliocentric sign, sin/cos, pair-phase and in-orb columns.

        Returns an empty frame indexed by the ephemeris dates when no
        heliocentric ephemeris was supplied or nothing could be computed.
        """
        dates, _ = get_dates_planets(ephemeris_df)
        helio_ephemeris_df = self.helio_ephemeris_df
        if helio_ephemeris_df is None:
            logger.warning("No heliocentric ephemeris provided; skipping.")
            return pd.DataFrame(index=dates)

        planets = self.planets
        if planets is None:
            planets = [
                p
                for p in helio_ephemeris_df.index.get_level_values("ticker").unique()
                if p not in ("sun", "moon", "north_node")
            ]

        aspect_types = self.aspect_types or ["conjunction", "sextile", "square", "trine", "opposition"]

        lon_cache = {}

        def helio_longitude(planet):
            """Return the planet's longitude aligned to ``dates``, or None if unavailable."""
            if planet not in lon_cache:
                try:
                    raw = helio_ephemeris_df.xs(planet, level="ticker")["longitude"]
                except KeyError:
                    raw = None
                # Decide on emptiness before reindexing; a reindexed series
                # always has len(dates) rows.
                lon_cache[planet] = None if raw is None or raw.empty else raw.reindex(dates)
            return lon_cache[planet]

        results = {}
        for planet in planets:
            lon = helio_longitude(planet)
            if lon is None:
                continue

            # Compute the sign on populated rows only (the integer cast inside
            # get_zodiac_sign cannot hold NaN), then realign; gaps stay NaN.
            results[f"helio_{planet}_sign"] = get_zodiac_sign(lon.dropna()).reindex(dates)
            rad = lon * np.pi / 180.0
            results[f"helio_{planet}_lon_sin"] = np.sin(rad)
            results[f"helio_{planet}_lon_cos"] = np.cos(rad)

        helio_pairs = [("jupiter", "saturn"), ("jupiter", "uranus"), ("saturn", "neptune"), ("saturn", "pluto")]
        for p1, p2 in helio_pairs:
            lon1 = helio_longitude(p1)
            lon2 = helio_longitude(p2)
            if lon1 is None or lon2 is None:
                continue

            phase = (lon1 - lon2) % 360
            rad = phase * np.pi / 180.0
            results[f"helio_{p1}_{p2}_phase"] = phase
            results[f"helio_{p1}_{p2}_phase_sin"] = np.sin(rad)
            results[f"helio_{p1}_{p2}_phase_cos"] = np.cos(rad)

            for aspect_name in aspect_types:
                angle = ASPECT_ANGLES[aspect_name]
                aspect_orb = DEFAULT_ORBS.get(aspect_name, self.orb)
                dist = compute_aspect_distance(lon1, lon2, angle)
                results[f"helio_{p1}_{p2}_{aspect_name}_in_orb"] = (dist <= aspect_orb).astype(int)

        return pd.DataFrame(results) if results else pd.DataFrame(index=dates)
class McWhirterNodalCycle(AstrologyFactor):
    """McWhirter 18.6-year nodal cycle regime flags.

    Derived purely from the north node's longitude: its zodiac sign, a +1/-1
    trend flag from ``MCWHIRTER_BULLISH_SIGNS``, and two flags for the first
    ten degrees of sign 4 and sign 10.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments to ``AstrologyFactor.__init__``."""
        super().__init__(
            description="North Node trend and extreme-zone flags per McWhirter.",
            tags=["astrology", "mcwhirter", "node"],
            **kwargs,
        )

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return node sign, trend and extreme-zone columns.

        Returns an empty frame indexed by the ephemeris dates when the north
        node is not available.  Dates without a node longitude are NaN.
        """
        dates, _ = get_dates_planets(ephemeris_df)
        node_lon = get_planet_longitude(ephemeris_df, "north_node")
        # The guard has to run before reindexing: a reindexed series is never
        # ``.empty`` and an all-NaN longitude would break the integer sign cast.
        if node_lon.empty:
            return pd.DataFrame(index=dates)

        valid_lon = node_lon.reindex(dates).dropna()
        if valid_lon.empty:
            return pd.DataFrame(index=dates)

        node_sign = get_zodiac_sign(valid_lon)
        degree_in_sign = valid_lon % 30

        # NOTE(review): the node moves backwards through the zodiac; confirm
        # that MCWHIRTER_BULLISH_SIGNS and the "< 10 degrees" windows encode the
        # intended phase of the cycle.
        results = {
            "node_sign": node_sign,
            "node_trend": node_sign.map(lambda s: 1 if s in MCWHIRTER_BULLISH_SIGNS else -1),
            "node_extreme_top": ((node_sign == 4) & (degree_in_sign < 10)).astype(int),
            "node_extreme_bottom": ((node_sign == 10) & (degree_in_sign < 10)).astype(int),
        }
        # Realign so the output always spans every ephemeris date.
        return pd.DataFrame(results).reindex(dates)
class NatalTransitAspects(AstrologyFactor):
    """Transit-to-natal aspect activations.

    Compares each transiting planet with its own natal longitude and emits,
    per planet and aspect, the distance from exactness, an in-orb flag and the
    ``aspect_weight``.
    """

    _DEFAULT_ASPECTS = ("conjunction", "sextile", "square", "trine", "opposition")

    def __init__(
        self,
        planets: Optional[List[str]] = None,
        natal_date: Optional[Union[str, pd.Timestamp]] = None,
        aspect_types: Optional[List[str]] = None,
        orb: float = 10.0,
        **kwargs,
    ):
        """Store the configuration.

        Args:
            planets: Planets to evaluate. Falls back to every ephemeris planet.
            natal_date: Natal date; also forwarded to the base class.  When
                falsy, the base class's ``natal_date`` attribute is used.
            aspect_types: Keys of ``ASPECT_ANGLES`` to evaluate. Falls back to
                the five major aspects.
            orb: Orb (degrees) for the in-orb flag and the weight.
            **kwargs: Forwarded unchanged to ``AstrologyFactor.__init__``.
        """
        super().__init__(
            natal_date=natal_date,
            description="Aspects between transiting planets and natal chart positions.",
            tags=["astrology", "natal", "transit"],
            **kwargs,
        )
        self.planets = planets
        self.aspect_types = aspect_types
        self.orb = orb
        if natal_date:
            self.natal_date_override = pd.Timestamp(natal_date)
        else:
            self.natal_date_override = None

    def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame:
        """Return ``natal_<planet>_<aspect>_{dist,in_orb,weight}`` columns.

        An empty frame indexed by the ephemeris dates is returned when no
        natal positions are available or no planet yields any column.
        """
        dates, available_planets = get_dates_planets(ephemeris_df)
        selected_planets = self.planets or available_planets
        selected_aspects = self.aspect_types or list(self._DEFAULT_ASPECTS)
        reference_date = self.natal_date_override or self.natal_date

        natal_positions = get_natal_positions(reference_date, selected_planets)
        if not natal_positions:
            return pd.DataFrame(index=dates)

        features = {}
        for planet in selected_planets:
            if planet not in natal_positions:
                continue
            natal_lon = natal_positions[planet]

            raw_transit = get_planet_longitude(ephemeris_df, planet)
            if raw_transit.empty:
                continue
            transit_lon = raw_transit.reindex(dates)

            for aspect_name in selected_aspects:
                dist = compute_aspect_distance(transit_lon, natal_lon, ASPECT_ANGLES[aspect_name])
                prefix = f"natal_{planet}_{aspect_name}"
                features[f"{prefix}_dist"] = dist
                features[f"{prefix}_in_orb"] = (dist <= self.orb).astype(int)
                features[f"{prefix}_weight"] = aspect_weight(dist, self.orb)

        if not features:
            return pd.DataFrame(index=dates)
        return pd.DataFrame(features)
and weights.""" + + def __init__( + self, + planets: Optional[List[str]] = None, + aspect_types: Optional[List[str]] = None, + orb: float = 10.0, + **kwargs, + ): + super().__init__( + description="Planetary aspect distances, in-orb flags, and aspect weights.", + tags=["astrology", "aspects", "synodic"], + **kwargs, + ) + self.planets = planets + self.aspect_types = aspect_types + self.orb = orb + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, available_planets = get_dates_planets(ephemeris_df) + planets = self.planets or available_planets + aspect_types = self.aspect_types or [ + "conjunction", + "semi_square", + "sextile", + "square", + "trine", + "sesquiquadrate", + "opposition", + ] + + results = {} + for i, p1 in enumerate(planets): + lon1 = get_planet_longitude(ephemeris_df, p1) + if lon1.empty: + continue + for p2 in planets[i + 1 :]: + lon2 = get_planet_longitude(ephemeris_df, p2) + if lon2.empty: + continue + lon1_a, lon2_a = lon1.align(lon2, join="inner") + + for aspect_name in aspect_types: + angle = ASPECT_ANGLES[aspect_name] + aspect_orb = DEFAULT_ORBS.get(aspect_name, self.orb) + dist = compute_aspect_distance(lon1_a, lon2_a, angle) + in_orb = (dist <= aspect_orb).astype(int) + weight = aspect_weight(dist, aspect_orb) + + prefix = f"{p1}_{p2}_{aspect_name}" + results[f"{prefix}_dist"] = dist + results[f"{prefix}_in_orb"] = in_orb + results[f"{prefix}_weight"] = weight + + return pd.DataFrame(results) if results else pd.DataFrame(index=dates) diff --git a/src/factorlab/factors/astrology/planetary_ingress.py b/src/factorlab/factors/astrology/planetary_ingress.py new file mode 100644 index 0000000..448f167 --- /dev/null +++ b/src/factorlab/factors/astrology/planetary_ingress.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from typing import List, Optional + +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import ( + 
get_dates_planets, + get_planet_longitude, + get_zodiac_sign, +) + + +class PlanetaryIngress(AstrologyFactor): + """Zodiac sign and ingress event flags per planet.""" + + def __init__(self, planets: Optional[List[str]] = None, **kwargs): + super().__init__( + description="Planetary ingress events by zodiac sign boundary crossing.", + tags=["astrology", "ingress", "zodiac"], + **kwargs, + ) + self.planets = planets + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, available_planets = get_dates_planets(ephemeris_df) + planets = self.planets or available_planets + + results = {} + for planet in planets: + lon = get_planet_longitude(ephemeris_df, planet) + if lon.empty: + continue + + lon = lon.reindex(dates) + sign = get_zodiac_sign(lon) + sign_prev = sign.shift(1) + ingress = (sign != sign_prev).astype(int) + ingress.iloc[0] = 0 + + results[f"{planet}_sign"] = sign + results[f"{planet}_ingress"] = ingress + + return pd.DataFrame(results) diff --git a/src/factorlab/factors/astrology/planetary_speed_features.py b/src/factorlab/factors/astrology/planetary_speed_features.py new file mode 100644 index 0000000..c856dc9 --- /dev/null +++ b/src/factorlab/factors/astrology/planetary_speed_features.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from typing import List, Optional + +import numpy as np +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import get_dates_planets, get_planet_field + + +class PlanetarySpeedFeatures(AstrologyFactor): + """Speed, station, and normalized motion features.""" + + def __init__( + self, + planets: Optional[List[str]] = None, + station_threshold: float = 0.05, + **kwargs, + ): + super().__init__( + description="Planetary speed and station-state features.", + tags=["astrology", "speed", "retrograde"], + **kwargs, + ) + self.planets = planets + self.station_threshold = station_threshold + + def _compute_astrology(self, 
ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + planets = self.planets or [ + "mercury", + "venus", + "mars", + "jupiter", + "saturn", + "uranus", + "neptune", + "pluto", + ] + + results = {} + for planet in planets: + speed = get_planet_field(ephemeris_df, planet, "speed").reindex(dates) + if speed.empty: + continue + + abs_speed = speed.abs() + speed_mean = speed.expanding().mean() + speed_std = speed.expanding().std().replace(0, np.nan) + + results[f"{planet}_speed"] = speed + results[f"{planet}_speed_pct"] = speed.expanding().rank(pct=True) + results[f"{planet}_station"] = (abs_speed < self.station_threshold).astype(int) + results[f"{planet}_speed_zscore"] = (speed - speed_mean) / speed_std + + return pd.DataFrame(results) if results else pd.DataFrame(index=dates) diff --git a/src/factorlab/factors/astrology/price_longitude_angles.py b/src/factorlab/factors/astrology/price_longitude_angles.py new file mode 100644 index 0000000..3439bf8 --- /dev/null +++ b/src/factorlab/factors/astrology/price_longitude_angles.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +import logging +from typing import Optional + +import numpy as np +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import deg_to_lowest_180, get_dates_planets, get_planet_longitude + +logger = logging.getLogger(__name__) + + +class PriceLongitudeAngles(AstrologyFactor): + """Gann-style price projections from planetary longitude motion.""" + + def __init__( + self, + planet: str = "sun", + anchor_price: float = 1.0, + scale: float = 1.0, + mode: str = "single", + planet2: Optional[str] = None, + **kwargs, + ): + super().__init__( + description="Price-longitude projections using Gann fan ratios.", + tags=["astrology", "gann", "price"], + **kwargs, + ) + self.planet = planet + self.anchor_price = anchor_price + self.scale = scale + self.mode = mode + self.planet2 = planet2 + + def 
_compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + lon1 = get_planet_longitude(ephemeris_df, self.planet).reindex(dates) + + if self.mode == "single": + lon = lon1 + elif self.mode in ("average", "synodic") and self.planet2 is not None: + lon2 = get_planet_longitude(ephemeris_df, self.planet2).reindex(dates) + if self.mode == "average": + lon = (lon1 + lon2) / 2 + else: + lon = deg_to_lowest_180(lon1 - lon2).abs() + else: + logger.warning("Invalid mode '%s' or missing planet2.", self.mode) + return pd.DataFrame(index=dates) + + lon_diff = lon.diff() + lon_diff = lon_diff.where(lon_diff.abs() < 180, lon_diff - np.sign(lon_diff) * 360) + accumulated = lon_diff.cumsum().fillna(0) + + gann_ratios = { + "1x1": 1.0, + "1x2": 0.5, + "2x1": 2.0, + "1x3": 1.0 / 3.0, + "3x1": 3.0, + "1x4": 0.25, + "4x1": 4.0, + "1x8": 0.125, + "8x1": 8.0, + } + + results = { + f"{self.planet}_gann_{name}": self.anchor_price + accumulated * self.scale * ratio + for name, ratio in gann_ratios.items() + } + return pd.DataFrame(results) diff --git a/src/factorlab/factors/astrology/retrograde_indicator.py b/src/factorlab/factors/astrology/retrograde_indicator.py new file mode 100644 index 0000000..64aad3b --- /dev/null +++ b/src/factorlab/factors/astrology/retrograde_indicator.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import List, Optional + +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import get_dates_planets, get_planet_field + + +class RetrogradeIndicator(AstrologyFactor): + """Planetary retrograde flags and aggregate count.""" + + def __init__(self, planets: Optional[List[str]] = None, **kwargs): + super().__init__( + description="Planetary retrograde indicators.", + tags=["astrology", "retrograde", "motion"], + **kwargs, + ) + self.planets = planets + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> 
pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + planets = self.planets or [ + "mercury", + "venus", + "mars", + "jupiter", + "saturn", + "uranus", + "neptune", + "pluto", + ] + + results = {} + for planet in planets: + speed = get_planet_field(ephemeris_df, planet, "speed") + if speed.empty: + continue + speed = speed.reindex(dates) + results[f"{planet}_retrograde"] = (speed < 0).astype(int) + + df = pd.DataFrame(results) + if not df.empty: + df["retrograde_count"] = df.sum(axis=1) + return df diff --git a/src/factorlab/factors/astrology/synodic_cycle_phase.py b/src/factorlab/factors/astrology/synodic_cycle_phase.py new file mode 100644 index 0000000..2e72b18 --- /dev/null +++ b/src/factorlab/factors/astrology/synodic_cycle_phase.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import List, Optional + +import numpy as np +import pandas as pd + +from factorlab.factors.astrology.base import AstrologyFactor +from factorlab.factors.astrology.common import SYNODIC_PAIRS, get_dates_planets, get_planet_longitude + + +class SynodicCyclePhase(AstrologyFactor): + """Synodic phase angle and encodings for major planetary pairs.""" + + def __init__(self, pairs: Optional[List[str]] = None, **kwargs): + super().__init__( + description="Synodic cycle phase features for major planetary pairs.", + tags=["astrology", "synodic", "phase"], + **kwargs, + ) + self.pairs = pairs + + def _compute_astrology(self, ephemeris_df: pd.DataFrame) -> pd.DataFrame: + dates, _ = get_dates_planets(ephemeris_df) + pairs = self.pairs or list(SYNODIC_PAIRS.keys()) + + results = {} + for pair_name in pairs: + if pair_name not in SYNODIC_PAIRS: + continue + + p1, p2 = SYNODIC_PAIRS[pair_name] + lon1 = get_planet_longitude(ephemeris_df, p1).reindex(dates) + lon2 = get_planet_longitude(ephemeris_df, p2).reindex(dates) + if lon1.empty or lon2.empty: + continue + + phase = (lon1 - lon2) % 360 + phase_label = (phase // 45).astype(int).clip(0, 7) + rad = phase * np.pi / 180.0 
+ + results[f"{pair_name}_phase"] = phase + results[f"{pair_name}_phase_label"] = phase_label + results[f"{pair_name}_phase_sin"] = np.sin(rad) + results[f"{pair_name}_phase_cos"] = np.cos(rad) + + return pd.DataFrame(results) if results else pd.DataFrame(index=dates) From 0b1ef4d61fb6c7f24f93f262463a7bac1e2916c3 Mon Sep 17 00:00:00 2001 From: Misha Kutsovsky Date: Wed, 18 Feb 2026 19:38:29 -0500 Subject: [PATCH 6/6] chore: remove accidental volume-factor changes from astrology branch --- src/factorlab/factors/volume/__init__.py | 41 ---- src/factorlab/factors/volume/base.py | 187 ------------------ .../volume/delta_negative_volume_indicator.py | 37 ---- .../factors/volume/delta_on_balance_volume.py | 31 --- .../volume/delta_positive_volume_indicator.py | 37 ---- .../factors/volume/delta_price_volume_fit.py | 36 ---- .../volume/delta_product_price_volume.py | 73 ------- .../factors/volume/delta_sum_price_volume.py | 74 ------- .../factors/volume/delta_volume_momentum.py | 26 --- .../factors/volume/diff_price_volume_fit.py | 39 ---- .../volume/diff_volume_weighted_ma_over_ma.py | 35 ---- .../volume/negative_volume_indicator.py | 35 ---- .../factors/volume/on_balance_volume.py | 29 --- .../volume/positive_volume_indicator.py | 35 ---- .../factors/volume/price_volume_fit.py | 33 ---- .../factors/volume/product_price_volume.py | 70 ------- .../factors/volume/sum_price_volume.py | 71 ------- src/factorlab/factors/volume/volume.py | 132 ------------- .../factors/volume/volume_momentum.py | 25 --- .../volume/volume_weighted_ma_over_ma.py | 29 --- tests/conftest.py | 10 - tests/features/test_volume_factors.py | 127 ------------ 22 files changed, 1212 deletions(-) delete mode 100644 src/factorlab/factors/volume/__init__.py delete mode 100644 src/factorlab/factors/volume/base.py delete mode 100644 src/factorlab/factors/volume/delta_negative_volume_indicator.py delete mode 100644 src/factorlab/factors/volume/delta_on_balance_volume.py delete mode 100644 
src/factorlab/factors/volume/delta_positive_volume_indicator.py delete mode 100644 src/factorlab/factors/volume/delta_price_volume_fit.py delete mode 100644 src/factorlab/factors/volume/delta_product_price_volume.py delete mode 100644 src/factorlab/factors/volume/delta_sum_price_volume.py delete mode 100644 src/factorlab/factors/volume/delta_volume_momentum.py delete mode 100644 src/factorlab/factors/volume/diff_price_volume_fit.py delete mode 100644 src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py delete mode 100644 src/factorlab/factors/volume/negative_volume_indicator.py delete mode 100644 src/factorlab/factors/volume/on_balance_volume.py delete mode 100644 src/factorlab/factors/volume/positive_volume_indicator.py delete mode 100644 src/factorlab/factors/volume/price_volume_fit.py delete mode 100644 src/factorlab/factors/volume/product_price_volume.py delete mode 100644 src/factorlab/factors/volume/sum_price_volume.py delete mode 100644 src/factorlab/factors/volume/volume.py delete mode 100644 src/factorlab/factors/volume/volume_momentum.py delete mode 100644 src/factorlab/factors/volume/volume_weighted_ma_over_ma.py delete mode 100644 tests/conftest.py delete mode 100644 tests/features/test_volume_factors.py diff --git a/src/factorlab/factors/volume/__init__.py b/src/factorlab/factors/volume/__init__.py deleted file mode 100644 index 2aeff80..0000000 --- a/src/factorlab/factors/volume/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -from factorlab.factors.volume.base import VolumeFactor -from factorlab.factors.volume.volume import Volume -from factorlab.factors.volume.volume_momentum import VolumeMomentum -from factorlab.factors.volume.delta_volume_momentum import DeltaVolumeMomentum -from factorlab.factors.volume.volume_weighted_ma_over_ma import VolumeWeightedMAOverMA -from factorlab.factors.volume.diff_volume_weighted_ma_over_ma import DiffVolumeWeightedMAOverMA -from factorlab.factors.volume.price_volume_fit import PriceVolumeFit -from 
factorlab.factors.volume.diff_price_volume_fit import DiffPriceVolumeFit -from factorlab.factors.volume.delta_price_volume_fit import DeltaPriceVolumeFit -from factorlab.factors.volume.on_balance_volume import OnBalanceVolume -from factorlab.factors.volume.delta_on_balance_volume import DeltaOnBalanceVolume -from factorlab.factors.volume.positive_volume_indicator import PositiveVolumeIndicator -from factorlab.factors.volume.delta_positive_volume_indicator import DeltaPositiveVolumeIndicator -from factorlab.factors.volume.negative_volume_indicator import NegativeVolumeIndicator -from factorlab.factors.volume.delta_negative_volume_indicator import DeltaNegativeVolumeIndicator -from factorlab.factors.volume.product_price_volume import ProductPriceVolume -from factorlab.factors.volume.sum_price_volume import SumPriceVolume -from factorlab.factors.volume.delta_product_price_volume import DeltaProductPriceVolume -from factorlab.factors.volume.delta_sum_price_volume import DeltaSumPriceVolume - -__all__ = [ - "VolumeFactor", - "Volume", - "VolumeMomentum", - "DeltaVolumeMomentum", - "VolumeWeightedMAOverMA", - "DiffVolumeWeightedMAOverMA", - "PriceVolumeFit", - "DiffPriceVolumeFit", - "DeltaPriceVolumeFit", - "OnBalanceVolume", - "DeltaOnBalanceVolume", - "PositiveVolumeIndicator", - "DeltaPositiveVolumeIndicator", - "NegativeVolumeIndicator", - "DeltaNegativeVolumeIndicator", - "ProductPriceVolume", - "SumPriceVolume", - "DeltaProductPriceVolume", - "DeltaSumPriceVolume", -] diff --git a/src/factorlab/factors/volume/base.py b/src/factorlab/factors/volume/base.py deleted file mode 100644 index e898e4f..0000000 --- a/src/factorlab/factors/volume/base.py +++ /dev/null @@ -1,187 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, List, Optional, Union - -import numpy as np -import pandas as pd - -from factorlab.factors.base import Factor -from factorlab.features.transforms.dispersion import Dispersion -from 
factorlab.features.transforms.returns import Difference, Returns -from factorlab.features.transforms.smoothing import WindowSmoother -from factorlab.utils import to_dataframe - - -class VolumeFactor(Factor, ABC): - """Base class for volume/price interaction factors.""" - - def __init__( - self, - price_col: str = "close", - volume_col: str = "volume", - output_col: Optional[str] = None, - compress: bool = True, - compression_window: int = 250, - compression_min_periods: int = 30, - compression_strength: float = 1.0, - **kwargs: Any, - ): - super().__init__( - name=self.__class__.__name__, - description="Base class for volume factors.", - category="Volume", - tags=["volume", "flow", "microstructure"], - ) - self.price_col = price_col - self.volume_col = volume_col - self.output_col = output_col - self.compress = compress - self.compression_window = compression_window - self.compression_min_periods = compression_min_periods - self.compression_strength = compression_strength - self.kwargs = kwargs - - @property - def inputs(self) -> List[str]: - return [self.price_col, self.volume_col] - - def fit( - self, - X: Union[pd.Series, pd.DataFrame], - y: Optional[Union[pd.Series, pd.DataFrame]] = None, - ) -> "VolumeFactor": - df_input = to_dataframe(X) - self.validate_inputs(df_input) - self._is_fitted = True - return self - - def transform(self, X: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame: - if not self._is_fitted: - raise RuntimeError(f"Transform '{self.name}' must be fitted before calling transform()") - - df = to_dataframe(X).copy(deep=True) - self.validate_inputs(df) - df = df.sort_index() - - factor = self._compute_volume(df) - if self.compress: - factor = self._compress(factor) - - df[self._generate_name()] = factor.clip(-50, 50) - return df - - @abstractmethod - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - raise NotImplementedError - - def _generate_name(self) -> str: - return self.output_col or self.name - - def _is_multiindex(self, series: 
pd.Series) -> bool: - return isinstance(series.index, pd.MultiIndex) - - def _safe_log(self, series: pd.Series) -> pd.Series: - return np.log(series.where(series > 0, np.nan)) - - def _series_frame(self, series: pd.Series, col: str = "value") -> pd.DataFrame: - return series.astype("float64").to_frame(col) - - def _shift_by_asset(self, series: pd.Series, periods: int) -> pd.Series: - if self._is_multiindex(series): - return series.groupby(level=1).shift(periods) - return series.shift(periods) - - def _pct_change_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series: - df = self._series_frame(series) - ret = Returns(method="pct", input_col="value", output_col="ret", lags=periods).compute(df) - return ret["ret"] - - def _diff_by_asset(self, series: pd.Series, periods: int = 1) -> pd.Series: - df = self._series_frame(series) - diff = Difference(input_col="value", output_col="diff", lags=periods).compute(df) - return diff["diff"] - - def _rolling_mean( - self, - series: pd.Series, - window: int, - min_periods: Optional[int] = None, - ) -> pd.Series: - min_periods = window if min_periods is None else min_periods - df = self._series_frame(series) - smoothed = WindowSmoother( - input_cols="value", - output_cols="mean", - window_type="rolling", - window_size=window, - central_tendency="mean", - min_periods=min_periods, - ).compute(df) - return smoothed["mean"] - - def _rolling_median( - self, - series: pd.Series, - window: int, - min_periods: Optional[int] = None, - ) -> pd.Series: - min_periods = window if min_periods is None else min_periods - df = self._series_frame(series) - smoothed = WindowSmoother( - input_cols="value", - output_cols="median", - window_type="rolling", - window_size=window, - central_tendency="median", - min_periods=min_periods, - ).compute(df) - return smoothed["median"] - - def _rolling_std( - self, - series: pd.Series, - window: int, - min_periods: Optional[int] = None, - ) -> pd.Series: - min_periods = 2 if min_periods is None else 
min_periods - df = self._series_frame(series) - dispersion = Dispersion( - method="std", - input_col="value", - output_col="std", - axis="ts", - window_type="rolling", - window_size=window, - min_periods=min_periods, - ).compute(df) - return dispersion["std"] - - def _rolling_stat( - self, - series: pd.Series, - window: int, - stat: str, - min_periods: Optional[int] = None, - **kwargs: Any, - ) -> pd.Series: - min_periods = window if min_periods is None else min_periods - - if self._is_multiindex(series): - rolled = getattr( - series.groupby(level=1).rolling(window=window, min_periods=min_periods), - stat, - )(**kwargs) - return rolled.droplevel(0).sort_index() - - return getattr(series.rolling(window=window, min_periods=min_periods), stat)(**kwargs) - - def _compress(self, raw: pd.Series) -> pd.Series: - robust_scale = self._rolling_median( - raw.abs(), - window=self.compression_window, - min_periods=self.compression_min_periods, - ).replace(0, np.nan) - - normalized = raw / robust_scale - return 50.0 * np.tanh(self.compression_strength * normalized) diff --git a/src/factorlab/factors/volume/delta_negative_volume_indicator.py b/src/factorlab/factors/volume/delta_negative_volume_indicator.py deleted file mode 100644 index 3207e47..0000000 --- a/src/factorlab/factors/volume/delta_negative_volume_indicator.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaNegativeVolumeIndicator(VolumeFactor): - def __init__(self, hist_length: int = 40, delta_dist: int = 35, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.name = "DeltaNegativeVolumeIndicator" - self.description = "Current minus lagged negative-volume indicator." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume < prev_volume, 0.0) - - avg_change = self._rolling_mean(filtered, window=self.hist_length) - norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_std( - rel_change, - window=norm_window, - min_periods=self.hist_length, - ).replace(0, np.nan) - nvi = avg_change / std_change - - return nvi - self._shift_by_asset(nvi, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_on_balance_volume.py b/src/factorlab/factors/volume/delta_on_balance_volume.py deleted file mode 100644 index 1543a2d..0000000 --- a/src/factorlab/factors/volume/delta_on_balance_volume.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaOnBalanceVolume(VolumeFactor): - def __init__(self, hist_length: int = 50, delta_dist: int = 45, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.name = "DeltaOnBalanceVolume" - self.description = "Current minus lagged on-balance-volume signal." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - close_diff = self._diff_by_asset(close, 1) - signed_volume = volume * np.sign(close_diff) - - signed_sum = self._rolling_stat(signed_volume, window=self.hist_length, stat="sum") - total_sum = self._rolling_stat(volume, window=self.hist_length, stat="sum") - obv = signed_sum / total_sum.replace(0, np.nan) - - return obv - self._shift_by_asset(obv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_positive_volume_indicator.py b/src/factorlab/factors/volume/delta_positive_volume_indicator.py deleted file mode 100644 index 31b8889..0000000 --- a/src/factorlab/factors/volume/delta_positive_volume_indicator.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaPositiveVolumeIndicator(VolumeFactor): - def __init__(self, hist_length: int = 40, delta_dist: int = 35, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.name = "DeltaPositiveVolumeIndicator" - self.description = "Current minus lagged positive-volume indicator." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume > prev_volume, 0.0) - - avg_change = self._rolling_mean(filtered, window=self.hist_length) - norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_std( - rel_change, - window=norm_window, - min_periods=self.hist_length, - ).replace(0, np.nan) - pvi = avg_change / std_change - - return pvi - self._shift_by_asset(pvi, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_price_volume_fit.py b/src/factorlab/factors/volume/delta_price_volume_fit.py deleted file mode 100644 index a44300c..0000000 --- a/src/factorlab/factors/volume/delta_price_volume_fit.py +++ /dev/null @@ -1,36 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaPriceVolumeFit(VolumeFactor): - def __init__(self, hist_length: int = 20, delta_dist: int = 30, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.name = "DeltaPriceVolumeFit" - self.description = "Current minus lagged price-volume fit slope." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - x = self._safe_log(volume) - y = self._safe_log(close) - - mean_x = self._rolling_mean(x, window=self.hist_length) - mean_y = self._rolling_mean(y, window=self.hist_length) - mean_xy = self._rolling_mean(x * y, window=self.hist_length) - mean_x2 = self._rolling_mean(x * x, window=self.hist_length) - - cov_xy = mean_xy - (mean_x * mean_y) - var_x = mean_x2 - (mean_x * mean_x) - pvf = cov_xy / var_x.replace(0, np.nan) - - return pvf - self._shift_by_asset(pvf, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_product_price_volume.py b/src/factorlab/factors/volume/delta_product_price_volume.py deleted file mode 100644 index 68da9a5..0000000 --- a/src/factorlab/factors/volume/delta_product_price_volume.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaProductPriceVolume(VolumeFactor): - def __init__( - self, - hist_length: int = 40, - delta_dist: int = 35, - norm_lookback: int = 250, - norm_min_periods: int = 50, - **kwargs, - ): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.norm_lookback = norm_lookback - self.norm_min_periods = norm_min_periods - self.name = "DeltaProductPriceVolume" - self.description = "Current minus lagged product-price-volume signal." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: - close = df[self.price_col] - volume = df[self.volume_col] - - prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_median( - prior_volume, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ).replace(0, np.nan) - normalized_volume = volume / median_volume - - log_close = self._safe_log(close) - price_change = self._diff_by_asset(log_close, 1) - prior_change = self._shift_by_asset(price_change, 1) - - median_change = self._rolling_median( - prior_change, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ) - q75 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.75, - ) - q25 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.25, - ) - iqr = (q75 - q25).replace(0, np.nan) - - normalized_change = (price_change - median_change) / iqr - return normalized_volume, normalized_change - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) - precursor = normalized_volume * normalized_change - ppv = self._rolling_mean(precursor, window=self.hist_length) - return ppv - self._shift_by_asset(ppv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_sum_price_volume.py b/src/factorlab/factors/volume/delta_sum_price_volume.py deleted file mode 100644 index 3822e57..0000000 --- a/src/factorlab/factors/volume/delta_sum_price_volume.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaSumPriceVolume(VolumeFactor): - def 
__init__( - self, - hist_length: int = 40, - delta_dist: int = 35, - norm_lookback: int = 250, - norm_min_periods: int = 50, - **kwargs, - ): - super().__init__(**kwargs) - self.hist_length = hist_length - self.delta_dist = delta_dist - self.norm_lookback = norm_lookback - self.norm_min_periods = norm_min_periods - self.name = "DeltaSumPriceVolume" - self.description = "Current minus lagged sum-price-volume signal." - - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.delta_dist}" - - def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: - close = df[self.price_col] - volume = df[self.volume_col] - - prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_median( - prior_volume, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ).replace(0, np.nan) - normalized_volume = volume / median_volume - - log_close = self._safe_log(close) - price_change = self._diff_by_asset(log_close, 1) - prior_change = self._shift_by_asset(price_change, 1) - - median_change = self._rolling_median( - prior_change, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ) - q75 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.75, - ) - q25 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.25, - ) - iqr = (q75 - q25).replace(0, np.nan) - - normalized_change = (price_change - median_change) / iqr - return normalized_volume, normalized_change - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) - precursor = normalized_volume + normalized_change.abs() - precursor = precursor.where(normalized_change >= 0, -precursor) - spv = self._rolling_mean(precursor, window=self.hist_length) - return spv - 
self._shift_by_asset(spv, self.delta_dist) diff --git a/src/factorlab/factors/volume/delta_volume_momentum.py b/src/factorlab/factors/volume/delta_volume_momentum.py deleted file mode 100644 index 3a5e71a..0000000 --- a/src/factorlab/factors/volume/delta_volume_momentum.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DeltaVolumeMomentum(VolumeFactor): - def __init__(self, hist_length: int = 20, multiplier: int = 4, delta_len: int = 100, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.multiplier = multiplier - self.delta_len = delta_len - self.name = "DeltaVolumeMomentum" - self.description = "Current minus lagged volume momentum." - - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.multiplier}_{self.delta_len}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - volume = df[self.volume_col] - short_ma = self._rolling_mean(volume, window=self.hist_length) - long_ma = self._rolling_mean(volume, window=self.hist_length * self.multiplier) - vmom = self._safe_log(short_ma / long_ma.replace(0, np.nan)) - return vmom - self._shift_by_asset(vmom, self.delta_len) diff --git a/src/factorlab/factors/volume/diff_price_volume_fit.py b/src/factorlab/factors/volume/diff_price_volume_fit.py deleted file mode 100644 index e68fc7f..0000000 --- a/src/factorlab/factors/volume/diff_price_volume_fit.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DiffPriceVolumeFit(VolumeFactor): - def __init__(self, short_dist: int = 20, long_dist: int = 100, **kwargs): - super().__init__(**kwargs) - self.short_dist = short_dist - self.long_dist = long_dist - self.name = "DiffPriceVolumeFit" - self.description = "Short minus long price-volume 
fit slope." - - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.short_dist}_{self.long_dist}" - - def _pv_fit(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - x = self._safe_log(volume) - y = self._safe_log(close) - - mean_x = self._rolling_mean(x, window=hist_length) - mean_y = self._rolling_mean(y, window=hist_length) - mean_xy = self._rolling_mean(x * y, window=hist_length) - mean_x2 = self._rolling_mean(x * x, window=hist_length) - - cov_xy = mean_xy - (mean_x * mean_y) - var_x = mean_x2 - (mean_x * mean_x) - return cov_xy / var_x.replace(0, np.nan) - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - short = self._pv_fit(df, self.short_dist) - long = self._pv_fit(df, self.long_dist) - return short - long diff --git a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py deleted file mode 100644 index 3ce49d5..0000000 --- a/src/factorlab/factors/volume/diff_volume_weighted_ma_over_ma.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class DiffVolumeWeightedMAOverMA(VolumeFactor): - def __init__(self, short_dist: int = 20, long_dist: int = 100, **kwargs): - super().__init__(**kwargs) - self.short_dist = short_dist - self.long_dist = long_dist - self.name = "DiffVolumeWeightedMAOverMA" - self.description = "Short minus long VWMA-over-MA signal." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.short_dist}_{self.long_dist}" - - def _vwma_over_ma(self, df: pd.DataFrame, hist_length: int) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - pv = close * volume - vwma = self._rolling_stat(pv, window=hist_length, stat="sum") / self._rolling_stat( - volume, window=hist_length, stat="sum" - ).replace(0, np.nan) - ma = self._rolling_mean(close, window=hist_length) - - return self._safe_log(vwma / ma.replace(0, np.nan)) - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - short = self._vwma_over_ma(df, self.short_dist) - long = self._vwma_over_ma(df, self.long_dist) - return short - long diff --git a/src/factorlab/factors/volume/negative_volume_indicator.py b/src/factorlab/factors/volume/negative_volume_indicator.py deleted file mode 100644 index 5e4a1db..0000000 --- a/src/factorlab/factors/volume/negative_volume_indicator.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class NegativeVolumeIndicator(VolumeFactor): - def __init__(self, hist_length: int = 40, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.name = "NegativeVolumeIndicator" - self.description = "Normalized average return on falling-volume bars." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume < prev_volume, 0.0) - - avg_change = self._rolling_mean(filtered, window=self.hist_length) - norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_std( - rel_change, - window=norm_window, - min_periods=self.hist_length, - ).replace(0, np.nan) - - return avg_change / std_change diff --git a/src/factorlab/factors/volume/on_balance_volume.py b/src/factorlab/factors/volume/on_balance_volume.py deleted file mode 100644 index fad29da..0000000 --- a/src/factorlab/factors/volume/on_balance_volume.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class OnBalanceVolume(VolumeFactor): - def __init__(self, hist_length: int = 50, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.name = "OnBalanceVolume" - self.description = "Signed-volume over total-volume ratio." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - close_diff = self._diff_by_asset(close, 1) - signed_volume = volume * np.sign(close_diff) - - signed_sum = self._rolling_stat(signed_volume, window=self.hist_length, stat="sum") - total_sum = self._rolling_stat(volume, window=self.hist_length, stat="sum") - - return signed_sum / total_sum.replace(0, np.nan) diff --git a/src/factorlab/factors/volume/positive_volume_indicator.py b/src/factorlab/factors/volume/positive_volume_indicator.py deleted file mode 100644 index 4db2308..0000000 --- a/src/factorlab/factors/volume/positive_volume_indicator.py +++ /dev/null @@ -1,35 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class PositiveVolumeIndicator(VolumeFactor): - def __init__(self, hist_length: int = 40, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.name = "PositiveVolumeIndicator" - self.description = "Normalized average return on rising-volume bars." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - rel_change = self._pct_change_by_asset(close, periods=1) - prev_volume = self._shift_by_asset(volume, 1) - filtered = rel_change.where(volume > prev_volume, 0.0) - - avg_change = self._rolling_mean(filtered, window=self.hist_length) - norm_window = max(2 * self.hist_length, 250) - std_change = self._rolling_std( - rel_change, - window=norm_window, - min_periods=self.hist_length, - ).replace(0, np.nan) - - return avg_change / std_change diff --git a/src/factorlab/factors/volume/price_volume_fit.py b/src/factorlab/factors/volume/price_volume_fit.py deleted file mode 100644 index 39d7885..0000000 --- a/src/factorlab/factors/volume/price_volume_fit.py +++ /dev/null @@ -1,33 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class PriceVolumeFit(VolumeFactor): - def __init__(self, hist_length: int = 50, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.name = "PriceVolumeFit" - self.description = "Rolling slope for log(price) on log(volume)." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - x = self._safe_log(volume) - y = self._safe_log(close) - - mean_x = self._rolling_mean(x, window=self.hist_length) - mean_y = self._rolling_mean(y, window=self.hist_length) - mean_xy = self._rolling_mean(x * y, window=self.hist_length) - mean_x2 = self._rolling_mean(x * x, window=self.hist_length) - - cov_xy = mean_xy - (mean_x * mean_y) - var_x = mean_x2 - (mean_x * mean_x) - return cov_xy / var_x.replace(0, np.nan) diff --git a/src/factorlab/factors/volume/product_price_volume.py b/src/factorlab/factors/volume/product_price_volume.py deleted file mode 100644 index 962d4c8..0000000 --- a/src/factorlab/factors/volume/product_price_volume.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class ProductPriceVolume(VolumeFactor): - def __init__( - self, - hist_length: int = 25, - norm_lookback: int = 250, - norm_min_periods: int = 50, - **kwargs, - ): - super().__init__(**kwargs) - self.hist_length = hist_length - self.norm_lookback = norm_lookback - self.norm_min_periods = norm_min_periods - self.name = "ProductPriceVolume" - self.description = "Smoothed product of normalized price and volume shocks." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: - close = df[self.price_col] - volume = df[self.volume_col] - - prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_median( - prior_volume, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ).replace(0, np.nan) - normalized_volume = volume / median_volume - - log_close = self._safe_log(close) - price_change = self._diff_by_asset(log_close, 1) - prior_change = self._shift_by_asset(price_change, 1) - - median_change = self._rolling_median( - prior_change, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ) - q75 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.75, - ) - q25 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.25, - ) - iqr = (q75 - q25).replace(0, np.nan) - - normalized_change = (price_change - median_change) / iqr - return normalized_volume, normalized_change - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) - precursor = normalized_volume * normalized_change - return self._rolling_mean(precursor, window=self.hist_length) diff --git a/src/factorlab/factors/volume/sum_price_volume.py b/src/factorlab/factors/volume/sum_price_volume.py deleted file mode 100644 index d8d60e2..0000000 --- a/src/factorlab/factors/volume/sum_price_volume.py +++ /dev/null @@ -1,71 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class SumPriceVolume(VolumeFactor): - def __init__( - self, - hist_length: int = 25, - norm_lookback: int = 250, - norm_min_periods: int = 50, 
- **kwargs, - ): - super().__init__(**kwargs) - self.hist_length = hist_length - self.norm_lookback = norm_lookback - self.norm_min_periods = norm_min_periods - self.name = "SumPriceVolume" - self.description = "Smoothed signed sum of normalized price/volume shocks." - - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _normalized_volume_and_price_change(self, df: pd.DataFrame) -> tuple[pd.Series, pd.Series]: - close = df[self.price_col] - volume = df[self.volume_col] - - prior_volume = self._shift_by_asset(volume, 1) - median_volume = self._rolling_median( - prior_volume, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ).replace(0, np.nan) - normalized_volume = volume / median_volume - - log_close = self._safe_log(close) - price_change = self._diff_by_asset(log_close, 1) - prior_change = self._shift_by_asset(price_change, 1) - - median_change = self._rolling_median( - prior_change, - window=self.norm_lookback, - min_periods=self.norm_min_periods, - ) - q75 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.75, - ) - q25 = self._rolling_stat( - prior_change, - window=self.norm_lookback, - stat="quantile", - min_periods=self.norm_min_periods, - q=0.25, - ) - iqr = (q75 - q25).replace(0, np.nan) - - normalized_change = (price_change - median_change) / iqr - return normalized_volume, normalized_change - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - normalized_volume, normalized_change = self._normalized_volume_and_price_change(df) - precursor = normalized_volume + normalized_change.abs() - precursor = precursor.where(normalized_change >= 0, -precursor) - return self._rolling_mean(precursor, window=self.hist_length) diff --git a/src/factorlab/factors/volume/volume.py b/src/factorlab/factors/volume/volume.py deleted file mode 100644 index f3b3381..0000000 --- a/src/factorlab/factors/volume/volume.py +++ 
/dev/null @@ -1,132 +0,0 @@ -import pandas as pd -from typing import ClassVar, Dict, Optional, Type, Union - -from factorlab.core.base_transform import BaseTransform -from factorlab.factors.base import Factor -from factorlab.factors.volume.volume_momentum import VolumeMomentum -from factorlab.factors.volume.delta_volume_momentum import DeltaVolumeMomentum -from factorlab.factors.volume.volume_weighted_ma_over_ma import VolumeWeightedMAOverMA -from factorlab.factors.volume.diff_volume_weighted_ma_over_ma import DiffVolumeWeightedMAOverMA -from factorlab.factors.volume.price_volume_fit import PriceVolumeFit -from factorlab.factors.volume.diff_price_volume_fit import DiffPriceVolumeFit -from factorlab.factors.volume.delta_price_volume_fit import DeltaPriceVolumeFit -from factorlab.factors.volume.on_balance_volume import OnBalanceVolume -from factorlab.factors.volume.delta_on_balance_volume import DeltaOnBalanceVolume -from factorlab.factors.volume.positive_volume_indicator import PositiveVolumeIndicator -from factorlab.factors.volume.delta_positive_volume_indicator import DeltaPositiveVolumeIndicator -from factorlab.factors.volume.negative_volume_indicator import NegativeVolumeIndicator -from factorlab.factors.volume.delta_negative_volume_indicator import DeltaNegativeVolumeIndicator -from factorlab.factors.volume.product_price_volume import ProductPriceVolume -from factorlab.factors.volume.sum_price_volume import SumPriceVolume -from factorlab.factors.volume.delta_product_price_volume import DeltaProductPriceVolume -from factorlab.factors.volume.delta_sum_price_volume import DeltaSumPriceVolume -from factorlab.utils import to_dataframe - - -class Volume(Factor): - """Factory class for volume factors.""" - - _METHOD_MAP: ClassVar[Dict[str, Type[BaseTransform]]] = { - "volume_momentum": VolumeMomentum, - "delta_volume_momentum": DeltaVolumeMomentum, - "volume_weighted_ma_over_ma": VolumeWeightedMAOverMA, - "diff_volume_weighted_ma_over_ma": DiffVolumeWeightedMAOverMA, 
- "price_volume_fit": PriceVolumeFit, - "diff_price_volume_fit": DiffPriceVolumeFit, - "delta_price_volume_fit": DeltaPriceVolumeFit, - "on_balance_volume": OnBalanceVolume, - "delta_on_balance_volume": DeltaOnBalanceVolume, - "positive_volume_indicator": PositiveVolumeIndicator, - "delta_positive_volume_indicator": DeltaPositiveVolumeIndicator, - "negative_volume_indicator": NegativeVolumeIndicator, - "delta_negative_volume_indicator": DeltaNegativeVolumeIndicator, - "product_price_volume": ProductPriceVolume, - "sum_price_volume": SumPriceVolume, - "delta_product_price_volume": DeltaProductPriceVolume, - "delta_sum_price_volume": DeltaSumPriceVolume, - } - - _ALIASES: ClassVar[Dict[str, str]] = { - "vmom": "volume_momentum", - "dvmom": "delta_volume_momentum", - "vwmama": "volume_weighted_ma_over_ma", - "dvwmama": "diff_volume_weighted_ma_over_ma", - "pvf": "price_volume_fit", - "difpvf": "diff_price_volume_fit", - "dpvf": "delta_price_volume_fit", - "obv": "on_balance_volume", - "dobv": "delta_on_balance_volume", - "pvi": "positive_volume_indicator", - "dpvi": "delta_positive_volume_indicator", - "nvi": "negative_volume_indicator", - "dnvi": "delta_negative_volume_indicator", - "ppv": "product_price_volume", - "spv": "sum_price_volume", - "dppv": "delta_product_price_volume", - "dspv": "delta_sum_price_volume", - } - - @classmethod - def get_factor_metadata(cls) -> pd.DataFrame: - data = [] - for alias, factor_class in cls._METHOD_MAP.items(): - try: - factor_instance = factor_class() - data.append( - { - "Alias": alias, - "Class": factor_class.__name__, - "Description": factor_instance.description, - } - ) - except Exception as exc: - data.append( - { - "Alias": alias, - "Class": factor_class.__name__, - "Description": f"Instantiation Failed: {exc}", - } - ) - - return pd.DataFrame(data).set_index("Alias") - - def __init__(self, method: str = "volume_momentum", **kwargs): - super().__init__( - name="Volume", - description="A factory for volume-based factors.", 
- category="Volume", - ) - - method = method.lower().strip() - self.method = self._ALIASES.get(method, method) - self.kwargs = kwargs - - if self.method not in self._METHOD_MAP: - raise ValueError( - f"Invalid volume factor method '{self.method}'. " - f"Method must be one of: {list(self._METHOD_MAP.keys())}" - ) - - factor_class = self._METHOD_MAP[self.method] - self._factor: Factor = factor_class(**self.kwargs) - - @property - def inputs(self) -> list[str]: - return self._factor.inputs - - def fit( - self, - X: Union[pd.Series, pd.DataFrame], - y: Optional[Union[pd.Series, pd.DataFrame]] = None, - ) -> "Volume": - df_input = to_dataframe(X) - self.validate_inputs(df_input) - self._factor.fit(df_input) - self._is_fitted = True - return self - - def transform(self, data: Union[pd.Series, pd.DataFrame]) -> pd.DataFrame: - if not self._is_fitted: - raise RuntimeError("Volume transform must be fitted before calling transform().") - - return self._factor.transform(data) diff --git a/src/factorlab/factors/volume/volume_momentum.py b/src/factorlab/factors/volume/volume_momentum.py deleted file mode 100644 index b0a362e..0000000 --- a/src/factorlab/factors/volume/volume_momentum.py +++ /dev/null @@ -1,25 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class VolumeMomentum(VolumeFactor): - def __init__(self, hist_length: int = 20, multiplier: int = 4, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.multiplier = multiplier - self.name = "VolumeMomentum" - self.description = "Short-vs-long volume momentum ratio." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}_{self.multiplier}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - volume = df[self.volume_col] - short_ma = self._rolling_mean(volume, window=self.hist_length) - long_ma = self._rolling_mean(volume, window=self.hist_length * self.multiplier) - ratio = short_ma / long_ma.replace(0, np.nan) - return self._safe_log(ratio) diff --git a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py b/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py deleted file mode 100644 index ee080a8..0000000 --- a/src/factorlab/factors/volume/volume_weighted_ma_over_ma.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import annotations - -import numpy as np -import pandas as pd - -from factorlab.factors.volume.base import VolumeFactor - - -class VolumeWeightedMAOverMA(VolumeFactor): - def __init__(self, hist_length: int = 50, **kwargs): - super().__init__(**kwargs) - self.hist_length = hist_length - self.name = "VolumeWeightedMAOverMA" - self.description = "Log ratio of VWMA over MA." 
- - def _generate_name(self) -> str: - return self.output_col or f"{self.name}_{self.hist_length}" - - def _compute_volume(self, df: pd.DataFrame) -> pd.Series: - close = df[self.price_col] - volume = df[self.volume_col] - - pv = close * volume - vwma = self._rolling_stat(pv, window=self.hist_length, stat="sum") / self._rolling_stat( - volume, window=self.hist_length, stat="sum" - ).replace(0, np.nan) - ma = self._rolling_mean(close, window=self.hist_length) - - return self._safe_log(vwma / ma.replace(0, np.nan)) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 2bbf859..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys -from pathlib import Path - - -ROOT = Path(__file__).resolve().parents[1] -SRC = ROOT / "src" - -if str(SRC) not in sys.path: - sys.path.insert(0, str(SRC)) - diff --git a/tests/features/test_volume_factors.py b/tests/features/test_volume_factors.py deleted file mode 100644 index 3817558..0000000 --- a/tests/features/test_volume_factors.py +++ /dev/null @@ -1,127 +0,0 @@ -import numpy as np -import pandas as pd -import pytest -from pathlib import Path - -from factorlab.factors.volume import Volume - - -FACTOR_SPECS = [ - ("volume_momentum", {"hist_length": 20, "multiplier": 4}), - ("delta_volume_momentum", {"hist_length": 20, "multiplier": 4, "delta_len": 100}), - ("volume_weighted_ma_over_ma", {"hist_length": 50}), - ("diff_volume_weighted_ma_over_ma", {"short_dist": 20, "long_dist": 100}), - ("price_volume_fit", {"hist_length": 50}), - ("diff_price_volume_fit", {"short_dist": 20, "long_dist": 100}), - ("delta_price_volume_fit", {"hist_length": 20, "delta_dist": 30}), - ("on_balance_volume", {"hist_length": 50}), - ("delta_on_balance_volume", {"hist_length": 50, "delta_dist": 45}), - ("positive_volume_indicator", {"hist_length": 40}), - ("delta_positive_volume_indicator", {"hist_length": 40, "delta_dist": 35}), - ("negative_volume_indicator", {"hist_length": 40}), - 
("delta_negative_volume_indicator", {"hist_length": 40, "delta_dist": 35}), - ("product_price_volume", {"hist_length": 25}), - ("sum_price_volume", {"hist_length": 25}), - ("delta_product_price_volume", {"hist_length": 40, "delta_dist": 35}), - ("delta_sum_price_volume", {"hist_length": 40, "delta_dist": 35}), -] - - -@pytest.fixture(scope="module") -def crypto_universe() -> pd.DataFrame: - data_path = Path(__file__).resolve().parents[1] / "datasets" / "data" / "binance_spot_prices.csv" - df = pd.read_csv( - data_path, - index_col=["date", "ticker"], - parse_dates=["date"], - ) - df = df.sort_index() - - # keep symbols with at least 300 daily bars - counts = df.groupby(level=1).size() - keep = counts[counts >= 300].index - df = df[df.index.get_level_values(1).isin(keep)] - - # keep a liquid subset to keep tests fast and stable - avg_notional = (df["close"] * df["volume"]).groupby(level=1).mean() - top_symbols = avg_notional.nlargest(60).index - df = df[df.index.get_level_values(1).isin(top_symbols)] - - return df[["open", "high", "low", "close", "volume"]] - - -@pytest.mark.parametrize("method,kwargs", FACTOR_SPECS) -def test_volume_factor_methods_smoke(crypto_universe: pd.DataFrame, method: str, kwargs: dict) -> None: - factor = Volume(method=method, **kwargs) - out = factor.compute(crypto_universe) - - created_cols = [col for col in out.columns if col not in crypto_universe.columns] - assert len(created_cols) == 1 - - factor_col = created_cols[0] - values = out[factor_col].dropna() - - assert len(values) > 0 - assert (values <= 50).all() - assert (values >= -50).all() - - pd.testing.assert_frame_equal(out[crypto_universe.columns], crypto_universe) - assert out.index.equals(crypto_universe.index) - - -def test_volume_factor_crypto_rank_ic_smoke(crypto_universe: pd.DataFrame) -> None: - close = crypto_universe["close"] - volume = crypto_universe["volume"] - - fwd_ret = close.groupby(level=1).shift(-1).div(close) - 1.0 - - # daily tradable universe proxy: top 40 by 
20-day average notional - notional = close * volume - liquidity = ( - notional.groupby(level=1) - .rolling(window=20, min_periods=20) - .mean() - .droplevel(0) - .sort_index() - ) - eligible = liquidity.groupby(level=0).rank(ascending=False, method="first") <= 40 - - rows = [] - for method, kwargs in FACTOR_SPECS: - factor = Volume(method=method, **kwargs) - out = factor.compute(crypto_universe) - factor_col = [col for col in out.columns if col not in crypto_universe.columns][0] - - panel = pd.concat( - [ - out[factor_col].rename("factor"), - fwd_ret.rename("fwd_ret"), - eligible.rename("eligible"), - ], - axis=1, - ) - panel = panel[panel["eligible"]].dropna() - - daily_ic = panel.groupby(level=0).apply( - lambda g: g["factor"].corr(g["fwd_ret"], method="spearman") if g.shape[0] >= 12 else np.nan - ) - - n_obs = int(daily_ic.notna().sum()) - mean_ic = float(daily_ic.mean()) if n_obs > 0 else np.nan - std_ic = float(daily_ic.std()) if n_obs > 1 else np.nan - - rows.append( - { - "method": method, - "n_obs": n_obs, - "mean_ic": mean_ic, - "std_ic": std_ic, - } - ) - - summary = pd.DataFrame(rows).set_index("method") - - assert summary.shape[0] == len(FACTOR_SPECS) - assert (summary["n_obs"] >= 30).sum() >= 12 - assert np.isfinite(summary["mean_ic"].dropna()).all() - assert (summary["mean_ic"].dropna().abs() <= 1).all()