diff --git a/.gitignore b/.gitignore index 8475ebb..1e99b5a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ runs catboost_info *.pyc +__pycache__/ *.log *.whl *.gz -*.__pycache__ build/ dist/ *egg-info*/ diff --git a/alphapy/alphapy_main.py b/alphapy/alphapy_main.py index 22f342a..9053a5b 100644 --- a/alphapy/alphapy_main.py +++ b/alphapy/alphapy_main.py @@ -257,13 +257,13 @@ def training_pipeline(alphapy_specs, model): logger.info("Original Feature Statistics") logger.info("Number of Training Rows : %d", X_train.shape[0]) logger.info("Number of Training Columns : %d", X_train.shape[1]) - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: uv, uc = np.unique(y_train, return_counts=True) logger.info("Unique Training Values for %s : %s", target, uv) logger.info("Unique Training Counts for %s : %s", target, uc) logger.info("Number of Testing Rows : %d", X_test.shape[0]) logger.info("Number of Testing Columns : %d", X_test.shape[1]) - if (model_type == ModelType.classification or model_type == ModelType.system) and model.test_labels: + if (model_type == ModelType.classification) and model.test_labels: uv, uc = np.unique(y_test, return_counts=True) logger.info("Unique Testing Values for %s : %s", target, uv) logger.info("Unique Testing Counts for %s : %s", target, uc) @@ -302,7 +302,7 @@ def training_pipeline(alphapy_specs, model): # Create crosstabs for any categorical features - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: create_crosstabs(model, target) # Create initial features @@ -524,7 +524,7 @@ def prediction_pipeline(alphapy_specs, model): logger.info("Making Predictions") tag = 'BEST' model.preds[(tag, partition)] = predictor.predict(X_all) - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: model.probas[(tag, partition)] = predictor.predict_proba(X_all)[:, 1] # Save predictions diff --git a/alphapy/calendrical.py b/alphapy/calendrical.py index b6b2141..01e8149 100644 --- a/alphapy/calendrical.py +++ b/alphapy/calendrical.py @@ -1271,3 +1271,106 @@ def set_holidays(gyear, observe): else: holidays[h] = hfunc(gyear) return holidays + + +# +# Function dateparts +# + +def dateparts(df, c): + r"""Extract date into its components: year, month, day, dayofweek. + + Parameters + ---------- + df : pandas.DataFrame + Dataframe containing the column ``c``. + c : str + Name of the column in the dataframe ``df``. + + Returns + ------- + date_features : pandas.DataFrame + The dataframe containing the date features. + """ + + col = df[c] + if hasattr(col, "to_pandas"): + col = col.to_pandas() + ds_dt = pd.to_datetime(col) + date_features = pd.DataFrame() + try: + fyear = pd.Series(ds_dt.dt.year, name='year').astype(int) + fmonth = pd.Series(ds_dt.dt.month, name='month').astype(int) + fday = pd.Series(ds_dt.dt.day, name='day').astype(int) + fdow = pd.Series(ds_dt.dt.dayofweek, name='dayofweek').astype(int) + frames = [fyear, fmonth, fday, fdow] + date_features = pd.concat(frames, axis=1) + except Exception: + logger.info("Could not extract date information from the series") + return date_features + + +# +# Function timeparts +# + +def timeparts(df, c): + r"""Extract time into its components: hour, minute, second. + + Parameters + ---------- + df : pandas.DataFrame + Dataframe containing the column ``c``. + c : str + Name of the column in the dataframe ``df``. + + Returns + ------- + time_features : pandas.DataFrame + The dataframe containing the time features. + """ + + ds_dt = pd.to_datetime(df[c].astype(str), format='%H:%M:%S') + time_features = pd.DataFrame() + try: + fhour = pd.Series(ds_dt.dt.hour, name='hour').astype(int) + fminute = pd.Series(ds_dt.dt.minute, name='minute').astype(int) + fsecond = pd.Series(ds_dt.dt.second, name='second').astype(int) + frames = [fhour, fminute, fsecond] + time_features = pd.concat(frames, axis=1) + except Exception: + logger.info("Could not extract time information from the series") + return time_features + + +# +# Function bizday +# + +def bizday(df, c): + r"""Extract business day of month and week. + + Parameters + ---------- + df : pandas.DataFrame + Dataframe containing the column ``c``. + c : str + Name of the column in the dataframe ``df``. + + Returns + ------- + date_features : pandas.DataFrame + The dataframe containing the date features. + """ + + date_features = pd.DataFrame() + try: + date_features = dateparts(df, c) + rdate = date_features.apply(get_rdate, axis=1) + bdm = pd.Series(rdate.apply(biz_day_month), name='bizdaymonth') + bdw = pd.Series(rdate.apply(biz_day_week), name='bizdayweek') + frames = [date_features, bdm, bdw] + date_features = pd.concat(frames, axis=1) + except Exception: + logger.info("Could not extract business date information from the series") + return date_features diff --git a/alphapy/data.py b/alphapy/data.py index 372296c..7458c36 100644 --- a/alphapy/data.py +++ b/alphapy/data.py @@ -32,18 +32,14 @@ from alphapy.globals import datasets from alphapy.globals import ModelType from alphapy.globals import Partition -from alphapy.globals import PD_INTRADAY_OFFSETS from alphapy.globals import SSEP from alphapy.globals import WILDCARD from alphapy.space import Space -from datetime import datetime -from io import BytesIO import logging import numpy as np import polars as pl import re -import requests from sklearn.preprocessing import LabelEncoder import sys @@ -121,7 +117,7 @@ def get_data(model, partition): # assign the target column to y df_y = df.select(target) # encode label only for classification or system - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: y = LabelEncoder().fit_transform(df_y[target].to_numpy()) df_y = pl.DataFrame({target: y}) # drop the target from the original frame @@ -178,4 +174,3 @@ def shuffle_data(model): return model - diff --git a/alphapy/features.py b/alphapy/features.py index 827ace0..f1af6ef 100644 --- a/alphapy/features.py +++ b/alphapy/features.py @@ -1543,7 +1543,7 @@ def create_interactions(model, X): logger.info("Polynomial Degree : %d", poly_degree) if model_type == ModelType.regression: selector = SelectPercentile(f_regression, percentile=isample_pct) - elif model_type == ModelType.classification or model_type == ModelType.system: + elif model_type == ModelType.classification: selector = SelectPercentile(f_classif, percentile=isample_pct) else: raise TypeError("Unknown model type when creating interactions") diff --git a/alphapy/globals.py b/alphapy/globals.py index 7c48888..c370153 100644 --- a/alphapy/globals.py +++ b/alphapy/globals.py @@ -130,7 +130,6 @@ class ModelType(Enum): ranking = 2 multiclass = 3 regression = 4 - system = 5 # @@ -151,54 +150,6 @@ class Objective(Enum): minimize = 2 -# -# Bar Types -# - -@unique -class BarType(Enum): - """Bar Types. - - Bar Types for running models, usually translated from a normal OHLC bar to a - weighted bar based on volume, dollar amount, etc. - - """ - time = 1 - dollar = 2 - heikinashi = 3 - - -# -# Class Orders -# - -class Orders: - """System Order Types. - - Attributes - ---------- - le : str - long entry - se : str - short entry - lx : str - long exit - sx : str - short exit - lh : str - long exit at the end of the holding period - sh : str - short exit at the end of the holding period - - """ - le = 'le' - se = 'se' - lx = 'lx' - sx = 'sx' - lh = 'lh' - sh = 'sh' - - # # Partition Types # diff --git a/alphapy/model.py b/alphapy/model.py index 3df98f6..341883f 100644 --- a/alphapy/model.py +++ b/alphapy/model.py @@ -886,7 +886,7 @@ def make_predictions(model, algo): model.preds[(algo, Partition.train)] = train_preds model.preds[(algo, Partition.test)] = test_preds - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: # Get probabilities for the positive class. train_probas = est.predict_proba(X_train) test_probas = est.predict_proba(X_test) @@ -945,7 +945,7 @@ def predict_blend(model): for partition in datasets.keys(): pred_set = [model.preds[key] for key, _ in model.preds.items() if partition in key] model.preds[(blend_tag, partition)] = np.round(np.mean(pred_set, axis=0), 0).astype(int) - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: proba_set = [model.probas[key] for key, _ in model.probas.items() if partition in key] model.probas[(blend_tag, partition)] = np.mean(proba_set, axis=0) model.preds[(blend_tag, partition)] = np.round(model.probas[(blend_tag, partition)], 0).astype(int) @@ -1048,7 +1048,7 @@ def select_best_model(model, partition): model.best_algo = best_algo model.estimators[best_tag] = model.estimators[best_algo] model.preds[(best_tag, partition)] = model.preds[(best_algo, partition)] - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: model.probas[(best_tag, partition)] = model.probas[(best_algo, partition)] # Record support vector for any recursive feature elimination @@ -1173,7 +1173,7 @@ def kendall_tau(expected, predicted): # get predictions for the given algorithm predicted = model.preds[(algo, partition)] # Classification Metrics - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: probas = model.probas[(algo, partition)] try: model.metrics[(algo, partition, 'accuracy')] = accuracy_score(expected, predicted) @@ -1369,14 +1369,14 @@ def save_predictions(model, partition): for tag_id in tag_list: pred_name = USEP.join(['pred', datasets[partition], tag_id.lower()]) df_master = df_master.with_columns(pl.Series(pred_name, model.preds[(tag_id, partition)])) - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: prob_name = USEP.join(['prob', datasets[partition], tag_id.lower()]) df_master = df_master.with_columns(pl.Series(prob_name, model.probas[(tag_id, partition)])) # Save ranked predictions logger.info("Saving Ranked Predictions") - if model_type == ModelType.classification or model_type == ModelType.system: + if model_type == ModelType.classification: prob_name = USEP.join(['prob', datasets[partition], sort_tag.lower()]) df_master = df_master.sort(prob_name, descending=True) else: @@ -1391,7 +1391,7 @@ def save_predictions(model, partition): sample_spec = PSEP.join([submission_file, extension]) sample_input = SSEP.join([data_dir, sample_spec]) df_sub = pd.read_csv(sample_input) - if submit_probas and (model_type == ModelType.classification or model_type == ModelType.system): + if submit_probas and (model_type == ModelType.classification): df_sub[df_sub.columns[1]] = model.probas[(model.best_algo, Partition.test)] else: df_sub[df_sub.columns[1]] = model.preds[(model.best_algo, Partition.test)] diff --git a/alphapy/plots.py b/alphapy/plots.py index ceb32d9..e42debf 100644 --- a/alphapy/plots.py +++ b/alphapy/plots.py @@ -316,7 +316,7 @@ def plot_calibration(model, partition): # For classification only model_type = model.specs['model_type'] - if model_type != ModelType.classification and model_type != ModelType.system: + if model_type != ModelType.classification: logger.info('Calibration plot is for classification only') return None @@ -571,7 +571,7 @@ def plot_roc_curve(model, partition): # For classification only model_type = model.specs['model_type'] - if model_type != ModelType.classification and model_type != ModelType.system: + if model_type != ModelType.classification: logger.info('ROC Curves are for classification only') return None @@ -648,7 +648,7 @@ def plot_confusion_matrix(model, partition): # For classification only model_type = model.specs['model_type'] - if model_type != ModelType.classification and model_type != ModelType.system: + if model_type != ModelType.classification: logger.info('Confusion Matrix is for classification only') return None @@ -850,7 +850,7 @@ def plot_boundary(model, partition, f1=0, f2=1): # For classification only model_type = model.specs['model_type'] - if model_type != ModelType.classification and model_type != ModelType.system: + if model_type != ModelType.classification: logger.info('Boundary Plots are for classification only') return None diff --git a/alphapy/transforms.py b/alphapy/transforms.py deleted file mode 100644 index 8faf2a3..0000000 --- a/alphapy/transforms.py +++ /dev/null @@ -1,2485 +0,0 @@ -""" -Package : AlphaPy -Module : transforms -Created : March 14, 2020 - -Copyright 2024 ScottFree Analytics LLC -Mark Conway & Robert D. Scott II - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -# -# Imports -# - -import itertools -import logging -import numpy as np -import pandas as pd - -from alphapy.calendrical import biz_day_month -from alphapy.calendrical import biz_day_week -from alphapy.calendrical import get_rdate -from alphapy.globals import NULLTEXT -from alphapy.globals import BSEP, USEP -from alphapy.variables import vexec - - -# -# Initialize logger -# - -logger = logging.getLogger(__name__) - - -# -# Function adx -# - -def adx(df, p=14): - r"""Calculate the Average Directional Index (ADX). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with all columns required for calculation. If you - are applying ADX through ``vapply``, then these columns are - calculated automatically. - p : int - The period over which to calculate the ADX. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - The Average Directional Movement Index (ADX) was invented by J. Welles - Wilder in 1978 [WIKI_ADX]_. Its value reflects the strength of trend in any - given instrument. - - .. [WIKI_ADX] https://en.wikipedia.org/wiki/Average_directional_movement_index - - """ - c1 = 'diplus' - df = vexec(df, c1) - c2 = 'diminus' - df = vexec(df, c2) - # calculations - dip = df[c1] - dim = df[c2] - didiff = abs(dip - dim) - disum = dip + dim - new_column = 100 * didiff.ewm(span=p).mean() / disum - return new_column - - -# -# Function bbands -# - -def bbands(df, c='close', p=20, sd=2.0, low_band=True): - r"""Calculate the Bollinger Bands. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Simple Moving Average. - sd : float - The number of standard deviations. - low_band : bool, optional - If set to True, then calculate the lower band, else the upper band. - - Returns - ------- - bband : pandas.Series - The series for the selected Bollinger Band. - """ - - sma = ma(df, c, p) - if low_band: - bband = sma - sd * df[c].rolling(p).std(ddof=0) - else: - bband = sma + sd * df[c].rolling(p).std(ddof=0) - return bband - - -# -# Function bblower -# - -def bblower(df, c='close', p=20, sd=1.5): - r"""Calculate the lower Bollinger Band. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Simple Moving Average. - sd : float - The number of standard deviations. - - Returns - ------- - lower_band : pandas.Series - The series containing the lower Bollinger Band. - """ - - lower_band = bbands(df, c, p, sd) - return lower_band - - -# -# Function bbupper -# - -def bbupper(df, c='close', p=20, sd=1.5): - r"""Calculate the upper Bollinger Band. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Simple Moving Average. - sd : float - The number of standard deviations. - - Returns - ------- - upper_band : pandas.Series - The series containing the upper Bollinger Band. - """ - - upper_band = bbands(df, c, p, sd, low_band=False) - return upper_band - - -# -# Function bizday -# - -def bizday(df, c): - r"""Extract business day of month and week. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - date_features : pandas.DataFrame - The dataframe containing the date features. - """ - - date_features = pd.DataFrame() - try: - date_features = dateparts(df[c]) - rdate = date_features.apply(get_rdate, axis=1) - bdm = pd.Series(rdate.apply(biz_day_month), name='bizdaymonth') - bdw = pd.Series(rdate.apply(biz_day_week), name='bizdayweek') - frames = [date_features, bdm, bdw] - date_features = pd.concat(frames, axis=1) - except: - logger.info("Could not extract business date information from the series") - return date_features - - -# -# Function c2max -# - -def c2max(df, c1, c2): - r"""Take the maximum value between two columns in a dataframe. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the two columns ``c1`` and ``c2``. - c1 : str - Name of the first column in the dataframe ``df``. - c2 : str - Name of the second column in the dataframe ``df``. - - Returns - ------- - max_val : float - The maximum value of the two columns. - - """ - max_val = max(df[c1], df[c2]) - return max_val - - -# -# Function c2min -# - -def c2min(df, c1, c2): - r"""Take the minimum value between two columns in a dataframe. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the two columns ``c1`` and ``c2``. - c1 : str - Name of the first column in the dataframe ``df``. - c2 : str - Name of the second column in the dataframe ``df``. - - Returns - ------- - min_val : float - The minimum value of the two columns. - - """ - min_val = min(df[c1], df[c2]) - return min_val - - -# -# Function dateparts -# - -def dateparts(df, c): - r"""Extract date into its components: year, month, day, dayofweek. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - date_features : pandas.DataFrame - The dataframe containing the date features. - """ - - col = df[c] - if hasattr(col, "to_pandas"): - col = col.to_pandas() - ds_dt = pd.to_datetime(col) - date_features = pd.DataFrame() - try: - fyear = pd.Series(ds_dt.dt.year, name='year').astype(int) - fmonth = pd.Series(ds_dt.dt.month, name='month').astype(int) - fday = pd.Series(ds_dt.dt.day, name='day').astype(int) - fdow = pd.Series(ds_dt.dt.dayofweek, name='dayofweek').astype(int) - frames = [fyear, fmonth, fday, fdow] - date_features = pd.concat(frames, axis=1) - except: - logger.info("Could not extract date information from the series") - return date_features - - -# -# Function diff -# - -def diff(df, c, n=1): - r"""Calculate the n-th order difference for the given variable. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - n : int - The number of times that the values are differenced. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - """ - new_column = np.diff(df[c], n) - return new_column - - -# -# Function diminus -# - -def diminus(df, p=14): - r"""Calculate the Minus Directional Indicator (-DI). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with columns ``high`` and ``low``. - p : int - The period over which to calculate the -DI. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *A component of the average directional index (ADX) that is used to - measure the presence of a downtrend. When the -DI is sloping downward, - it is a signal that the downtrend is getting stronger* [IP_NDI]_. - - .. [IP_NDI] http://www.investopedia.com/terms/n/negativedirectionalindicator.asp - - """ - tr = 'truerange' - df = vexec(df, tr) - atr = USEP.join(['atr', str(p)]) - df = vexec(df, atr) - dmm = 'dmminus' - df[dmm] = dminus(df) - new_column = 100 * dminus(df).ewm(span=p).mean() / df[atr] - return new_column - - -# -# Function diplus -# - -def diplus(df, p=14): - r"""Calculate the Plus Directional Indicator (+DI). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with columns ``high`` and ``low``. - p : int - The period over which to calculate the +DI. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *A component of the average directional index (ADX) that is used to - measure the presence of an uptrend. When the +DI is sloping upward, - it is a signal that the uptrend is getting stronger* [IP_PDI]_. - - .. [IP_PDI] http://www.investopedia.com/terms/p/positivedirectionalindicator.asp - - """ - tr = 'truerange' - df = vexec(df, tr) - atr = USEP.join(['atr', str(p)]) - df = vexec(df, atr) - dmp = 'dmplus' - df = vexec(df, dmp) - new_column = 100 * df[dmp].ewm(span=p).mean() / df[atr] - return new_column - - -# -# Function dminus -# - -def dminus(df): - r"""Calculate the Minus Directional Movement (-DM). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with high and low columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Directional movement is negative (minus) when the prior low minus - the current low is greater than the current high minus the prior high. - This so-called Minus Directional Movement (-DM) equals the prior low - minus the current low, provided it is positive. A negative value - would simply be entered as zero* [SC_ADX]_. - - """ - c1 = 'downmove' - df[c1] = -net(df, 'low') - c2 = 'upmove' - df[c2] = net(df, 'high') - new_column = df.apply(gtval0, axis=1, args=[c1, c2]) - return new_column - - -# -# Function dmplus -# - -def dmplus(df): - r"""Calculate the Plus Directional Movement (+DM). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with high and low columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Directional movement is positive (plus) when the current high minus - the prior high is greater than the prior low minus the current low. - This so-called Plus Directional Movement (+DM) then equals the current - high minus the prior high, provided it is positive. A negative value - would simply be entered as zero* [SC_ADX]_. - - .. [SC_ADX] http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:average_directional_index_adx - - """ - c1 = 'upmove' - df[c1] = net(df, 'high') - c2 = 'downmove' - df[c2] = -net(df, 'low') - new_column = df.apply(gtval0, axis=1, args=[c1, c2]) - return new_column - - -# -# Function ema -# - -def ema(df, c, p=20): - r"""Calculate the Exponential Moving Average (EMA) on a rolling basis. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average (EMA). - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *An exponential moving average (EMA) is a type of moving average - that is similar to a simple moving average, except that more weight - is given to the latest data* [IP_EMA]_. - - .. [IP_EMA] http://www.investopedia.com/terms/e/ema.asp - - """ - new_column = df[c].ewm(span=p).mean() - return new_column - - -# -# Function gap -# - -def gap(df): - r"""Calculate the gap percentage between the current open and - the previous close. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with open and close columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *A gap is a break between prices on a chart that occurs when the - price of a stock makes a sharp move up or down with no trading - occurring in between* [IP_GAP]_. - - .. [IP_GAP] http://www.investopedia.com/terms/g/gap.asp - - """ - c1 = ''.join(['close', '[1]']) - df = vexec(df, c1) - new_column = 100 * pchange2(df, 'open', c1) - return new_column - - -# -# Function gapbadown -# - -def gapbadown(df): - r"""Determine whether or not there has been a breakaway gap down. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with open and low columns. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *A breakaway gap represents a gap in the movement of a stock price - supported by levels of high volume* [IP_BAGAP]_. - - .. [IP_BAGAP] http://www.investopedia.com/terms/b/breakawaygap.asp - - """ - new_column = df['open'] < df['low'].shift(1) - return new_column - - -# -# Function gapbaup -# - -def gapbaup(df): - r"""Determine whether or not there has been a breakaway gap up. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with open and high columns. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *A breakaway gap represents a gap in the movement of a stock price - supported by levels of high volume* [IP_BAGAP]_. - - """ - new_column = df['open'] > df['high'].shift(1) - return new_column - - -# -# Function gapdown -# - -def gapdown(df): - r"""Determine whether or not there has been a gap down. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with open and close columns. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *A gap is a break between prices on a chart that occurs when the - price of a stock makes a sharp move up or down with no trading - occurring in between* [IP_GAP]_. - - """ - new_column = df['open'] < df['close'].shift(1) - return new_column - - -# -# Function gapup -# - -def gapup(df): - r"""Determine whether or not there has been a gap up. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with open and close columns. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *A gap is a break between prices on a chart that occurs when the - price of a stock makes a sharp move up or down with no trading - occurring in between* [IP_GAP]_. - - """ - new_column = df['open'] > df['close'].shift(1) - return new_column - - -# -# Function gtval -# - -def gtval(df, c1, c2): - r"""Determine whether or not the first column of a dataframe - is greater than the second. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the two columns ``c1`` and ``c2``. - c1 : str - Name of the first column in the dataframe ``df``. - c2 : str - Name of the second column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c1] > df[c2] - return new_column - - -# -# Function gtval0 -# - -def gtval0(df, c1, c2): - r"""For positive values in the first column of the dataframe - that are greater than the second column, get the value in - the first column, otherwise return zero. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the two columns ``c1`` and ``c2``. - c1 : str - Name of the first column in the dataframe ``df``. - c2 : str - Name of the second column in the dataframe ``df``. - - Returns - ------- - new_val : float - A positive value or zero. - - """ - if df[c1] > df[c2] and df[c1] > 0: - new_val = df[c1] - else: - new_val = 0 - return new_val - - -# -# Function haclose -# - -def haclose(df): - r"""Calculate the Heikin-Ashi Close. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with OHLC columns. - - Returns - ------- - haclose_ds : pandas.Series - The series containing the Heikin-Ashi Close. - - """ - haclose_ds = (df['open'] + df['high'] + df['low'] + df['close']) / 4.0 - return haclose_ds - - -# -# Function hahigh -# - -def hahigh(df): - r"""Calculate the Heikin-Ashi High. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with OHLC columns. - - Returns - ------- - hahigh_ds : pandas.Series - The series containing the Heikin-Ashi High. - - """ - hahigh_ds = pd.DataFrame([df['high'], haopen(df), haclose(df)]).max(axis=0) - return hahigh_ds - - -# -# Function halow -# - -def halow(df): - r"""Calculate the Heikin-Ashi Low. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with OHLC columns. - - Returns - ------- - halow_ds : pandas.Series - The series containing the Heikin-Ashi Low. - - """ - halow_ds = pd.DataFrame([df['low'], haopen(df), haclose(df)]).min(axis=0) - return halow_ds - - -# -# Function haopen -# - -def haopen(df): - r"""Calculate the Heikin-Ashi Open. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with OHLC columns. - - Returns - ------- - haopen : pandas.Series - The series containing the Heikin-Ashi Open. - - """ - s1 = haclose(df) - s2 = (df['open'] + df['close']) / 2.0 - dfha = pd.concat([s1, s2], axis=1) - dfha.columns = ['haclose', 'haopen'] - for i in range(1, len(dfha)): - dfha.iloc[i]['haopen'] = (dfha.iloc[i-1]['haopen'] + dfha.iloc[i-1]['haclose']) / 2.0 - return dfha['haopen'] - - -# -# Function higher -# - -def higher(df, c, o=1): - r"""Determine whether or not a series value is higher than - the value ``o`` periods back. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - o : int, optional - Offset value for shifting the series. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] > df[c].shift(o) - return new_column - - -# -# Function highest -# - -def highest(df, c, p=20): - r"""Calculate the highest value on a rolling basis. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the rolling maximum. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - new_column = df[c].rolling(p).max() - return new_column - - -# -# Function hlrange -# - -def hlrange(df, p=1): - r"""Calculate the Range, the difference between High and Low. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with columns ``high`` and ``low``. - p : int - The period over which the range is calculated. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - new_column = highest(df, 'high', p) - lowest(df, 'low', p) - return new_column - - -# -# Function keltner -# - -def keltner(df, c='close', p=20, atrs=1.5, channel='midline'): - r"""Calculate the Keltner Channels. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - atrs : float - The multiple of Average True Range. - - Returns - ------- - kc : pandas.Series - The series containing the Keltner Channel. - """ - - ds_ema = ema(df, c, p) - atr = USEP.join(['atr', str(p)]) - df = vexec(df, atr) - if channel == 'lower': - kc = ds_ema - df[atr].apply(lambda x: atrs * x) - elif channel == 'upper': - kc = ds_ema + df[atr].apply(lambda x: atrs * x) - else: - kc = ds_ema - return kc - - -# -# Function keltnerlb -# - -def keltnerlb(df, c='close', p=20, atrs=1.5): - r"""Calculate the lower Keltner Channel. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - atrs : float - The multiple of Average True Range. - - Returns - ------- - kclb : pandas.Series - The series containing the lower Keltner Channel. - """ - - kclb = keltner(df, c, p, atrs, channel='lower') - return kclb - - -# -# Function keltnerml -# - -def keltnerml(df, c='close', p=20, atrs=1.5): - r"""Calculate the midline Keltner Channel. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - atrs : float - The multiple of Average True Range. - - Returns - ------- - kcml : pandas.Series - The series containing the midline Keltner Channel. - """ - - kcml = keltner(df, c, p, atrs) - return kcml - - -# -# Function keltnerub -# - -def keltnerub(df, c='close', p=20, atrs=1.5): - r"""Calculate the upper Keltner Channel. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - atrs : float - The multiple of Average True Range. - - Returns - ------- - kcub : pandas.Series - The series containing the upper Keltner Channel. - """ - - kcub = keltner(df, c, p, atrs, channel='upper') - return kcub - - -# -# Function lower -# - -def lower(df, c, o=1): - r"""Determine whether or not a series value is lower than - the value ``o`` periods back. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - o : int, optional - Offset value for shifting the series. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] < df[c].shift(o) - return new_column - - -# -# Function lowest -# - -def lowest(df, c, p=20): - r"""Calculate the lowest value on a rolling basis. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the rolling minimum. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - return df[c].rolling(p).min() - - -# -# Function ma -# - -def ma(df, c='close', p=20): - r"""Calculate the mean on a rolling basis. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the rolling mean. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *In statistics, a moving average (rolling average or running average) - is a calculation to analyze data points by creating series of averages - of different subsets of the full data set* [WIKI_MA]_. - - .. [WIKI_MA] https://en.wikipedia.org/wiki/Moving_average - - """ - new_column = df[c].rolling(p).mean() - return new_column - - -# -# Function maabove -# - -def maabove(df, c, p=50): - r"""Determine those values of the dataframe that are above the - moving average. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period of the moving average. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] > ma(df, c, p) - return new_column - - -# -# Function mabelow -# - -def mabelow(df, c, p=50): - r"""Determine those values of the dataframe that are below the - moving average. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period of the moving average. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] < ma(df, c, p) - return new_column - - -# -# Function maratio -# - -def maratio(df, c, p1=1, p2=10): - r"""Calculate the ratio of two moving averages. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p1 : int - The period of the first moving average. - p2 : int - The period of the second moving average. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - new_column = ma(df, c, p1) / ma(df, c, p2) - return new_column - - -# -# Function negval0 -# - -def negval0(df, c): - r"""Get the negative value, otherwise zero. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (float) - Negative value or zero. - - """ - new_column = df[c].apply(lambda x: -x if x < 0 else 0) - return new_column - - -# -# Function negvals -# - -def negvals(df, c): - r"""Find the negative values in the series. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] < 0 - return new_column - - -# -# Function net -# - -def net(df, c='close', o=1): - r"""Calculate the net change of a given column. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - o : int, optional - Offset value for shifting the series. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Net change is the difference between the closing price of a security - on the day's trading and the previous day's closing price. Net change - can be positive or negative and is quoted in terms of dollars* [IP_NET]_. - - .. [IP_NET] http://www.investopedia.com/terms/n/netchange.asp - - """ - new_column = df[c] - df[c].shift(o) - return new_column - - -# -# Function netreturn -# - -def netreturn(df, c, o=1): - r"""Calculate the net return, or Return On Invesment (ROI) - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - o : int, optional - Offset value for shifting the series. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *ROI measures the amount of return on an investment relative to the - original cost. To calculate ROI, the benefit (or return) of an - investment is divided by the cost of the investment, and the result - is expressed as a percentage or a ratio* [IP_ROI]_. - - .. [IP_ROI] http://www.investopedia.com/terms/r/returnoninvestment.asp - - """ - new_column = 100 * pchange(df, c, o) - return new_column - - -# -# Function pchange -# - -def pchange(df, c, o=1): - r"""Calculate the percentage change within the same variable. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - o : int - Offset to the previous value. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - new_column = df[c] / df[c].shift(o) - 1.0 - return new_column - - -# -# Function pchange2 -# - -def pchange2(df, c1, c2): - r"""Calculate the percentage change between two variables. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the two columns ``c1`` and ``c2``. - c1 : str - Name of the first column in the dataframe ``df``. - c2 : str - Name of the second column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - new_column = df[c1] / df[c2] - 1.0 - return new_column - - -# -# Function posvals -# - -def posvals(df, c): - r"""Find the positive values in the series. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - """ - new_column = df[c] > 0 - return new_column - - -# -# Function posval0 -# - -def posval0(df, c): - r"""Get the positive value, otherwise zero. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - new_column : pandas.Series (float) - Positive value or zero. - - """ - new_column = df[c].apply(lambda x: x if x > 0 else 0) - return new_column - - -# -# Function rindex -# - -def rindex(df, ci, ch, cl, p=1): - r"""Calculate the *range index* spanning a given period ``p``. - - The **range index** is a number between 0 and 100 that - relates the value of the index column ``ci`` to the - high column ``ch`` and the low column ``cl``. For example, - if the low value of the range is 10 and the high value - is 20, then the range index for a value of 15 would be 50%. - The range index for 18 would be 80%. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the columns ``ci``, ``ch``, and ``cl``. - ci : str - Name of the index column in the dataframe ``df``. - ch : str - Name of the high column in the dataframe ``df``. - cl : str - Name of the low column in the dataframe ``df``. - p : int - The period over which the range index of column ``ci`` - is calculated. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - """ - o = p-1 if df[ci].name == 'open' else 0 - hh = highest(df, ch, p) - ll = lowest(df, cl, p) - fn = df[ci].shift(o) - ll - fd = hh - ll - new_column = 100 * fn / fd - return new_column - - -# -# Function rsi -# - -def rsi(df, p=14): - r"""Calculate the Relative Strength Index (RSI). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``net``. - p : int - The period over which to calculate the RSI. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Developed by J. Welles Wilder, the Relative Strength Index (RSI) is a momentum - oscillator that measures the speed and change of price movements* [SC_RSI]_. - - .. [SC_RSI] http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:relative_strength_index_rsi - - """ - cdiff = 'net' - df = vexec(df, cdiff) - df['pval'] = posval0(df, cdiff) - df['mval'] = negval0(df, cdiff) - upcs = ma(df, 'pval', p) - dpcs = ma(df, 'mval', p) - new_column = 100 - (100 / (1 + (upcs / dpcs))) - return new_column - - -# -# Function runs -# - -def runs(df, c='close', w=20): - r"""Calculate the total number of runs. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - w : int - The rolling period. - - Returns - ------- - runs_value : int - The total number of distinct runs in the rolling window. - - Example - ------- - - >>> runs(df, c, 20) - - """ - - # Calculate the difference between the current and previous value - ds = df[c] - change = ds.diff() - - # Define a function to calculate the number of distinct runs in a window - def calculate_runs(window): - # Create a list of changes - changes = [1 if x > 0 else -1 if x < 0 else 0 for x in window] - # Group by direction of change and count the number of groups - runs = len(list(itertools.groupby(changes))) - # Exclude groups where change is 0 (no change in direction) - return runs if changes[0] != 0 else runs - 1 - - # Apply the function to calculate the number of runs over a rolling window - runs_value = change.rolling(window=w).apply(calculate_runs, raw=True) - - return runs_value.fillna(0).astype(int) - - -# -# Function runtotal -# - -def runtotal(df, c='close', w=50): - r"""Calculate the running total. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - w : int - The rolling period. - - Returns - ------- - running_total : int - The final running total. - - Example - ------- - - >>> runtotal(df, c, 50)) - - """ - - # Calculate the difference between the current and previous value - ds = df[c] - change = ds.diff() - - # Initialize running total Series with zeros - running_total = pd.Series(np.zeros(len(ds)), index=ds.index) - - # Calculate running totals over the rolling window - for i in range(1, len(change)): - if i < w: - # Calculate the running total from the start up to the current index - running_total[i] = running_total[i - 1] + (1 if change[i] > 0 else (-1 if change[i] < 0 else 0)) - else: - # Calculate the running total within the rolling window - window_total = sum(1 if change[j] > 0 else (-1 if change[j] < 0 else 0) for j in range(i - w + 1, i + 1)) - running_total[i] = window_total - - return running_total.astype(int) - - -# -# Function runstest -# - -def runstest(df, c='close', w=20, wfuncs='all'): - r"""Perform a runs test on binary series. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - w : int - The rolling period. - wfuncs : list - The set of runs test functions to apply to the column: - - ``'all'``: - Run all of the functions below. - ``'rtotal'``: - The running total over the ``window`` period. - ``'runs'``: - Total number of runs in ``window``. - ``'streak'``: - The length of the latest streak. - ``'zscore'``: - The Z-Score over the ``window`` period. - - Returns - ------- - new_features : pandas.DataFrame - The dataframe containing the runs test features. - - References - ---------- - For more information about runs tests for detecting non-randomness, - refer to [RUNS]_. - - .. [RUNS] http://www.itl.nist.gov/div898/handbook/eda/section3/eda35d.htm - - """ - - all_funcs = {'runs' : runs, - 'streak' : streak, - 'runtotal' : runtotal, - 'zscore' : zscore} - # use all functions - if 'all' in wfuncs: - wfuncs = list(all_funcs.keys()) - # apply each of the runs functions - new_features = pd.DataFrame() - for wf in wfuncs: - if wf in all_funcs: - new_feature = all_funcs[wf](df, c, w) - new_feature.fillna(0, inplace=True) - new_column_name = USEP.join([wf, c]) - new_feature = new_feature.rename(new_column_name) - frames = [new_features, new_feature] - new_features = pd.concat(frames, axis=1) - else: - logger.info("Runs Function %s not found", w) - return new_features - - -# -# Function split2letters -# - -def split2letters(df, c): - r"""Separate text into distinct characters. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the text column in the dataframe ``df``. - - Returns - ------- - new_feature : pandas.Series - The array containing the new feature. - - Example - ------- - The value 'abc' becomes 'a b c'. - - """ - fc = df[c] - new_feature = None - dtype = fc.dtypes - if dtype == 'object': - fc.fillna(NULLTEXT, inplace=True) - maxlen = fc.astype(str).str.len().max() - if maxlen > 1: - new_feature = fc.apply(lambda x: BSEP.join(list(x))) - return new_feature - - -# -# Function streak -# - -def streak(df, c='close', w=20): - r"""Determine the length of the latest streak. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - w : int - The rolling period. - - Returns - ------- - latest_streak : int - The length of the latest streak. - - Example - ------- - - >>> streak(df, c, 20) - - """ - - # Calculate the difference between the current and previous value - ds = df[c] - change = ds.diff() - - # Initialize streak Series with zeros - latest_streak = pd.Series(np.zeros(len(ds)), index=ds.index) - - # Calculate streaks - for i in range(1, len(change)): - if change[i] > 0: - latest_streak[i] = latest_streak[i-1] + 1 if latest_streak[i-1] >= 0 else 1 - elif change[i] < 0: - latest_streak[i] = latest_streak[i-1] - 1 if latest_streak[i-1] <= 0 else -1 - else: - latest_streak[i] = 0 - - return latest_streak.astype(int) - - -# -# Function tdseqbuy -# - -def tdseqbuy(df, c='close', high='high', low='low'): - r"""Calculate Tom DeMark's Sequential Buy indicator. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the columns ``c``, ``high``, and ``low``. - c : str, optional - Name of the column in the dataframe ``df`` representing the close prices. - high : str, optional - Name of the column in the dataframe ``df`` representing the high prices. - low : str, optional - Name of the column in the dataframe ``df`` representing the low prices. - - Returns - ------- - tdbuy : pandas.Series - The array containing the Sequential Buy count. - - References - ---------- - *Tom DeMark's Sequential indicator is used to identify a potential reversal - of the current trend by comparing the closing price to previous closing - prices over a fixed period* [WIKI_TDSEQ]_. - - .. [WIKI_TDSEQ] https://en.wikipedia.org/wiki/Tom_DeMark_Indicators#TD_Sequential - - """ - - # Initialize columns - tdbuy = pd.Series(0, index=df.index) - - # Calculate the TD Setup - for i in range(4, len(df)): - # Buy Setup: Close less than the close 4 bars earlier for 9 consecutive bars - if df[c].iloc[i] < df[c].iloc[i - 4]: - tdbuy.iloc[i] = tdbuy.iloc[i - 1] + 1 if tdbuy.iloc[i - 1] < 9 else 0 - else: - tdbuy.iloc[i] = 0 - - return tdbuy - - -# -# Function tdseqsell -# - -def tdseqsell(df, c='close', high='high', low='low'): - r"""Calculate Tom DeMark's Sequential Sell indicator. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the columns ``c``, ``high``, and ``low``. - c : str, optional - Name of the column in the dataframe ``df`` representing the close prices. - high : str, optional - Name of the column in the dataframe ``df`` representing the high prices. - low : str, optional - Name of the column in the dataframe ``df`` representing the low prices. - - Returns - ------- - tdsell : pandas.Series - The array containing the Sequential Sell count. - - References - ---------- - *Tom DeMark's Sequential indicator is used to identify a potential reversal - of the current trend by comparing the closing price to previous closing - prices over a fixed period* [WIKI_TDSEQ]_. - - .. [WIKI_TDSEQ] https://en.wikipedia.org/wiki/Tom_DeMark_Indicators#TD_Sequential - - """ - - # Initialize columns - tdsell = pd.Series(0, index=df.index) - - # Calculate the TD Setup - for i in range(4, len(df)): - # Sell Setup: Close greater than the close 4 bars earlier for 9 consecutive bars - if df[c].iloc[i] > df[c].iloc[i - 4]: - tdsell.iloc[i] = tdsell.iloc[i - 1] + 1 if tdsell.iloc[i - 1] < 9 else 0 - else: - tdsell.iloc[i] = 0 - - return tdsell - - -# -# Function texplode -# - -def texplode(df, c): - r"""Get dummy values for a text column. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the text column in the dataframe ``df``. - - Returns - ------- - dummies : pandas.DataFrame - The dataframe containing the dummy variables. - - Example - ------- - - This function is useful for columns that appear to - have separate character codes but are consolidated - into a single column. Here, the column ``c`` is - transformed into five dummy variables. - - === === === === === === - c 0_a 1_x 1_b 2_x 2_z - === === === === === === - abz 1 0 1 0 1 - abz 1 0 1 0 1 - axx 1 1 0 1 0 - abz 1 0 1 0 1 - axz 1 1 0 0 1 - === === === === === === - - """ - fc = df[c] - maxlen = fc.astype(str).str.len().max() - fc.fillna(maxlen * BSEP, inplace=True) - fpad = str().join(['{:', BSEP, '>', str(maxlen), '}']) - fcpad = fc.apply(fpad.format) - fcex = fcpad.apply(lambda x: pd.Series(list(x))) - dummies = pd.get_dummies(fcex) - return dummies - - -# -# Function timeparts -# - -def timeparts(df, c): - r"""Extract time into its components: hour, minute, second. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - - Returns - ------- - time_features : pandas.DataFrame - The dataframe containing the time features. - """ - - ds_dt = pd.to_datetime(df[c].astype(str), format='%H:%M:%S') - time_features = pd.DataFrame() - try: - fhour = pd.Series(ds_dt.dt.hour, name='hour').astype(int) - fminute = pd.Series(ds_dt.dt.minute, name='minute').astype(int) - fsecond = pd.Series(ds_dt.dt.second, name='second').astype(int) - frames = [fhour, fminute, fsecond] - time_features = pd.concat(frames, axis=1) - except: - logger.info("Could not extract time information from the series") - return time_features - - -# -# Function truehigh -# - -def truehigh(df): - r"""Calculate the *True High* value. - - Parameters - ---------- - f : pandas.DataFrame - Dataframe with high and low columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Today's high, or the previous close, whichever is higher* [TS_TR]_. - - .. [TS_TR] http://help.tradestation.com/09_01/tradestationhelp/charting_definitions/true_range.htm - - """ - l1 = ''.join(['low', '[1]']) - df = vexec(df, l1) - new_column = df.apply(c2max, axis=1, args=[l1, 'high']) - return new_column - - -# -# Function truelow -# - -def truelow(df): - r"""Calculate the *True Low* value. - - Parameters - ---------- - f : pandas.DataFrame - Dataframe with high and low columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *Today's low, or the previous close, whichever is lower* [TS_TR]_. - - """ - h1 = ''.join(['high', '[1]']) - df = vexec(df, h1) - new_column = df.apply(c2min, axis=1, args=[h1, 'low']) - return new_column - - -# -# Function truerange -# - -def truerange(df): - r"""Calculate the *True Range* value. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with with high and low columns. - - Returns - ------- - new_column : pandas.Series (float) - The array containing the new feature. - - References - ---------- - *True High - True Low* [TS_TR]_. - - """ - new_column = truehigh(df) - truelow(df) - return new_column - - -# -# Function ttmsqueeze -# - -def ttmsqueeze(df, c='close', p=20): - r"""Calculate the TTM Squeeze momentum oscillator. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - - Returns - ------- - ttmosc : float - The value of the TTM Squeeze Indicator. - """ - - # calculate Donchian midline - hh = highest(df, 'high') - ll = lowest(df, 'low') - midp = (hh + ll) / 2.0 - # calculate the Simple Moving Average - sma = ma(df) - # calculate the delta between the midline and SMA - delta = (df[c] - (midp + sma) / 2.0) - # linear regression - fit_y = np.array(range(0, p)) - ttmosc = delta.rolling(window = p).apply(lambda x: - np.polyfit(fit_y, x, 1)[0] * (p-1) + - np.polyfit(fit_y, x, 1)[1], raw=True) - return ttmosc - - -# -# Function ttmsqueezelong -# - -def ttmsqueezelong(df, c='close', p=20, sd=2.0, atrs=1.5): - r"""Signal a TTM Squeeze Long Entry. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - sd : float - The number of standard deviations. - atrs : float - The multiple of Average True Range. - - Returns - ------- - squeezelong : bool - True if there is a TTM Squeeze Long Entry. - """ - - squeeze_off = ttmsqueezeoff(df, c, p, sd, atrs) - ttm_squeeze = ttmsqueeze(df, c, p) - squeeze1 = squeeze_off.shift(1).fillna(False) - squeeze2 = squeeze_off.shift(2).fillna(False) - long_cond1 = np.logical_and(squeeze1, ~squeeze2) - long_cond2 = np.greater(ttm_squeeze, 0) - squeezelong = np.logical_and(long_cond1, long_cond2) - return squeezelong - - -# -# Function ttmsqueezeoff -# - -def ttmsqueezeoff(df, c='close', p=20, sd=2.0, atrs=1.5): - r"""Determine the TTM Squeeze Off condition. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - sd : float - The number of standard deviations. - atrs : float - The multiple of Average True Range. - - Returns - ------- - squeezeoff : bool - The status of the TTM Squeeze Off Indicator. - """ - - kclb = keltner(df, c, p, atrs, channel='lower') - kcub = keltner(df, c, p, atrs, channel='upper') - bblb = bbands(df, c, p, sd) - bbub = bbands(df, c, p, sd, low_band=False) - squeezeoff = np.logical_and(np.greater(bbub, kcub), np.less(bblb, kclb)) - return squeezeoff - - -# -# Function ttmsqueezeon -# - -def ttmsqueezeon(df, c='close', p=20, sd=2.0, atrs=1.5): - r"""Determine the TTM Squeeze On condition. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - sd : float - The number of standard deviations. - atrs : float - The multiple of Average True Range. - - Returns - ------- - squeezeon : bool - The status of the TTM Squeeze On Indicator. - """ - - kclb = keltner(df, c, p, atrs, channel='lower') - kcub = keltner(df, c, p, atrs, channel='upper') - bblb = bbands(df, c, p, sd) - bbub = bbands(df, c, p, sd, low_band=False) - squeezeon = np.logical_and(np.less(bbub, kcub), np.greater(bblb, kclb)) - return squeezeon - - -# -# Function ttmsqueezeshort -# - -def ttmsqueezeshort(df, c='close', p=20, sd=2.0, atrs=1.5): - r"""Signal a TTM Squeeze Short Entry. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - p : int - The period over which to calculate the Exponential Moving Average. - sd : float - The number of standard deviations. - atrs : float - The multiple of Average True Range. - - Returns - ------- - squeezeshort : bool - True if there is a TTM Squeeze Short Entry. - """ - - squeeze_off = ttmsqueezeoff(df, c, p, sd, atrs) - ttm_squeeze = ttmsqueeze(df, c, p) - - squeeze1 = squeeze_off.shift(1).fillna(False) - squeeze2 = squeeze_off.shift(2).fillna(False) - short_cond1 = np.logical_and(squeeze1, ~squeeze2) - short_cond2 = np.less(ttm_squeeze, 0) - squeezeshort = np.logical_and(short_cond1, short_cond2) - return squeezeshort - - -# -# Function vwap -# - -def vwap(df, c='close', granularity='day', anchor_dates=None): - """ - Adjusted VWAP calculation using Unix timestamps for compatibility with np.digitize. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - granularity : str - The calendrical period over which to calculate VWAP. - anchor_dates : list - The set of dates over which to calculate VWAP. - - Returns - ------- - vwap_value : float - The calculated Volume-Weighted Average Price (VWAP). - """ - - # Ensure the index is in datetime format - df.index = pd.to_datetime(df.index) - - if not anchor_dates: - # Generate anchor dates automatically based on granularity - if granularity == 'day': - anchor_dates = pd.Series(df.index.normalize()).unique() - elif granularity == 'week': - anchor_dates = (df.index - pd.to_timedelta(df.index.dayofweek, unit='d')).normalize().unique() - elif granularity == 'month': - anchor_dates = df.index.to_period('M').to_timestamp().normalize().unique() - elif granularity == 'quarter': - anchor_dates = df.index.to_period('Q').to_timestamp().normalize().unique() - else: - anchor_dates = pd.to_datetime(anchor_dates).normalize() - - # Convert datetime index and anchor_dates to Unix timestamps for np.digitize - denominator = 10**9 - unix_index = df.index.astype(np.int64) // denominator - unix_anchor_dates = anchor_dates.astype(np.int64) // denominator - - # Assign periods based on Unix timestamps - df['period'] = np.digitize(unix_index, bins=unix_anchor_dates, right=False) - - # Calculate VWAP - df['dollar_volume'] = df[c] * df['volume'] - grouped = df.groupby('period') - df['cumulative_dollar_volume'] = grouped['dollar_volume'].cumsum() - df['cumulative_volume'] = grouped['volume'].cumsum() - df['vwap'] = df['cumulative_dollar_volume'] / df['cumulative_volume'] - - # Clean up by dropping temporary columns and keeping the original datetime index - vwap_value = df[['vwap']].copy() - df.drop(['period', 'dollar_volume', 'cumulative_dollar_volume', 'cumulative_volume', 'vwap'], - axis=1, inplace=True) - - return vwap_value - - -# -# Function xmadown -# - -def xmadown(df, c='close', pfast=20, pslow=50): - r"""Determine those values of the dataframe that cross below the - moving average. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str, optional - Name of the column in the dataframe ``df``. - pfast : int, optional - The period of the fast moving average. - pslow : int, optional - The period of the slow moving average. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *In the statistics of time series, and in particular the analysis - of financial time series for stock trading purposes, a moving-average - crossover occurs when, on plotting two moving averages each based - on different degrees of smoothing, the traces of these moving averages - cross* [WIKI_XMA]_. - - .. [WIKI_XMA] https://en.wikipedia.org/wiki/Moving_average_crossover - - """ - sma = ma(df, c, pfast) - sma_prev = sma.shift(1) - lma = ma(df, c, pslow) - lma_prev = lma.shift(1) - new_column = (sma < lma) & (sma_prev > lma_prev) - return new_column - - -# -# Function xmaup -# - -def xmaup(df, c='close', pfast=20, pslow=50): - r"""Determine those values of the dataframe that are below the - moving average. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str, optional - Name of the column in the dataframe ``df``. - pfast : int, optional - The period of the fast moving average. - pslow : int, optional - The period of the slow moving average. - - Returns - ------- - new_column : pandas.Series (bool) - The array containing the new feature. - - References - ---------- - *In the statistics of time series, and in particular the analysis - of financial time series for stock trading purposes, a moving-average - crossover occurs when, on plotting two moving averages each based - on different degrees of smoothing, the traces of these moving averages - cross* [WIKI_XMA]_. - - """ - sma = ma(df, c, pfast) - sma_prev = sma.shift(1) - lma = ma(df, c, pslow) - lma_prev = lma.shift(1) - new_column = (sma > lma) & (sma_prev < lma_prev) - return new_column - - -# -# Function zscore -# - -def zscore(df, c='close', w=20): - r"""Calculate the Z-Score. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe containing the column ``c``. - c : str - Name of the column in the dataframe ``df``. - w : int - The rolling period. - - Returns - ------- - zscore : float - The value of the Z-Score. - - References - ---------- - To calculate the Z-Score, you can find more information here [ZSCORE]_. - - .. [ZSCORE] https://en.wikipedia.org/wiki/Standard_score - - Example - ------- - - >>> zscore(f, c, 20) - - """ - ds = df[c] - r = ds.rolling(window=w) - m = r.mean().shift(1) - s = r.std(ddof=0).shift(1) - zscore = (ds - m) / s - return zscore - - -# -# Shannon's Demon Functions -# - - -def wdev(df): - r"""Calculate weight deviation based on price movements. - - Simulates portfolio weight deviation by tracking price changes - from a 50/50 rebalanced portfolio assumption. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with OHLCV data. - - Returns - ------- - new_column : pandas.Series (float) - The simulated weight deviation values. - """ - # Calculate returns and cumulative effect on portfolio weights - returns = df['close'].pct_change() - # Simulate portfolio drift from 50/50 allocation - # This is a simplified version - actual weight deviation from equal allocation - cumulative_return = (1 + returns).cumprod() - # Weight deviation from 50/50 (0.5 target) - current_weight = cumulative_return / (1 + cumulative_return) - weight_deviation = current_weight - 0.5 - return weight_deviation - - -def wdevhigh(df): - r"""Determine if weight deviation is high (>= 0.2). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with weight_deviation column. - - Returns - ------- - new_column : pandas.Series (bool) - True when absolute weight deviation >= 0.2. - """ - # Ensure wdev column exists - if 'wdev' not in df.columns: - df['wdev'] = wdev(df) - return abs(df['wdev']) >= 0.2 - - -def wdevlow(df): - r"""Determine if weight deviation is low (<= 0.05). - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with weight_deviation column. - - Returns - ------- - new_column : pandas.Series (bool) - True when absolute weight deviation <= 0.05. - """ - # Ensure wdev column exists - if 'wdev' not in df.columns: - df['wdev'] = wdev(df) - return abs(df['wdev']) <= 0.05 - - -def shannlong(df): - r"""Shannon's Demon long signal. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with weight_deviation column. - - Returns - ------- - new_column : pandas.Series (bool) - True when high weight deviation and positive weight deviation. - """ - # Ensure required columns exist - if 'wdev' not in df.columns: - df['wdev'] = wdev(df) - if 'wdevhigh' not in df.columns: - df['wdevhigh'] = wdevhigh(df) - return df['wdevhigh'] & (df['wdev'] > 0) - - -def shannshort(df): - r"""Shannon's Demon short signal. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with weight_deviation column. - - Returns - ------- - new_column : pandas.Series (bool) - True when high weight deviation and negative weight deviation. - """ - # Ensure required columns exist - if 'wdev' not in df.columns: - df['wdev'] = wdev(df) - if 'wdevhigh' not in df.columns: - df['wdevhigh'] = wdevhigh(df) - return df['wdevhigh'] & (df['wdev'] < 0) - - -def shannhold(df): - r"""Shannon's Demon hold signal. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with weight_deviation column. - - Returns - ------- - new_column : pandas.Series (bool) - True when weight deviation is low. - """ - # Ensure required columns exist - if 'wdevlow' not in df.columns: - df['wdevlow'] = wdevlow(df) - return df['wdevlow'] - - -def rebalancesignal(df): - r"""Shannon's Demon rebalance signal for ML target. - - Parameters - ---------- - df : pandas.DataFrame - Dataframe with Shannon signal columns. - - Returns - ------- - new_column : pandas.Series (int) - 1 for rebalancing periods, 0 for hold periods. - """ - # Ensure required columns exist - if 'shannlong' not in df.columns: - df['shannlong'] = shannlong(df) - if 'shannshort' not in df.columns: - df['shannshort'] = shannshort(df) - - # Create binary target: 1 for rebalance (long or short), 0 for hold - return (df['shannlong'] | df['shannshort']).astype(int) diff --git a/alphapy/variables.py b/alphapy/variables.py index 10f58f0..673d410 100644 --- a/alphapy/variables.py +++ b/alphapy/variables.py @@ -47,7 +47,6 @@ import warnings import pandas as pd -import polars as pl warnings.simplefilter(action='ignore', category=DeprecationWarning) warnings.simplefilter(action='ignore', category=FutureWarning) warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning) @@ -60,18 +59,13 @@ from collections import OrderedDict from importlib import import_module import logging -import math import re import sys # AlphaPy Imports from alphapy.alias import get_alias -from alphapy.frame import Frame -from alphapy.frame import frame_name -from alphapy.globals import BarType from alphapy.globals import LOFF, ROFF, USEP -from alphapy.space import Space from alphapy.utilities import valid_name @@ -477,7 +471,7 @@ def vfunc(f, v, vfuncs): module = module_name break # If the module was found, import the external transform function, - # else search the local namespace and AlphaPy. + # else search the local namespace. if module: ext_module = import_module(module) func = getattr(ext_module, func_name) @@ -487,12 +481,7 @@ def vfunc(f, v, vfuncs): if func_name in dir(module): func = getattr(module, func_name) else: - # Search the AlphaPy namespace - try: - ap_module = import_module('alphapy.transforms') - func = getattr(ap_module, func_name) - except: - func = None + func = None # return function and parameter list if func: logger.debug("Found function %s with parameters %s", func_name, newlist) @@ -507,10 +496,8 @@ def vexec(f, v, vfuncs=None): r"""Add a variable to the given dataframe. This is the core function for adding a variable to a dataframe. - The default variable functions are already defined locally - in ``alphapy.transforms``; however, you may want to define your - own variable functions. If so, then the ``vfuncs`` parameter - will contain the list of modules and functions to be imported + To define your own variable functions, populate the ``vfuncs`` + parameter with the list of modules and functions to be imported and applied by the ``vexec`` function. To write your own variable function, your function must have @@ -562,270 +549,6 @@ def vexec(f, v, vfuncs=None): return f -# -# Function vapply -# - -def vapply(group, market_specs, vfuncs=None): - r"""Apply a set of variables to multiple dataframes. - - Parameters - ---------- - group : alphapy.Group - The input group. - market_specs : dict - The specifications for controlling the MarketFlow pipeline. - vfuncs : dict, optional - Dictionary of external modules and functions. - - Returns - ------- - dfs : list - The list of pandas dataframes to analyze. - - Other Parameters - ---------------- - Frame.frames : dict - Global dictionary of dataframes - - """ - - # Get group information - - gspace = group.space - gsubject = gspace.subject - gsource = gspace.source - symbols = [item.lower() for item in group.members] - - # Extract market specification fields - - fractals = market_specs['fractals'] - features = market_specs['features'] - bar_type = market_specs['bar_type'] - - # Initialize list of dataframes, function dictionary, and possibly bar type - dffs = [] - - # Apply the variables to each frame - - for symbol in symbols: - logger.info("Applying Variables to %s", symbol.upper()) - # apply variables to each of the fractals - dfs = [] - for fractal in fractals: - logger.info("Fractal: %s", fractal) - fspace = Space(gsubject, gsource, fractal) - fname = frame_name(symbol, fspace) - if fname in Frame.frames: - df = Frame.frames[fname].df - # Convert Polars to pandas for variable processing - if isinstance(df, pl.DataFrame): - df = df.to_pandas() - # Set datetime as index if present - if 'datetime' in df.columns: - df = df.set_index('datetime') - if not df.empty: - # Remap to a different bar type if specified - df = map_bar_type(df, bar_type, fractal) - # create the features in the dataframe - all_features = features[fractal] - for feature in all_features: - allv = vtree(feature) - logger.debug("%s Feature: %s_%s", symbol.upper(), feature, fractal) - for v in allv: - logger.debug("%s Variable: %s_%s", symbol.upper(), v, fractal) - df = vexec(df, v, vfuncs) - # rename the columns - df = df.add_suffix(USEP + fractal) - # add the fractal frame to the list - dfs.append(df) - else: - logger.info("Empty Dataframe for %s [%s]", symbol.upper(), fractal) - else: - logger.info("Dataframe Not Found for %s [%s]", symbol.upper(), fractal) - # join all fractal frames - logger.info("Joining Frames: %s", fractals) - for indexf, df in enumerate(dfs): - # upsample successive frames - if indexf > 0: - # shift higher fractals - df = df.shift(1) - # resample for base fractal - dfr = df.resample(fractals[0]).ffill() - # join frames - dfj = dfj.merge(dfr, left_index=True, right_index=True) - else: - dfj = df - # add the symbol - colsym = 'symbol' - dfj[colsym] = symbol - first_col = dfj.pop(colsym) - dfj.insert(0, colsym, first_col) - # assign global frame for trading - tspace = Space(gsubject, gsource, 'ALL') - _ = Frame(symbol.lower(), tspace, dfj) - # append frame to list of dataframes - dffs.append(dfj) - # return all of the dataframes - return dffs - - -# -# Function get_daily_dollar_vol -# - -def get_daily_dollar_vol(df, p=60): - r"""Calculate daily dollar volume. - - Parameters - ---------- - df : pandas.DataFrame - Frame containing the close and volume values. - p : int - The lookback period for computing daily dollar volume. - - Returns - ------- - ds_dv : pandas.Series (float) - The array of dollar volumes. - - """ - - logger.info('Calculating daily dollar volume') - - fractal_daily = '1D' - ds_price_avg = df['close'].groupby(pd.Grouper(freq=fractal_daily)).mean().dropna() - ds_volume_sum = df['volume'].groupby(pd.Grouper(freq=fractal_daily)).sum() - ds_volume_sum = ds_volume_sum[ds_volume_sum > 0] - ds_dv = ds_price_avg * ds_volume_sum - ds_dv = ds_dv.ewm(span=p, min_periods=p).mean() - return ds_dv[-1] - - -# -# Function map_dollar_bars -# - -def map_dollar_bars(df, cols, fractal, p=100, pv_factor=1.0): - r"""Map time bars to dollar bars. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to convert to a different bar type. - cols: list - List of column names in the price dataframe. - fractal : str - Pandas offset alias. - p : int - The period over which to calculate dollar volume. - pv_factor : float - The multiple of daily dollar volume for the dollar bar threshold. - - Returns - ------- - dollar_bars : list - The list of dollar bar records. - - """ - - # Get the daily dollar volume. - - ddv = get_daily_dollar_vol(df, p) - time_factor = pd.Timedelta(fractal) / pd.Timedelta('1D') - dollar_threshold = pv_factor * time_factor * ddv - - # Create a dictionary from the original dataframe. - df_dict = df.to_dict('records') - - # Initialize an empty list of dollar bars. - dollar_bars = [] - - # Initialize the running dollar volume at zero. - running_volume = 0 - - # Initialize the running high and low with placeholder values. - running_high, running_low = 0, math.inf - - # Iterate over each time bar. - - for i in range(len(df_dict)): - next_timestamp, next_open, next_high, next_low, next_close, next_volume = \ - [df_dict[i][k] for k in cols] - # calculate the midpoint price - midpoint_price = ((next_open) + (next_close))/2 - # get the approximate dollar volume of the bar with the volume and midpoint price - dollar_volume = next_volume * midpoint_price - # update the running high and low - running_high, running_low = max(running_high, next_high), min(running_low, next_low) - # check if the dollar volume exceeds the threshold - if dollar_volume + running_volume >= dollar_threshold: - # set the timestamp for the dollar bar as the next incremental fractal - bar_timestamp = next_timestamp + pd.Timedelta(fractal) - # add a new dollar bar to the list of dollar bars - dollar_bars += [{'timestamp': bar_timestamp, - 'open': next_open, - 'high': running_high, - 'low': running_low, - 'close': next_close}] - # reset the running volume to zero - running_volume = 0 - # reset the running high and low to placeholder values - running_high, running_low = 0, math.inf - # otherwise, increment the running volume - else: - running_volume += dollar_volume - - # return the list of dollar bars - return dollar_bars - - -# -# Function map_bar_type -# - -def map_bar_type(df, bar_type, fractal, p=100, pv_factor=1.0): - r"""Map time bars to a different bar type. - - Parameters - ---------- - df : pandas.DataFrame - The dataframe to convert to a different bar type. - bar_type : Enum.BarType - The bar type for conversion (Dollar Bar, Heikin-Ashi, et al). - fractal : str - Pandas offset alias. - p : int - The period over which to calculate dollar volume. - pv_factor : float - The multiple of daily dollar volume for the dollar bar threshold. - - Returns - ------- - df : pandas.DataFrame - The converted dataframe for the target bar type. - """ - - if bar_type == BarType.time: - pass - elif bar_type == BarType.dollar: - cols = ['datetime', 'open', 'high', 'low', 'close', 'volume'] - df = map_dollar_bars(df, cols, fractal, p, pv_factor) - elif bar_type == BarType.heikinashi: - ha_map = {'open' : 'openha', - 'high' : 'highha', - 'low' : 'lowha', - 'close' : 'closeha'} - new_names = [x+'0' for x in ha_map.keys()] - for v in ha_map.items(): - df = vexec(df, ha_map[v]) - df.rename(columns=dict(zip(ha_map.keys(), new_names)), inplace=True) - df.rename(columns=dict(zip(ha_map.values(), ha_map.keys())), inplace=True) - else: - raise ValueError("Unknown Bar Type: %s" % bar_type) - return df - - # # Function cached_vtree # diff --git a/config/alphapy.yml b/config/alphapy.yml index abcb92a..76f3dc2 100644 --- a/config/alphapy.yml +++ b/config/alphapy.yml @@ -10,7 +10,7 @@ # Whether to use the AlphaPy server (usually False for local development) use_server: False -# Main data directory for storing market data and other datasets +# Main data directory for datasets # Replace with your actual data directory path data_dir: /Users/markconway/Projects/alphapy-pro/data diff --git a/docs/index.rst b/docs/index.rst index 0471d77..8de161c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -16,8 +16,6 @@ AlphaPy Pro Documentation :caption: Tutorials tutorials/kaggle - tutorials/market - tutorials/system .. toctree:: :maxdepth: 2 @@ -25,7 +23,6 @@ AlphaPy Pro Documentation user_guide/pipelines user_guide/project - user_guide/market_flow .. toctree:: :maxdepth: 2 diff --git a/docs/introduction/command.rst b/docs/introduction/command.rst index f846178..61a9716 100644 --- a/docs/introduction/command.rst +++ b/docs/introduction/command.rst @@ -3,7 +3,7 @@ Command Line Interface The AlphaPy Pro Command Line Interface (CLI) provides a streamlined way to run machine learning pipelines. Simply navigate to your project -directory and run the appropriate command. +directory and run ``alphapy``. Basic Usage ----------- @@ -44,41 +44,12 @@ Example:: # Train with verbose output alphapy --train --verbose -**mflow** - MarketFlow Pipeline -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For financial market analysis and trading systems:: - - mflow [options] - -Options: - -* ``--train`` - Train new models [Default] -* ``--predict`` - Make predictions from saved models -* ``--tdate YYYY-MM-DD`` - Training start date (Default: earliest date) -* ``--pdate YYYY-MM-DD`` - Prediction date (Default: today) -* ``--lookback N`` - Number of days for lookback window -* ``--forecast N`` - Number of days to forecast - -Examples:: - - # Train a market model - mflow - - # Train from a specific date - mflow --tdate 2023-01-01 - - # Make predictions for a specific date - mflow --predict --pdate 2024-01-15 - - # Use lookback window - mflow --lookback 252 --forecast 20 - **Custom Pipelines** -~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~ -You can create custom domain-specific pipelines following the MarketFlow pattern. -See the AlphaPy Pro architecture documentation for implementation details. +You can create custom data-preparation pipelines around AlphaPy by producing +the canonical ``train``/``test`` inputs expected by the package and then +running ``alphapy`` from the project directory. Project Structure Requirements ------------------------------ @@ -135,18 +106,12 @@ Then run:: # Use the model for multiple prediction sets alphapy --predict -**4. Market Backtesting**:: - - # Run market analysis with specific dates - mflow --tdate 2020-01-01 --pdate 2023-12-31 - Logging and Debugging --------------------- AlphaPy Pro creates detailed logs: * ``alphapy.log`` - Main pipeline log -* ``market_flow.log`` - MarketFlow specific log (when using mflow) To increase log verbosity:: @@ -161,4 +126,4 @@ Tips and Best Practices 4. **Keep model.yml under version control** 5. **Review plots** in the output directory for insights -For more details on configuration options, see :doc:`../user_guide/project`. \ No newline at end of file +For more details on configuration options, see :doc:`../user_guide/project`. diff --git a/docs/introduction/install.rst b/docs/introduction/install.rst index af3d080..0e98981 100644 --- a/docs/introduction/install.rst +++ b/docs/introduction/install.rst @@ -1,7 +1,7 @@ Installation ============ -AlphaPy Pro requires Python 3.10 or higher. You can install it directly +AlphaPy Pro requires Python 3.12 or higher. You can install it directly from the source repository or via pip after it's published. Development Installation @@ -12,7 +12,7 @@ in editable mode:: git clone https://github.com/ScottFreeLLC/alphapy-pro.git cd alphapy-pro - pip install -e . + pip install -e ".[dev]" This will install AlphaPy Pro along with all required dependencies. @@ -29,25 +29,9 @@ AlphaPy Pro will automatically install the following core dependencies: Additional dependencies for specific features: -* **Market Data**: yfinance, polygon-api-client, pandas-datareader * **Feature Engineering**: category_encoders, lofo-importance * **Imbalanced Learning**: imbalanced-learn * **Calibration**: venn-abers -* **Portfolio Analysis**: pyfolio (optional, for legacy support) - -Using uv (Recommended) ----------------------- - -`uv `_ is a fast Python package manager. -Install it following the instructions at https://docs.astral.sh/uv/getting-started/installation/ - -Then create a virtual environment and install AlphaPy Pro:: - - git clone https://github.com/ScottFreeLLC/alphapy-pro.git - cd alphapy-pro - uv venv - source .venv/bin/activate # On Windows: .venv\Scripts\activate - uv pip install -e . Platform-Specific Notes ----------------------- @@ -71,10 +55,7 @@ After installation, verify that AlphaPy Pro is correctly installed:: # Check if the alphapy command is available alphapy --help - - # Check if the mflow command is available - mflow --help - + # In Python, verify imports python -c "import alphapy; print(alphapy.__version__)" @@ -92,4 +73,4 @@ For specific package issues, consult the documentation for that package: * XGBoost: https://xgboost.readthedocs.io/ * LightGBM: https://lightgbm.readthedocs.io/ -* CatBoost: https://catboost.ai/docs/ \ No newline at end of file +* CatBoost: https://catboost.ai/docs/ diff --git a/docs/introduction/introduction.rst b/docs/introduction/introduction.rst index 6bbdb10..0e93aed 100644 --- a/docs/introduction/introduction.rst +++ b/docs/introduction/introduction.rst @@ -1,26 +1,22 @@ Introduction ============ -**AlphaPy Pro** is an advanced machine learning framework for data scientists -and quantitative traders. Built on top of ``scikit-learn``, ``pandas``, and -modern ML libraries, it provides a comprehensive toolkit for feature engineering, -model development, and systematic trading. Here are just some of the things you -can do with AlphaPy Pro: +**AlphaPy Pro** is a machine learning framework for configuration-driven model +development. Built on top of ``scikit-learn``, ``pandas``, and modern ML +libraries, it provides a reusable toolkit for feature engineering, model +training, evaluation, and prediction. Here are some of the things you can do +with AlphaPy Pro: * Build and optimize ML models using ``scikit-learn``, ``XGBoost``, ``LightGBM``, and ``CatBoost``. -* Analyze financial markets with *MarketFlow* using multiple data providers. -* Implement advanced trading strategies with meta-labeling and the Triple Barrier Method. -* Develop and backtest trading systems with portfolio analysis. -* Implement custom domain-specific pipelines for specialized applications. - -The ``alphapy`` package is the core platform providing the ML pipeline. -The *domain* pipeline MarketFlow (``mflow``) runs on top of ``alphapy`` -for financial market analysis. As shown in the diagram below, we separate -the domain pipeline from the model pipeline. The domain pipeline transforms -raw application data into canonical form—training and testing sets—while -the model pipeline handles feature engineering, model training, and evaluation. -This architecture has been refined through numerous Kaggle competitions and -real-world trading applications. +* Configure experiments with YAML instead of writing one-off training scripts. +* Create custom domain-specific pipelines around a shared model pipeline. +* Run classification, regression, ranking, and time-series style workflows. + +The ``alphapy`` package provides the reusable model pipeline. Domain-specific +applications prepare canonical training and testing data outside the package, +then hand that data to AlphaPy for feature engineering, model training, and +evaluation. In ``v4.0.0``, the finance-specific MarketFlow stack was moved to +``alphapy-finance`` so ``alphapy-pro`` can stay focused on the ML core. .. image:: alphapy_pipeline.png :alt: AlphaPy Model Pipeline @@ -107,12 +103,10 @@ Key Features **AlphaPy Pro** includes several advanced features for modern ML workflows: -* **Meta-Labeling**: Triple Barrier Method for advanced financial ML * **Feature Engineering**: Automated feature generation with clustering, interactions, and transformations * **Feature Selection**: LOFO (Leave One Feature Out) importance and univariate selection * **Model Calibration**: Probability calibration with sigmoid and isotonic methods * **Advanced Visualization**: Learning curves, ROC curves, confusion matrices, and feature importance plots -* **Multiple Data Sources**: EODHD, Yahoo Finance, Polygon * **Grid Search**: Randomized and systematic hyperparameter optimization * **Ensemble Methods**: Model blending and stacking @@ -125,7 +119,5 @@ External Packages * **Feature Engineering**: category_encoders, lofo-importance * **Imbalanced Learning**: imbalanced-learn (SMOTE, ADASYN, etc.) * **Calibration**: venn-abers for probability calibration -* **Market Data**: yfinance, polygon-api-client, pandas-datareader -* **Portfolio Analysis**: pyfolio (legacy), custom portfolio analytics * **Visualization**: matplotlib, seaborn, plotly * **Time Series**: statsmodels, arch diff --git a/docs/introduction/quickstart.rst b/docs/introduction/quickstart.rst index 0124479..6434e47 100644 --- a/docs/introduction/quickstart.rst +++ b/docs/introduction/quickstart.rst @@ -5,7 +5,7 @@ Install AlphaPy Pro from source:: git clone https://github.com/ScottFreeLLC/alphapy-pro.git cd alphapy-pro - pip install -e . + pip install -e ".[dev]" .. note:: Please refer to :doc:`install` for detailed installation instructions. @@ -43,17 +43,16 @@ Example Projects The repository includes several example projects: * **Kaggle** - Titanic survival prediction -* **Shannon's Demon** - Trading strategy implementation -* **Time Series** - Market prediction examples -* **Triple Barrier Method** - Advanced labeling for financial ML +* **Pizza** - Learning-to-rank example +* **Time Series** - Generic forecasting example Quick Examples -------------- -**Running a Market Analysis**:: +**Running a Time-Series Example**:: cd projects/time-series - mflow + alphapy **Customizing Model Configuration**: @@ -81,20 +80,17 @@ Edit ``config/model.yml`` to change algorithms, features, or parameters:: Working with Notebooks ---------------------- -You can also work with AlphaPy Pro in Jupyter notebooks: +You can also work with AlphaPy Pro in Jupyter notebooks for data preparation +and inspection, while using the CLI for end-to-end runs: .. code-block:: python - from alphapy.model import create_model from alphapy.frame import read_frame # Load your data train_df = read_frame('data/train.csv') test_df = read_frame('data/test.csv') - # Create and train models - model = create_model(train_df, test_df, 'config/model.yml') - .. note:: AlphaPy Pro supports both command-line and notebook workflows. We recommend using the command line for production runs and notebooks for exploratory analysis. @@ -104,5 +100,4 @@ Next Steps * Explore the example projects in ``projects/`` * Read the :doc:`../user_guide/pipelines` guide -* Learn about :doc:`../user_guide/market_flow` for trading applications -* Check out the :doc:`../tutorials/kaggle` tutorial \ No newline at end of file +* Check out the :doc:`../tutorials/kaggle` tutorial diff --git a/docs/source/alphapy.rst b/docs/source/alphapy.rst index cf9e401..162619c 100644 --- a/docs/source/alphapy.rst +++ b/docs/source/alphapy.rst @@ -76,22 +76,6 @@ alphapy.group module :undoc-members: :show-inheritance: -alphapy.metalabel module ------------------------- - -.. automodule:: alphapy.metalabel - :members: - :undoc-members: - :show-inheritance: - -alphapy.mflow_main module -------------------------- - -.. automodule:: alphapy.mflow_main - :members: - :undoc-members: - :show-inheritance: - alphapy.model module -------------------- @@ -100,14 +84,6 @@ alphapy.model module :undoc-members: :show-inheritance: -alphapy.nlp module ------------------- - -.. automodule:: alphapy.nlp - :members: - :undoc-members: - :show-inheritance: - alphapy.optimize module ----------------------- @@ -124,14 +100,6 @@ alphapy.plots module :undoc-members: :show-inheritance: -alphapy.portfolio module ------------------------- - -.. automodule:: alphapy.portfolio - :members: - :undoc-members: - :show-inheritance: - alphapy.space module -------------------- @@ -140,22 +108,6 @@ alphapy.space module :undoc-members: :show-inheritance: -alphapy.system module ---------------------- - -.. automodule:: alphapy.system - :members: - :undoc-members: - :show-inheritance: - -alphapy.transforms module -------------------------- - -.. automodule:: alphapy.transforms - :members: - :undoc-members: - :show-inheritance: - alphapy.utilities module ------------------------ @@ -178,4 +130,4 @@ Module contents .. automodule:: alphapy :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: diff --git a/docs/tutorials/closer_market.yml b/docs/tutorials/closer_market.yml deleted file mode 100644 index 76debb1..0000000 --- a/docs/tutorials/closer_market.yml +++ /dev/null @@ -1,30 +0,0 @@ -market: - create_model : False - data_fractal : 1d - data_history : 500 - forecast_period : 1 - fractal : 1d - lag_period : 1 - leaders : [] - predict_history : 50 - schema : quandl_wiki - subject : stock - target_group : faang - -system: - name : 'closer' - holdperiod : 0 - longentry : hc - longexit : - shortentry : lc - shortexit : - scale : False - -groups: - faang : ['fb', 'aapl', 'amzn', 'nflx', 'googl'] - -features : ['hc', 'lc'] - -aliases: - hc : 'higher_close' - lc : 'lower_close' diff --git a/docs/tutorials/market.rst b/docs/tutorials/market.rst deleted file mode 100644 index 06d0fc6..0000000 --- a/docs/tutorials/market.rst +++ /dev/null @@ -1,176 +0,0 @@ -Market Prediction Tutorial -========================== - -*MarketFlow Running Time: Approximately 6 minutes* - -.. image:: amzn.png - :alt: Amazon Candlestick Chart - :width: 80% - :align: center - -Machine learning subsumes *technical analysis* because collectively, -technical analysis is just a set of features for market prediction. -We can use machine learning as a feature blender for moving averages, -indicators such as RSI and ADX, and even representations of chart -formations such as double tops and head-and-shoulder patterns. - -We are not directly predicting net return in our models, although -that is the ultimate goal. By characterizing the market with models, -we can increase the Return On Investment (ROI). We have a wide range -of dependent or target variables from which to choose, not just net -return. There is more power in building a classifier rather than a -more traditional regression model, so we want to define binary -conditions such as whether or not today is going to be a trend day, -rather than a numerical prediction of today’s return. - -In this tutorial, we will train a model that predicts whether or -not the next day will have a larger-than-average range. This is -important for deciding which system to deploy on the prediction -day. If our model gives us predictive power, then we can filter -out those days where trading a given system is a losing strategy. - -**Step 1**: From the ``examples`` directory, change your -directory:: - - cd "Trading Model" - -Before running MarketFlow, let's briefly review the configuration -files in the ``config`` directory: - -``market.yml``: - The MarketFlow configuration file - -``model.yml``: - The AlphaPy configuration file - -In ``market.yml``, we limit our model to six stocks in the target -group ``test``, going back 2000 trading days. You can define any -group of stock symbols in the ``groups`` section, and then set -the ``target_group`` attribute in the ``market`` section to the -name of that group. - -This is a 1-day forecast, but we also use those features that can -be calculated at the market open, such as gap information in the -``leaders`` section. In the ``features`` section, we define many -variables for moving averages, historical range, RSI, volatility, -and volume. - -.. literalinclude:: rrover_market.yml - :language: yaml - :caption: **market.yml** - -In each of the tutorials, we experiment with different options in -``model.yml`` to run AlphaPy. Here, we first apply univariate feature -selection and then run a random forest classifier with Recursive -Feature Elimination, including Cross-Validation (RFECV). When you -choose RFECV, the process takes much longer, so if you want to see -more logging, then increase the ``verbosity`` level in the ``pipeline`` -section. - -Since stock prices are time series data, we apply the ``runs_test`` -function to twelve features in the ``treatments`` section. Treatments -are powerful because you can write any function to extrapolate new -features from existing ones. AlphaPy provides some of these functions -in the ``alphapy.features`` module, but it can also import external -functions as well. - -Our target variable is ``rrover``, the ratio of the 1-day range to -the 10-day average high/low range. If that ratio is greater than -or equal to 1.0, then the value of ``rrover`` is True. This is -what we are trying to predict. - -.. literalinclude:: rrover_model.yml - :language: yaml - :caption: **model.yml** - -**Step 2**: Now, let's run MarketFlow:: - - mflow --pdate 2017-10-01 - -As ``mflow`` runs, you will see the progress of the workflow, -and the logging output is saved in ``market_flow.log``. When the -workflow completes, your project structure will look like this, -with a different datestamp:: - - Trading Model - ├── market_flow.log - ├── config - ├── algos.yml - ├── market.yml - ├── model.yml - └── data - └── input - ├── test_20170420.csv - ├── test.csv - ├── train_20170420.csv - ├── train.csv - └── model - ├── feature_map_20170420.pkl - ├── model_20170420.pkl - └── output - ├── predictions_20170420.csv - ├── probabilities_20170420.csv - ├── rankings_20170420.csv - └── plots - ├── calibration_test.png - ├── calibration_train.png - ├── confusion_test_RF.png - ├── confusion_train_RF.png - ├── feature_importance_train_RF.png - ├── learning_curve_train_RF.png - ├── roc_curve_test.png - ├── roc_curve_train.png - -Let's look at the results in the ``plots`` directory. Since our -scoring function was ``roc_auc``, we examine the ROC Curve first. -The AUC is approximately 0.61, which is not very high but in the -context of the stock market, we may still be able to derive -some predictive power. Further, we are running the model on a -relatively small sample of stocks, as denoted by the jittery -line of the ROC Curve. - -.. image:: rrover_roc_curve.png - :alt: ROC Curve - :width: 100% - :align: center - -We can benefit from more samples, as the learning curve shows -that the training and cross-validation lines have yet to converge. - -.. image:: rrover_learning_curve.png - :alt: ROC Curve - :width: 100% - :align: center - -The good news is that even with a relatively small number of -testing points, the Reliability Curve slopes upward from left -to right, with the dotted line denoting a perfect classifier. - -.. image:: rrover_calibration.png - :alt: ROC Curve - :width: 100% - :align: center - -To get better accuracy, we can raise our threshold to find the -best candidates, since they are ranked by probability, but this -also means limiting our pool of stocks. Let's take a closer -look at the rankings file. - -**Step 3**: From the command line, enter:: - - jupyter notebook - -**Step 4**: Click on the notebook named:: - - A Trading Model.ipynb - -**Step 5**: Run the commands in the notebook, making sure that -when you read in the rankings file, change the date to match -the result from the ``ls`` command. - -``Conclusion`` We can predict large-range days with some confidence, -but only at a higher probability threshold. This is important for -choosing the correct system on any given day. We can achieve -better results with more data, so we recommend expanding the -stock universe, e.g., a group with at least 100 members going -five years back. diff --git a/docs/tutorials/pyfolio.png b/docs/tutorials/pyfolio.png deleted file mode 100644 index b3e6295..0000000 Binary files a/docs/tutorials/pyfolio.png and /dev/null differ diff --git a/docs/tutorials/returns.png b/docs/tutorials/returns.png deleted file mode 100644 index eda028b..0000000 Binary files a/docs/tutorials/returns.png and /dev/null differ diff --git a/docs/tutorials/rrover_calibration.png b/docs/tutorials/rrover_calibration.png deleted file mode 100644 index 9a865d7..0000000 Binary files a/docs/tutorials/rrover_calibration.png and /dev/null differ diff --git a/docs/tutorials/rrover_learning_curve.png b/docs/tutorials/rrover_learning_curve.png deleted file mode 100644 index 4a0d79b..0000000 Binary files a/docs/tutorials/rrover_learning_curve.png and /dev/null differ diff --git a/docs/tutorials/rrover_market.yml b/docs/tutorials/rrover_market.yml deleted file mode 100644 index 4d9ed20..0000000 --- a/docs/tutorials/rrover_market.yml +++ /dev/null @@ -1,138 +0,0 @@ -market: - create_model : True - data_fractal : 1d - data_history : 500 - forecast_period : 1 - fractal : 1d - lag_period : 1 - leaders : ['gap', 'gapbadown', 'gapbaup', 'gapdown', 'gapup'] - predict_history : 100 - schema : yahoo - subject : stock - target_group : test - -groups: - all : ['aaoi', 'aapl', 'acia', 'adbe', 'adi', 'adp', 'agn', 'aig', 'akam', - 'algn', 'alk', 'alxn', 'amat', 'amba', 'amd', 'amgn', 'amt', 'amzn', - 'antm', 'arch', 'asml', 'athn', 'atvi', 'auph', 'avgo', 'axp', 'ayx', - 'azo', 'ba', 'baba', 'bac', 'bby', 'bidu', 'biib', 'brcd', 'bvsn', - 'bwld', 'c', 'cacc', 'cara', 'casy', 'cat', 'cde', 'celg', 'cern', - 'chkp', 'chtr', 'clvs', 'cme', 'cmg', 'cof', 'cohr', 'comm', 'cost', - 'cpk', 'crm', 'crus', 'csco', 'ctsh', 'ctxs', 'csx', 'cvs', 'cybr', - 'data', 'ddd', 'deck', 'dgaz', 'dia', 'dis', 'dish', 'dnkn', 'dpz', - 'drys', 'dust', 'ea', 'ebay', 'edc', 'edz', 'eem', 'elli', 'eog', - 'esrx', 'etrm', 'ewh', 'ewt', 'expe', 'fang', 'fas', 'faz', 'fb', - 'fcx', 'fdx', 'ffiv', 'fit', 'five', 'fnsr', 'fslr', 'ftnt', 'gddy', - 'gdx', 'gdxj', 'ge', 'gild', 'gld', 'glw', 'gm', 'googl', 'gpro', - 'grub', 'gs', 'gwph', 'hal', 'has', 'hd', 'hdp', 'hlf', 'hog', 'hum', - 'ibb', 'ibm', 'ice', 'idxx', 'ilmn', 'ilmn', 'incy', 'intc', 'intu', - 'ip', 'isrg', 'iwm', 'ivv', 'iwf', 'iwm', 'jack', 'jcp', 'jdst', 'jnj', - 'jnpr', 'jnug', 'jpm', 'kite', 'klac', 'ko', 'kss', 'labd', 'labu', - 'len', 'lite', 'lmt', 'lnkd', 'lrcx', 'lulu', 'lvs', 'mbly', 'mcd', - 'mchp', 'mdy', 'meoh', 'mnst', 'mo', 'momo', 'mon', 'mrk', 'ms', 'msft', - 'mtb', 'mu', 'nflx', 'nfx', 'nke', 'ntap', 'ntes', 'ntnx', 'nugt', - 'nvda', 'nxpi', 'nxst', 'oii', 'oled', 'orcl', 'orly', 'p', 'panw', - 'pcln', 'pg', 'pm', 'pnra', 'prgo', 'pxd', 'pypl', 'qcom', 'qqq', - 'qrvo', 'rht', 'sam', 'sbux', 'sds', 'sgen', 'shld', 'shop', 'sig', - 'sina', 'siri', 'skx', 'slb', 'slv', 'smh', 'snap', 'sncr', 'soda', - 'splk', 'spy', 'stld', 'stmp', 'stx', 'svxy', 'swks', 'symc', 't', - 'tbt', 'teva', 'tgt', 'tho', 'tlt', 'tmo', 'tna', 'tqqq', 'trip', - 'tsla', 'ttwo', 'tvix', 'twlo', 'twtr', 'tza', 'uaa', 'ugaz', 'uhs', - 'ulta', 'ulti', 'unh', 'unp', 'upro', 'uri', 'ups', 'uri', 'uthr', - 'utx', 'uvxy', 'v', 'veev', 'viav', 'vlo', 'vmc', 'vrsn', 'vrtx', 'vrx', - 'vwo', 'vxx', 'vz', 'wday', 'wdc', 'wfc', 'wfm', 'wmt', 'wynn', 'x', - 'xbi', 'xhb', 'xiv', 'xle', 'xlf', 'xlk', 'xlnx', 'xom', 'xlp', 'xlu', - 'xlv', 'xme', 'xom', 'wix', 'yelp', 'z'] - etf : ['dia', 'dust', 'edc', 'edz', 'eem', 'ewh', 'ewt', 'fas', 'faz', - 'gld', 'hyg', 'iwm', 'ivv', 'iwf', 'jnk', 'mdy', 'nugt', 'qqq', - 'sds', 'smh', 'spy', 'tbt', 'tlt', 'tna', 'tvix', 'tza', 'upro', - 'uvxy', 'vwo', 'vxx', 'xhb', 'xiv', 'xle', 'xlf', 'xlk', 'xlp', - 'xlu', 'xlv', 'xme'] - tech : ['aapl', 'adbe', 'amat', 'amgn', 'amzn', 'avgo', 'baba', 'bidu', - 'brcd', 'csco', 'ddd', 'emc', 'expe', 'fb', 'fit', 'fslr', 'goog', - 'intc', 'isrg', 'lnkd', 'msft', 'nflx', 'nvda', 'pcln', 'qcom', - 'qqq', 'tsla', 'twtr'] - test : ['aapl', 'amzn', 'goog', 'fb', 'nvda', 'tsla'] - -features: ['abovema_3', 'abovema_5', 'abovema_10', 'abovema_20', 'abovema_50', - 'adx', 'atr', 'bigdown', 'bigup', 'diminus', 'diplus', 'doji', - 'gap', 'gapbadown', 'gapbaup', 'gapdown', 'gapup', - 'hc', 'hh', 'ho', 'hl', 'lc', 'lh', 'll', 'lo', 'hookdown', 'hookup', - 'inside', 'outside', 'madelta_3', 'madelta_5', 'madelta_7', 'madelta_10', - 'madelta_12', 'madelta_15', 'madelta_18', 'madelta_20', 'madelta', - 'net', 'netdown', 'netup', 'nr_3', 'nr_4', 'nr_5', 'nr_7', 'nr_8', - 'nr_10', 'nr_18', 'roi', 'roi_2', 'roi_3', 'roi_4', 'roi_5', 'roi_10', - 'roi_20', 'rr_1_4', 'rr_1_7', 'rr_1_10', 'rr_2_5', 'rr_2_7', 'rr_2_10', - 'rr_3_8', 'rr_3_14', 'rr_4_10', 'rr_4_20', 'rr_5_10', 'rr_5_20', - 'rr_5_30', 'rr_6_14', 'rr_6_25', 'rr_7_14', 'rr_7_35', 'rr_8_22', - 'rrhigh', 'rrlow', 'rrover', 'rrunder', 'rsi_3', 'rsi_4', 'rsi_5', - 'rsi_6', 'rsi_8', 'rsi_10', 'rsi_14', 'sep_3_3', 'sep_5_5', 'sep_8_8', - 'sep_10_10', 'sep_14_14', 'sep_21_21', 'sep_30_30', 'sep_40_40', - 'sephigh', 'seplow', 'trend', 'vma', 'vmover', 'vmratio', 'vmunder', - 'volatility_3', 'volatility_5', 'volatility', 'volatility_20', - 'wr_2', 'wr_3', 'wr', 'wr_5', 'wr_6', 'wr_7', 'wr_10'] - -aliases: - atr : 'ma_truerange' - aver : 'ma_hlrange' - cma : 'ma_close' - cmax : 'highest_close' - cmin : 'lowest_close' - hc : 'higher_close' - hh : 'higher_high' - hl : 'higher_low' - ho : 'higher_open' - hmax : 'highest_high' - hmin : 'lowest_high' - lc : 'lower_close' - lh : 'lower_high' - ll : 'lower_low' - lo : 'lower_open' - lmax : 'highest_low' - lmin : 'lowest_low' - net : 'net_close' - netdown : 'down_net' - netup : 'up_net' - omax : 'highest_open' - omin : 'lowest_open' - rmax : 'highest_hlrange' - rmin : 'lowest_hlrange' - rr : 'maratio_hlrange' - rixc : 'rindex_close_high_low' - rixo : 'rindex_open_high_low' - roi : 'netreturn_close' - rsi : 'rsi_close' - sepma : 'ma_sep' - vma : 'ma_volume' - vmratio : 'maratio_volume' - upmove : 'net_high' - -variables: - abovema : 'close > cma_50' - belowma : 'close < cma_50' - bigup : 'rrover & sephigh & netup' - bigdown : 'rrover & sephigh & netdown' - doji : 'sepdoji & rrunder' - hookdown : 'open > high[1] & close < close[1]' - hookup : 'open < low[1] & close > close[1]' - inside : 'low > low[1] & high < high[1]' - madelta : '(close - cma_50) / atr_10' - nr : 'hlrange == rmin_4' - outside : 'low < low[1] & high > high[1]' - roihigh : 'roi_5 >= 5' - roilow : 'roi_5 < -5' - roiminus : 'roi_5 < 0' - roiplus : 'roi_5 > 0' - rrhigh : 'rr_1_10 >= 1.2' - rrlow : 'rr_1_10 <= 0.8' - rrover : 'rr_1_10 >= 1.0' - rrunder : 'rr_1_10 < 1.0' - sep : 'rixc_1 - rixo_1' - sepdoji : 'abs(sep) <= 15' - sephigh : 'abs(sep_1_1) >= 70' - seplow : 'abs(sep_1_1) <= 30' - trend : 'rrover & sephigh' - vmover : 'vmratio >= 1' - vmunder : 'vmratio < 1' - volatility : 'atr_10 / close' - wr : 'hlrange == rmax_4' diff --git a/docs/tutorials/rrover_model.yml b/docs/tutorials/rrover_model.yml deleted file mode 100644 index 82d4574..0000000 --- a/docs/tutorials/rrover_model.yml +++ /dev/null @@ -1,124 +0,0 @@ -project: - directory : . - file_extension : csv - submission_file : - submit_probas : False - -data: - drop : ['date', 'tag', 'open', 'high', 'low', 'close', 'volume', 'adjclose', - 'low[1]', 'high[1]', 'net', 'close[1]', 'rmin_3', 'rmin_4', 'rmin_5', - 'rmin_7', 'rmin_8', 'rmin_10', 'rmin_18', 'pval', 'mval', 'vma', - 'rmax_2', 'rmax_3', 'rmax_4', 'rmax_5', 'rmax_6', 'rmax_7', 'rmax_10'] - features : '*' - sampling : - option : True - method : under_random - ratio : 0.5 - sentinel : -1 - separator : ',' - shuffle : True - split : 0.4 - target : rrover - target_value : True - -model: - algorithms : ['RF'] - balance_classes : True - calibration : - option : False - type : isotonic - cv_folds : 3 - estimators : 501 - feature_selection : - option : True - percentage : 50 - uni_grid : [5, 10, 15, 20, 25] - score_func : f_classif - grid_search : - option : False - iterations : 100 - random : True - subsample : True - sampling_pct : 0.25 - pvalue_level : 0.01 - rfe : - option : True - step : 10 - scoring_function : 'roc_auc' - type : classification - -features: - clustering : - option : False - increment : 3 - maximum : 30 - minimum : 3 - counts : - option : False - encoding : - rounding : 3 - type : factorize - factors : [] - interactions : - option : True - poly_degree : 2 - sampling_pct : 5 - isomap : - option : False - components : 2 - neighbors : 5 - logtransform : - option : False - numpy : - option : False - pca : - option : False - increment : 3 - maximum : 15 - minimum : 3 - whiten : False - scaling : - option : True - type : standard - scipy : - option : False - text : - ngrams : 1 - vectorize : False - tsne : - option : False - components : 2 - learning_rate : 1000.0 - perplexity : 30.0 - variance : - option : True - threshold : 0.1 - -treatments: - doji : ['alphapy.features', 'runs_test', ['all'], 18] - hc : ['alphapy.features', 'runs_test', ['all'], 18] - hh : ['alphapy.features', 'runs_test', ['all'], 18] - hl : ['alphapy.features', 'runs_test', ['all'], 18] - ho : ['alphapy.features', 'runs_test', ['all'], 18] - rrhigh : ['alphapy.features', 'runs_test', ['all'], 18] - rrlow : ['alphapy.features', 'runs_test', ['all'], 18] - rrover : ['alphapy.features', 'runs_test', ['all'], 18] - rrunder : ['alphapy.features', 'runs_test', ['all'], 18] - sephigh : ['alphapy.features', 'runs_test', ['all'], 18] - seplow : ['alphapy.features', 'runs_test', ['all'], 18] - trend : ['alphapy.features', 'runs_test', ['all'], 18] - -pipeline: - number_jobs : -1 - seed : 10231 - verbosity : 0 - -plots: - calibration : True - confusion_matrix : True - importances : True - learning_curve : True - roc_curve : True - -xgboost: - stopping_rounds : 20 diff --git a/docs/tutorials/rrover_roc_curve.png b/docs/tutorials/rrover_roc_curve.png deleted file mode 100644 index 34e131b..0000000 Binary files a/docs/tutorials/rrover_roc_curve.png and /dev/null differ diff --git a/docs/tutorials/system.rst b/docs/tutorials/system.rst deleted file mode 100644 index b996431..0000000 --- a/docs/tutorials/system.rst +++ /dev/null @@ -1,98 +0,0 @@ -Trading System Tutorial -======================= - -*MarketFlow Running Time: Approximately 1 minute* - -.. image:: returns.png - :alt: Cumulative Returns - :width: 75% - :align: center - -A trading system is a set of automated rules for buying and selling -stocks, options, futures, and other instruments. Trading is considered -to be both an art and a science; the scientific branch is known as -*technical analysis*. Many technicians spend their lives chasing the -Holy Grail: a system that will make them rich simply by detecting -common patterns. Technicians in history such as Edwards, Elliott, -Fibonacci, Gann, and Gartley show us visually appealing charts, -but there is no scientific evidence proving that these techniques -actually work. - -Trading systems generally operate in two contexts: trend and -counter-trend. A system that follows the trend tries to stay in -one direction as long as possible. A system that bucks the trend -reverses direction at certain support and resistance levels, also -known as fading the trend. With MarketFlow, you can implement -either type of system using our long/short strategy. - -In this tutorial, we are going to test a simple long/short system. -If today's closing price is greater than yesterday's close, then -we go long. If today's close is lower than yesterday's, then we -go short, so we always have a position in the market. - -**Step 1**: From the ``examples`` directory, change your directory:: - - cd "Trading System" - -Before running MarketFlow, let's review the ``market.yml`` file -in the ``config`` directory. Since we are just running a system, -we really don't need the ``model.yml`` file, but if you have a -project where the system is derived from a model, then you will -want to maintain both files. - -In ``market.yml``, we will test our system on five stocks in the -target group ``faang``, going back 1000 trading days. We need -to define only two features: ``hc`` for higher close, and ``lc`` -for lower close. We name the system ``closer``, which requires -just a ``longentry`` and a ``shortentry``. There are no exit -conditions and no holding period, so we will always have a position -in each stock. - -.. literalinclude:: closer_market.yml - :language: yaml - :caption: **market.yml** - -**Step 2**: Now, let's run MarketFlow:: - - mflow - -As ``mflow`` runs, you will see the progress of the workflow, -and the logging output is saved in ``market_flow.log``. When the -workflow completes, your project structure will look like this, -with an additional directory ``systems``:: - - Trading System - ├── market_flow.log - ├── config - ├── algos.yml - ├── market.yml - ├── model.yml - └── data - └── input - └── model - └── output - └── plots - └── systems - ├── faang_closer_positions_1d.csv - ├── faang_closer_returns_1d.csv - ├── faang_closer_trades_1d.csv - ├── faang_closer_transactions_1d.csv - -MarketFlow records position, return, and transaction data in the -``systems`` directory, so now we can analyze our results with -*Pyfolio*. - -**Step 3**: From the command line, enter:: - - jupyter notebook - -**Step 4**: Click on the notebook named:: - - A Trading System.ipynb - -You should obtain the following results in your notebook. - -.. image:: pyfolio.png - :alt: Pyfolio Tear Sheet - :width: 100% - :align: center diff --git a/docs/user_guide/market.yml b/docs/user_guide/market.yml deleted file mode 100644 index 7f73975..0000000 --- a/docs/user_guide/market.yml +++ /dev/null @@ -1,134 +0,0 @@ -market: - data_history : 2000 - forecast_period : 1 - fractal : 1d - leaders : ['gap', 'gapbadown', 'gapbaup', 'gapdown', 'gapup'] - predict_history : 100 - schema : prices - target_group : test - -groups: - all : ['aaoi', 'aapl', 'acia', 'adbe', 'adi', 'adp', 'agn', 'aig', 'akam', - 'algn', 'alk', 'alxn', 'amat', 'amba', 'amd', 'amgn', 'amt', 'amzn', - 'antm', 'arch', 'asml', 'athn', 'atvi', 'auph', 'avgo', 'axp', 'ayx', - 'azo', 'ba', 'baba', 'bac', 'bby', 'bidu', 'biib', 'brcd', 'bvsn', - 'bwld', 'c', 'cacc', 'cara', 'casy', 'cat', 'cde', 'celg', 'cern', - 'chkp', 'chtr', 'clvs', 'cme', 'cmg', 'cof', 'cohr', 'comm', 'cost', - 'cpk', 'crm', 'crus', 'csco', 'ctsh', 'ctxs', 'csx', 'cvs', 'cybr', - 'data', 'ddd', 'deck', 'dgaz', 'dia', 'dis', 'dish', 'dnkn', 'dpz', - 'drys', 'dust', 'ea', 'ebay', 'edc', 'edz', 'eem', 'elli', 'eog', - 'esrx', 'etrm', 'ewh', 'ewt', 'expe', 'fang', 'fas', 'faz', 'fb', - 'fcx', 'fdx', 'ffiv', 'fit', 'five', 'fnsr', 'fslr', 'ftnt', 'gddy', - 'gdx', 'gdxj', 'ge', 'gild', 'gld', 'glw', 'gm', 'googl', 'gpro', - 'grub', 'gs', 'gwph', 'hal', 'has', 'hd', 'hdp', 'hlf', 'hog', 'hum', - 'ibb', 'ibm', 'ice', 'idxx', 'ilmn', 'ilmn', 'incy', 'intc', 'intu', - 'ip', 'isrg', 'iwm', 'ivv', 'iwf', 'iwm', 'jack', 'jcp', 'jdst', 'jnj', - 'jnpr', 'jnug', 'jpm', 'kite', 'klac', 'ko', 'kss', 'labd', 'labu', - 'len', 'lite', 'lmt', 'lnkd', 'lrcx', 'lulu', 'lvs', 'mbly', 'mcd', - 'mchp', 'mdy', 'meoh', 'mnst', 'mo', 'momo', 'mon', 'mrk', 'ms', 'msft', - 'mtb', 'mu', 'nflx', 'nfx', 'nke', 'ntap', 'ntes', 'ntnx', 'nugt', - 'nvda', 'nxpi', 'nxst', 'oii', 'oled', 'orcl', 'orly', 'p', 'panw', - 'pcln', 'pg', 'pm', 'pnra', 'prgo', 'pxd', 'pypl', 'qcom', 'qqq', - 'qrvo', 'rht', 'sam', 'sbux', 'sds', 'sgen', 'shld', 'shop', 'sig', - 'sina', 'siri', 'skx', 'slb', 'slv', 'smh', 'snap', 'sncr', 'soda', - 'splk', 'spy', 'stld', 'stmp', 'stx', 'svxy', 'swks', 'symc', 't', - 'tbt', 'teva', 'tgt', 'tho', 'tlt', 'tmo', 'tna', 'tqqq', 'trip', - 'tsla', 'ttwo', 'tvix', 'twlo', 'twtr', 'tza', 'uaa', 'ugaz', 'uhs', - 'ulta', 'ulti', 'unh', 'unp', 'upro', 'uri', 'ups', 'uri', 'uthr', - 'utx', 'uvxy', 'v', 'veev', 'viav', 'vlo', 'vmc', 'vrsn', 'vrtx', 'vrx', - 'vwo', 'vxx', 'vz', 'wday', 'wdc', 'wfc', 'wfm', 'wmt', 'wynn', 'x', - 'xbi', 'xhb', 'xiv', 'xle', 'xlf', 'xlk', 'xlnx', 'xom', 'xlp', 'xlu', - 'xlv', 'xme', 'xom', 'wix', 'yelp', 'z'] - etf : ['dia', 'dust', 'edc', 'edz', 'eem', 'ewh', 'ewt', 'fas', 'faz', - 'gld', 'hyg', 'iwm', 'ivv', 'iwf', 'jnk', 'mdy', 'nugt', 'qqq', - 'sds', 'smh', 'spy', 'tbt', 'tlt', 'tna', 'tvix', 'tza', 'upro', - 'uvxy', 'vwo', 'vxx', 'xhb', 'xiv', 'xle', 'xlf', 'xlk', 'xlp', - 'xlu', 'xlv', 'xme'] - tech : ['aapl', 'adbe', 'amat', 'amgn', 'amzn', 'avgo', 'baba', 'bidu', - 'brcd', 'csco', 'ddd', 'emc', 'expe', 'fb', 'fit', 'fslr', 'goog', - 'intc', 'isrg', 'lnkd', 'msft', 'nflx', 'nvda', 'pcln', 'qcom', - 'qqq', 'tsla', 'twtr'] - test : ['aapl', 'amzn', 'goog', 'fb', 'nvda', 'tsla'] - -features: ['abovema_3', 'abovema_5', 'abovema_10', 'abovema_20', 'abovema_50', - 'adx', 'atr', 'bigdown', 'bigup', 'diminus', 'diplus', 'doji', - 'gap', 'gapbadown', 'gapbaup', 'gapdown', 'gapup', - 'hc', 'hh', 'ho', 'hl', 'lc', 'lh', 'll', 'lo', 'hookdown', 'hookup', - 'inside', 'outside', 'madelta_3', 'madelta_5', 'madelta_7', 'madelta_10', - 'madelta_12', 'madelta_15', 'madelta_18', 'madelta_20', 'madelta', - 'net', 'netdown', 'netup', 'nr_3', 'nr_4', 'nr_5', 'nr_7', 'nr_8', - 'nr_10', 'nr_18', 'roi', 'roi_2', 'roi_3', 'roi_4', 'roi_5', 'roi_10', - 'roi_20', 'rr_1_4', 'rr_1_7', 'rr_1_10', 'rr_2_5', 'rr_2_7', 'rr_2_10', - 'rr_3_8', 'rr_3_14', 'rr_4_10', 'rr_4_20', 'rr_5_10', 'rr_5_20', - 'rr_5_30', 'rr_6_14', 'rr_6_25', 'rr_7_14', 'rr_7_35', 'rr_8_22', - 'rrhigh', 'rrlow', 'rrover', 'rrunder', 'rsi_3', 'rsi_4', 'rsi_5', - 'rsi_6', 'rsi_8', 'rsi_10', 'rsi_14', 'sep_3_3', 'sep_5_5', 'sep_8_8', - 'sep_10_10', 'sep_14_14', 'sep_21_21', 'sep_30_30', 'sep_40_40', - 'sephigh', 'seplow', 'trend', 'vma', 'vmover', 'vmratio', 'vmunder', - 'volatility_3', 'volatility_5', 'volatility', 'volatility_20', - 'wr_2', 'wr_3', 'wr', 'wr_5', 'wr_6', 'wr_7', 'wr_10'] - -aliases: - atr : 'ma_truerange' - aver : 'ma_hlrange' - cma : 'ma_close' - cmax : 'highest_close' - cmin : 'lowest_close' - hc : 'higher_close' - hh : 'higher_high' - hl : 'higher_low' - ho : 'higher_open' - hmax : 'highest_high' - hmin : 'lowest_high' - lc : 'lower_close' - lh : 'lower_high' - ll : 'lower_low' - lo : 'lower_open' - lmax : 'highest_low' - lmin : 'lowest_low' - net : 'net_close' - netdown : 'down_net' - netup : 'up_net' - omax : 'highest_open' - omin : 'lowest_open' - rmax : 'highest_hlrange' - rmin : 'lowest_hlrange' - rr : 'maratio_hlrange' - rixc : 'rindex_close_high_low' - rixo : 'rindex_open_high_low' - roi : 'netreturn_close' - rsi : 'rsi_close' - sepma : 'ma_sep' - vma : 'ma_volume' - vmratio : 'maratio_volume' - upmove : 'net_high' - -variables: - abovema : 'close > cma_50' - belowma : 'close < cma_50' - bigup : 'rrover & sephigh & netup' - bigdown : 'rrover & sephigh & netdown' - doji : 'sepdoji & rrunder' - hookdown : 'open > high[1] & close < close[1]' - hookup : 'open < low[1] & close > close[1]' - inside : 'low > low[1] & high < high[1]' - madelta : '(close - cma_50) / atr_10' - nr : 'hlrange == rmin_4' - outside : 'low < low[1] & high > high[1]' - roihigh : 'roi_5 >= 5' - roilow : 'roi_5 < -5' - roiminus : 'roi_5 < 0' - roiplus : 'roi_5 > 0' - rrhigh : 'rr_1_10 >= 1.2' - rrlow : 'rr_1_10 <= 0.8' - rrover : 'rr_1_10 >= 1.0' - rrunder : 'rr_1_10 < 1.0' - sep : 'rixc_1 - rixo_1' - sepdoji : 'abs(sep) <= 15' - sephigh : 'abs(sep_1_1) >= 70' - seplow : 'abs(sep_1_1) <= 30' - trend : 'rrover & sephigh' - vmover : 'vmratio >= 1' - vmunder : 'vmratio < 1' - volatility : 'atr_10 / close' - wr : 'hlrange == rmax_4' diff --git a/docs/user_guide/market_flow.rst b/docs/user_guide/market_flow.rst deleted file mode 100644 index 1e03697..0000000 --- a/docs/user_guide/market_flow.rst +++ /dev/null @@ -1,369 +0,0 @@ -MarketFlow -========== - -**MarketFlow** (``mflow``) is AlphaPy Pro's specialized pipeline for financial market -analysis and algorithmic trading. It transforms raw market data into machine learning -models for market predictions, portfolio optimization, and systematic trading strategies. - -.. image:: market_pipeline.png - :alt: MarketFlow Pipeline - :width: 100% - :align: center - -Overview --------- - -MarketFlow provides a complete workflow for quantitative finance: - -1. **Data Acquisition** - Fetch market data from multiple sources -2. **Feature Engineering** - Create technical indicators and market features -3. **Signal Generation** - Build predictive models for market movements -4. **Portfolio Construction** - Optimize position sizing and risk management -5. **Backtesting** - Evaluate strategy performance with realistic assumptions - -Modern Data Sources -------------------- - -AlphaPy Pro MarketFlow supports multiple professional-grade data providers: - -**Primary Data Sources:** - -* **EODHD (End of Day Historical Data)** - Daily and intraday market data -* **Yahoo Finance** - Free daily stock data via yfinance -* **Polygon** - Professional real-time and historical market data - -**Legacy Support:** - -* **Google Finance** - Deprecated (API discontinued in 2017) -* **Quandl** - Limited free tier available - -.. note:: Google Finance intraday data is no longer available. Modern applications - should use EODHD or Polygon for intraday data requirements. - -Data Format and Structure -------------------------- - -MarketFlow standardizes all market data into a consistent format: - -**Daily Market Data (OHLCV):** - -.. code-block:: csv - - Date,Open,High,Low,Close,Volume,Symbol - 2024-01-02,185.64,186.89,183.86,185.64,52844800,AAPL - 2024-01-03,184.97,185.89,182.73,184.25,58414800,AAPL - -**Intraday Market Data:** - -.. code-block:: csv - - Datetime,Open,High,Low,Close,Volume,Symbol,bar_number - 2024-01-02 09:30:00,185.64,185.89,185.30,185.50,125400,AAPL,1 - 2024-01-02 09:31:00,185.50,185.75,185.25,185.60,98300,AAPL,2 - -Configuration -------------- - -MarketFlow uses a hierarchical configuration system combining multiple YAML files: - -**market.yml** - Market-specific configuration: - -.. code-block:: yaml - - market: - data_history : 252 # Trading days of history - forecast_period : 1 # Days to forecast - fractal : '1d' # Time frame (1d, 1h, 5m) - leaders : ['open'] # Features available at market open - predict_history : 100 # Min periods for prediction - schema : 'prices' # Data schema identifier - target_group : 'tech' # Symbol group to analyze - - groups: - tech: - - AAPL - - MSFT - - GOOGL - - META - - TSLA - - crypto: - - BTC-USD - - ETH-USD - - ADA-USD - - etf: - - SPY - - QQQ - - IWM - -**Data Source Configuration:** - -.. code-block:: yaml - - data_sources: - primary: 'eodhd' # Primary data source - fallback: 'yahoo' # Fallback source - - eodhd: - api_key: 'your_api_key' - base_url: 'https://eodhistoricaldata.com/api/' - - polygon: - api_key: 'your_polygon_key' - base_url: 'https://api.polygon.io/' - -Variable Definition Language (VDL) ----------------------------------- - -MarketFlow includes a powerful Variable Definition Language for creating -technical indicators and custom features: - -**Basic Technical Indicators:** - -.. code-block:: yaml - - variables: - # Moving averages - sma_20: 'mean(close, 20)' # Simple moving average - ema_12: 'ewm(close, 12)' # Exponential moving average - - # Momentum indicators - rsi_14: 'rsi(close, 14)' # Relative Strength Index - macd: 'macd(close, 12, 26, 9)' # MACD - - # Volatility indicators - bb_upper: 'bollinger_upper(close, 20, 2)' # Bollinger Bands - atr_14: 'atr(high, low, close, 14)' # Average True Range - - # Volume indicators - obv: 'on_balance_volume(close, volume)' # On Balance Volume - vwap: 'volume_weighted_average_price(high, low, close, volume)' - -**Custom Expressions:** - -.. code-block:: yaml - - variables: - # Price relationships - above_sma: 'close > sma_20' # Boolean: price above SMA - price_momentum: 'close / sma_50' # Price relative to trend - - # Volatility measures - daily_return: 'pct_change(close, 1)' - volatility: 'std(daily_return, 20)' - - # Multi-timeframe - weekly_high: 'resample(high, "W", "max")' - -**Aliases for Convenience:** - -.. code-block:: yaml - - aliases: - cma: 'sma_close' # Closing moving average - vol: 'volume' # Volume shorthand - ret: 'pct_change(close, 1)' # Daily returns - -Advanced Features ------------------ - -**Meta-Labeling:** - -MarketFlow implements the Triple Barrier Method for advanced financial ML: - -.. code-block:: yaml - - model: - meta_labeling: - option: True - profit_target: 0.02 # 2% profit target - stop_loss: 0.01 # 1% stop loss - max_holding: 5 # Max holding period (days) - volatility_window: 20 # Volatility calculation window - -**Time Series Cross-Validation:** - -Proper time series validation that respects temporal order: - -.. code-block:: yaml - - model: - time_series: - option: True - cv_method: 'time_series_split' # Time-aware CV - test_size: 0.2 # Recent 20% for testing - gap: 1 # Gap between train/test - -**Portfolio Optimization:** - -.. code-block:: yaml - - portfolio: - optimization: 'mean_variance' # Optimization method - max_weight: 0.1 # Max position size - rebalance_freq: 'monthly' # Rebalancing frequency - transaction_cost: 0.001 # 10 bps transaction cost - -Running MarketFlow ------------------- - -**Basic Usage:** - -.. code-block:: bash - - # Train models for default group - mflow - - # Train with specific date range - mflow --tdate 2020-01-01 --pdate 2023-12-31 - - # Generate predictions only - mflow --predict - -**Configuration Options:** - -.. code-block:: bash - - # Use different data source - mflow --source eodhd - - # Extended history - mflow --history 500 - - # Different time frame - mflow --fractal 1h - -Output Structure ----------------- - -MarketFlow generates comprehensive output for analysis: - -.. code-block:: text - - runs/run_YYYYMMDD_HHMMSS/ - ├── config/ - │ ├── market.yml - │ └── model.yml - ├── data/ - │ ├── features/ # Engineered features - │ ├── prices/ # Raw price data - │ └── indicators/ # Technical indicators - ├── models/ - │ ├── signal_model.pkl # Trained prediction model - │ └── portfolio_model.pkl # Portfolio optimization - ├── predictions/ - │ ├── signals.csv # Model predictions - │ └── positions.csv # Portfolio positions - └── analysis/ - ├── backtest_results.html # Performance report - ├── factor_analysis.csv # Factor attribution - └── risk_metrics.csv # Risk analytics - -Trading Systems Integration ---------------------------- - -MarketFlow can generate trading signals for various execution platforms: - -**Signal Generation:** - -.. code-block:: yaml - - systems: - long_short: - signal_long: 'prediction > 0.6' # Long threshold - signal_short: 'prediction < 0.4' # Short threshold - max_positions: 20 # Position limit - - momentum: - signal_long: 'close > sma_20 and rsi_14 < 70' - signal_exit: 'close < sma_20 or rsi_14 > 80' - -**Risk Management:** - -.. code-block:: yaml - - risk: - max_portfolio_vol: 0.15 # 15% max portfolio volatility - max_individual_weight: 0.05 # 5% max individual position - stop_loss: 0.02 # 2% stop loss - profit_target: 0.04 # 4% profit target - -Example Applications --------------------- - -**1. Momentum Strategy:** - -.. code-block:: yaml - - target: 'future_return_5d > 0.02' # 2% return in 5 days - features: - - 'rsi_14' - - 'macd_signal' - - 'volume_ratio_20' - - 'price_momentum_50' - -**2. Mean Reversion:** - -.. code-block:: yaml - - target: 'future_return_1d' - features: - - 'zscore_close_20' # Z-score of price - - 'rsi_oversold' # RSI < 30 - - 'bollinger_position' # Position in Bollinger Bands - -**3. Multi-Asset Strategy:** - -.. code-block:: yaml - - groups: - universe: - - SPY # S&P 500 - - TLT # 20+ Year Treasury - - GLD # Gold - - VIX # Volatility - - features: - - 'correlation_spy_20' - - 'relative_strength' - - 'regime_indicator' - -Performance Analytics ---------------------- - -MarketFlow provides comprehensive performance analysis: - -**Returns Analysis:** -* Total return and CAGR -* Sharpe ratio and Sortino ratio -* Maximum drawdown -* Win rate and profit factor - -**Risk Metrics:** -* Value at Risk (VaR) -* Conditional VaR (CVaR) -* Beta and correlation analysis -* Factor exposure analysis - -**Trading Metrics:** -* Transaction costs -* Turnover and capacity -* Implementation shortfall -* Market impact analysis - -Best Practices --------------- - -1. **Data Quality** - Validate data sources and handle corporate actions -2. **Feature Engineering** - Focus on regime-aware features -3. **Walk-Forward Analysis** - Use time-series cross-validation -4. **Risk Management** - Implement proper position sizing -5. **Transaction Costs** - Account for realistic trading costs -6. **Out-of-Sample Testing** - Reserve recent data for final validation - -For detailed examples, see the ``projects/`` directory which includes: - -* Shannon's Demon trading strategy -* Time series momentum models -* Triple barrier method implementations \ No newline at end of file diff --git a/docs/user_guide/market_model.yml b/docs/user_guide/market_model.yml deleted file mode 100644 index 82d4574..0000000 --- a/docs/user_guide/market_model.yml +++ /dev/null @@ -1,124 +0,0 @@ -project: - directory : . - file_extension : csv - submission_file : - submit_probas : False - -data: - drop : ['date', 'tag', 'open', 'high', 'low', 'close', 'volume', 'adjclose', - 'low[1]', 'high[1]', 'net', 'close[1]', 'rmin_3', 'rmin_4', 'rmin_5', - 'rmin_7', 'rmin_8', 'rmin_10', 'rmin_18', 'pval', 'mval', 'vma', - 'rmax_2', 'rmax_3', 'rmax_4', 'rmax_5', 'rmax_6', 'rmax_7', 'rmax_10'] - features : '*' - sampling : - option : True - method : under_random - ratio : 0.5 - sentinel : -1 - separator : ',' - shuffle : True - split : 0.4 - target : rrover - target_value : True - -model: - algorithms : ['RF'] - balance_classes : True - calibration : - option : False - type : isotonic - cv_folds : 3 - estimators : 501 - feature_selection : - option : True - percentage : 50 - uni_grid : [5, 10, 15, 20, 25] - score_func : f_classif - grid_search : - option : False - iterations : 100 - random : True - subsample : True - sampling_pct : 0.25 - pvalue_level : 0.01 - rfe : - option : True - step : 10 - scoring_function : 'roc_auc' - type : classification - -features: - clustering : - option : False - increment : 3 - maximum : 30 - minimum : 3 - counts : - option : False - encoding : - rounding : 3 - type : factorize - factors : [] - interactions : - option : True - poly_degree : 2 - sampling_pct : 5 - isomap : - option : False - components : 2 - neighbors : 5 - logtransform : - option : False - numpy : - option : False - pca : - option : False - increment : 3 - maximum : 15 - minimum : 3 - whiten : False - scaling : - option : True - type : standard - scipy : - option : False - text : - ngrams : 1 - vectorize : False - tsne : - option : False - components : 2 - learning_rate : 1000.0 - perplexity : 30.0 - variance : - option : True - threshold : 0.1 - -treatments: - doji : ['alphapy.features', 'runs_test', ['all'], 18] - hc : ['alphapy.features', 'runs_test', ['all'], 18] - hh : ['alphapy.features', 'runs_test', ['all'], 18] - hl : ['alphapy.features', 'runs_test', ['all'], 18] - ho : ['alphapy.features', 'runs_test', ['all'], 18] - rrhigh : ['alphapy.features', 'runs_test', ['all'], 18] - rrlow : ['alphapy.features', 'runs_test', ['all'], 18] - rrover : ['alphapy.features', 'runs_test', ['all'], 18] - rrunder : ['alphapy.features', 'runs_test', ['all'], 18] - sephigh : ['alphapy.features', 'runs_test', ['all'], 18] - seplow : ['alphapy.features', 'runs_test', ['all'], 18] - trend : ['alphapy.features', 'runs_test', ['all'], 18] - -pipeline: - number_jobs : -1 - seed : 10231 - verbosity : 0 - -plots: - calibration : True - confusion_matrix : True - importances : True - learning_curve : True - roc_curve : True - -xgboost: - stopping_rounds : 20 diff --git a/docs/user_guide/market_pipeline.png b/docs/user_guide/market_pipeline.png deleted file mode 100644 index 7bc7180..0000000 Binary files a/docs/user_guide/market_pipeline.png and /dev/null differ diff --git a/docs/user_guide/pipelines.rst b/docs/user_guide/pipelines.rst index 4ffa5bc..28f4f50 100644 --- a/docs/user_guide/pipelines.rst +++ b/docs/user_guide/pipelines.rst @@ -252,8 +252,8 @@ All outputs are saved in timestamped run directories:: Advanced Features ----------------- -**Meta-Labeling:** - Triple Barrier Method for financial ML applications +**Variable/Transform DSL:** + Declarative feature definitions with reusable transform functions **Time Series Support:** Lag features, rolling statistics, and proper CV splits @@ -277,6 +277,4 @@ Best Practices 5. **Ensemble Wisely** - Combine diverse models for best results 6. **Track Experiments** - Use the timestamped runs for comparison -For specific domain applications, see: - -* :doc:`market_flow` - Financial market analysis and trading strategies \ No newline at end of file +For concrete project setup details, see :doc:`project`. diff --git a/docs/user_guide/system_pipeline.png b/docs/user_guide/system_pipeline.png deleted file mode 100644 index 816b1e9..0000000 Binary files a/docs/user_guide/system_pipeline.png and /dev/null differ diff --git a/projects/time-series/README.md b/projects/time-series/README.md index e0c5a8c..585844f 100644 --- a/projects/time-series/README.md +++ b/projects/time-series/README.md @@ -67,13 +67,8 @@ The project uses regression algorithms optimized for time series: alphapy ``` - Or use the market flow pipeline if working with financial time series: - ```bash - mflow - ``` - 4. **Monitor progress**: - - Check `alphapy.log` or `market_flow.log` for execution details + - Check `alphapy.log` for execution details - Results saved in timestamped directories under `runs/` ## Output Structure @@ -145,4 +140,4 @@ This setup works well for: 1. **Memory Issues**: Reduce features or use sampling for large datasets 2. **Poor Performance**: Check for data leakage or insufficient lag features 3. **Overfitting**: Reduce model complexity or add regularization -4. **Computational Time**: Use fewer algorithms or enable parallel processing \ No newline at end of file +4. **Computational Time**: Use fewer algorithms or enable parallel processing diff --git a/projects/time-series/config/model.yml b/projects/time-series/config/model.yml index b29f3d0..2300926 100644 --- a/projects/time-series/config/model.yml +++ b/projects/time-series/config/model.yml @@ -104,7 +104,7 @@ features: threshold : 0.1 transforms: - ds : ['alphapy.transforms', 'dateparts'] + ds : ['alphapy.calendrical', 'dateparts'] pipeline: number_jobs : -1 diff --git a/scripts/markets/analyze_encoding_deep.py b/scripts/markets/analyze_encoding_deep.py deleted file mode 100644 index eb3a830..0000000 --- a/scripts/markets/analyze_encoding_deep.py +++ /dev/null @@ -1,277 +0,0 @@ -""" -Deep analysis of encoded price patterns — looks at component-level patterns, -conditional probabilities, and regime-aware signals. -""" - -import os -import sys -from collections import defaultdict -from datetime import datetime, timedelta - -import numpy as np -import pandas as pd - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app", "backend")) -from dotenv import load_dotenv -load_dotenv(os.path.join(os.path.dirname(__file__), "..", "app", "backend", ".env")) -from data_fetcher import MassiveDataFetcher - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from app.agent.ml.encoding.encoder import encode_price_df -from app.agent.ml.encoding.price_encoder import PriceEncoder - - -SYMBOLS = [ - "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", - "JPM", "V", "UNH", "HD", "NFLX", "ADBE", "CRM", "AMD", - "PYPL", "BA", "GS", "XOM", "CVX", "COST", "COIN", "SQ", -] - -FORWARD_DAYS = [1, 3, 5, 10] - - -def fetch_all(fetcher, symbols, days_back=730): - """Fetch daily bars for all symbols.""" - data = {} - end = datetime.now().strftime("%Y-%m-%d") - start = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d") - - for i, sym in enumerate(symbols): - print(f" [{i+1}/{len(symbols)}] {sym}...", end=" ", flush=True) - df = fetcher.fetch_bars(sym, "1d", start, end) - if df is not None and not df.empty and len(df) >= 60: - for col in ["open", "high", "low", "close"]: - df[col] = pd.to_numeric(df[col], errors="coerce") - df["volume"] = pd.to_numeric(df["volume"], errors="coerce").fillna(0) - df = df.dropna(subset=["close"]).reset_index(drop=True) - data[sym] = df - print(f"{len(df)} bars") - else: - print("skip") - return data - - -def build_encoded_dataset(all_data): - """Encode all symbols and build a unified dataset with forward returns.""" - rows = [] - for sym, df in all_data.items(): - enc = encode_price_df(df, p=20) - close = df["close"].values - - for i in range(20, len(df) - 10): - row = { - "symbol": sym, - "bar_idx": i, - "close": close[i], - # Encoding components - "token": enc["encoded_str"].iloc[i], - "pivot_str": enc["pivot_str"].iloc[i], - "net_str": enc["net_str"].iloc[i], - "range_str": enc["range_str"].iloc[i], - "volume_str": enc["volume_str"].iloc[i], - # Numeric features - "pivot_strength": enc["pivot_strength"].iloc[i], - "pivot_direction": enc["pivot_direction"].iloc[i], - "net_direction": enc["net_direction"].iloc[i], - "net_magnitude": enc["net_magnitude"].iloc[i], - "range_magnitude": enc["range_magnitude"].iloc[i], - "volume_magnitude": enc["volume_magnitude"].iloc[i], - # Previous bar context - "prev_token": enc["encoded_str"].iloc[i-1] if i > 0 else "", - "prev_pivot": enc["pivot_str"].iloc[i-1] if i > 0 else "", - "prev_net": enc["net_str"].iloc[i-1] if i > 0 else "", - # 2-gram - "bigram": f"{enc['encoded_str'].iloc[i-1]} {enc['encoded_str'].iloc[i]}", - # 3-gram - "trigram": f"{enc['encoded_str'].iloc[i-2]} {enc['encoded_str'].iloc[i-1]} {enc['encoded_str'].iloc[i]}" if i >= 2 else "", - } - # Forward returns - for fwd in FORWARD_DAYS: - if i + fwd < len(close): - row[f"fwd_{fwd}d"] = (close[i + fwd] - close[i]) / close[i] - else: - row[f"fwd_{fwd}d"] = np.nan - rows.append(row) - - return pd.DataFrame(rows) - - -def analyze_single_dimension(dataset, dim_col, dim_name, min_count=100): - """Analyze forward returns by a single encoding dimension.""" - print(f"\n{'─'*70}") - print(f" {dim_name.upper()} DIMENSION") - print(f"{'─'*70}") - - groups = dataset.groupby(dim_col) - results = [] - for val, grp in groups: - if len(grp) < min_count: - continue - row = {"value": val, "count": len(grp)} - for fwd in FORWARD_DAYS: - col = f"fwd_{fwd}d" - rets = grp[col].dropna() - row[f"avg_{fwd}d"] = rets.mean() * 100 - row[f"win_{fwd}d"] = (rets > 0).mean() * 100 - row[f"sharpe_{fwd}d"] = (rets.mean() / max(rets.std(), 1e-10)) * np.sqrt(252/fwd) - results.append(row) - - results.sort(key=lambda r: r.get("avg_5d", 0), reverse=True) - - print(f" {'Value':<12} {'Count':>6} {'Avg1d':>7} {'Avg5d':>7} {'Win5d':>6} {'Shp5d':>6} {'Avg10d':>7} {'Win10d':>6}") - print(f" {'─'*12} {'─'*6} {'─'*7} {'─'*7} {'─'*6} {'─'*6} {'─'*7} {'─'*6}") - for r in results: - print(f" {r['value']:<12} {r['count']:>6} {r['avg_1d']:>6.2f}% {r['avg_5d']:>6.2f}% {r['win_5d']:>5.1f}% {r['sharpe_5d']:>6.2f} {r['avg_10d']:>6.2f}% {r['win_10d']:>5.1f}%") - - -def analyze_combinations(dataset, min_count=30): - """Analyze pivot + net combinations — most actionable signals.""" - print(f"\n{'='*70}") - print(f" PIVOT + NET COMBINATIONS (the money patterns)") - print(f"{'='*70}") - - dataset["pivot_net"] = dataset["pivot_str"] + " " + dataset["net_str"] - groups = dataset.groupby("pivot_net") - - results = [] - for val, grp in groups: - if len(grp) < min_count: - continue - rets_5 = grp["fwd_5d"].dropna() - rets_10 = grp["fwd_10d"].dropna() - row = { - "combo": val, - "count": len(grp), - "avg_5d": rets_5.mean() * 100, - "win_5d": (rets_5 > 0).mean() * 100, - "sharpe_5d": (rets_5.mean() / max(rets_5.std(), 1e-10)) * np.sqrt(252/5), - "avg_10d": rets_10.mean() * 100, - "win_10d": (rets_10 > 0).mean() * 100, - } - results.append(row) - - results.sort(key=lambda r: r["avg_5d"], reverse=True) - - print(f"\n {'Pivot+Net':<12} {'Count':>6} {'Avg5d':>7} {'Win5d':>6} {'Shp5d':>6} {'Avg10d':>7} {'Win10d':>6}") - print(f" {'─'*12} {'─'*6} {'─'*7} {'─'*6} {'─'*6} {'─'*7} {'─'*6}") - for r in results: - # Highlight strong signals - marker = "" - if r["win_5d"] > 57 and r["avg_5d"] > 0.3: - marker = " <-- BULLISH" - elif r["win_5d"] < 43 and r["avg_5d"] < -0.3: - marker = " <-- BEARISH" - print(f" {r['combo']:<12} {r['count']:>6} {r['avg_5d']:>6.2f}% {r['win_5d']:>5.1f}% {r['sharpe_5d']:>6.2f} {r['avg_10d']:>6.2f}% {r['win_10d']:>5.1f}%{marker}") - - -def analyze_sequences(dataset, min_count=20): - """Analyze 2-bar sequences: what follows what?""" - print(f"\n{'='*70}") - print(f" SEQUENTIAL PATTERNS: 'After X, if Y appears...'") - print(f"{'='*70}") - - # Focus: after a pivot low + negative bar, what predicts reversal? - bearish_bars = dataset[dataset["net_str"].isin(["N1", "N2"])] - next_bar_positive = bearish_bars[bearish_bars["pivot_strength"] >= 5] - - if len(next_bar_positive) >= 10: - rets = next_bar_positive["fwd_5d"].dropna() - print(f"\n After strong pivot low (L5+) with negative net:") - print(f" Count: {len(next_bar_positive)}") - print(f" Avg 5d return: {rets.mean()*100:.2f}%") - print(f" Win rate 5d: {(rets > 0).mean()*100:.1f}%") - - # After volume climax (V2) with negative net - climax = dataset[(dataset["volume_str"] == "V2") & (dataset["net_direction"] == -1)] - if len(climax) >= 10: - rets = climax["fwd_5d"].dropna() - print(f"\n After volume climax (V2) on down bar:") - print(f" Count: {len(climax)}") - print(f" Avg 5d return: {rets.mean()*100:.2f}%") - print(f" Win rate 5d: {(rets > 0).mean()*100:.1f}%") - - # H20 (max pivot high) — trend strength - h20 = dataset[dataset["pivot_str"] == "H20"] - if len(h20) >= 10: - rets = h20["fwd_5d"].dropna() - print(f"\n At H20 (20-bar high streak):") - print(f" Count: {len(h20)}") - print(f" Avg 5d return: {rets.mean()*100:.2f}%") - print(f" Win rate 5d: {(rets > 0).mean()*100:.1f}%") - - # Best bigrams - print(f"\n TOP 15 BIGRAMS BY 5-DAY RETURN (min 20 occurrences):") - bigram_groups = dataset.groupby("bigram") - bigram_results = [] - for bg, grp in bigram_groups: - if len(grp) < 20: - continue - rets = grp["fwd_5d"].dropna() - bigram_results.append({ - "bigram": bg, - "count": len(grp), - "avg_5d": rets.mean() * 100, - "win_5d": (rets > 0).mean() * 100, - }) - - bigram_results.sort(key=lambda r: r["avg_5d"], reverse=True) - print(f" {'Bigram':<35} {'Count':>6} {'Avg5d':>7} {'Win5d':>6}") - print(f" {'─'*35} {'─'*6} {'─'*7} {'─'*6}") - for r in bigram_results[:15]: - print(f" {r['bigram']:<35} {r['count']:>6} {r['avg_5d']:>6.2f}% {r['win_5d']:>5.1f}%") - - print(f"\n BOTTOM 15 BIGRAMS (bearish):") - for r in bigram_results[-15:]: - print(f" {r['bigram']:<35} {r['count']:>6} {r['avg_5d']:>6.2f}% {r['win_5d']:>5.1f}%") - - -def analyze_high_volume_context(dataset, min_count=20): - """High volume bars are the most informative — analyze separately.""" - print(f"\n{'='*70}") - print(f" HIGH VOLUME (V2) BAR CONTEXT") - print(f"{'='*70}") - - hv = dataset[dataset["volume_str"] == "V2"] - print(f" Total V2 bars: {len(hv)}") - - # By pivot direction + net on high volume - for pivot_dir, label in [(1, "pivot HIGH"), (-1, "pivot LOW"), (0, "tied pivot")]: - for net_dir, net_label in [(1, "positive net"), (-1, "negative net")]: - subset = hv[(hv["pivot_direction"] == pivot_dir) & (hv["net_direction"] == net_dir)] - if len(subset) < min_count: - continue - rets = subset["fwd_5d"].dropna() - if len(rets) == 0: - continue - print(f"\n V2 + {label} + {net_label}:") - print(f" Count: {len(subset)} Avg 5d: {rets.mean()*100:.2f}% Win: {(rets>0).mean()*100:.1f}%") - - -def main(): - fetcher = MassiveDataFetcher() - - print("Fetching 2 years of daily data...") - all_data = fetch_all(fetcher, SYMBOLS) - - print(f"\nBuilding encoded dataset...") - dataset = build_encoded_dataset(all_data) - print(f"Dataset: {len(dataset)} rows, {dataset['symbol'].nunique()} symbols") - - # Dimension-by-dimension analysis - analyze_single_dimension(dataset, "pivot_str", "pivot", min_count=50) - analyze_single_dimension(dataset, "net_str", "net change", min_count=50) - analyze_single_dimension(dataset, "range_str", "range", min_count=50) - analyze_single_dimension(dataset, "volume_str", "volume", min_count=50) - - # Combination analysis - analyze_combinations(dataset, min_count=30) - - # Sequential patterns - analyze_sequences(dataset, min_count=20) - - # High volume context - analyze_high_volume_context(dataset, min_count=20) - - -if __name__ == "__main__": - main() diff --git a/scripts/markets/analyze_encoding_patterns.py b/scripts/markets/analyze_encoding_patterns.py deleted file mode 100644 index 12e5846..0000000 --- a/scripts/markets/analyze_encoding_patterns.py +++ /dev/null @@ -1,260 +0,0 @@ -""" -Fetch recent price data, encode it, and find profitable repeatable patterns. - -Analyzes which encoded token patterns consistently precede positive returns. -""" - -import os -import sys -from collections import defaultdict -from datetime import datetime, timedelta - -import numpy as np -import pandas as pd - -# Backend imports -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app", "backend")) -from dotenv import load_dotenv -load_dotenv(os.path.join(os.path.dirname(__file__), "..", "app", "backend", ".env")) - -from data_fetcher import MassiveDataFetcher - -# Encoding imports -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from app.agent.ml.encoding.encoder import encode_price_df -from app.agent.ml.encoding.patterns import find_patterns, get_pattern_catalog - - -SYMBOLS = [ - "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA", "META", "TSLA", - "JPM", "V", "UNH", "HD", "NFLX", "ADBE", "CRM", "AMD", - "PYPL", "BA", "GS", "XOM", "CVX", "COST", "COIN", "SQ", -] - -LOOKBACK_DAYS = 365 * 2 # 2 years of daily data -FORWARD_RETURNS = [1, 3, 5, 10] # Days ahead to measure returns - - -def fetch_symbol(fetcher, symbol, days_back): - """Fetch daily bars for a symbol.""" - end = datetime.now().strftime("%Y-%m-%d") - start = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d") - df = fetcher.fetch_bars(symbol, "1d", start, end) - return df - - -def analyze_ngram_profitability(enc_df, original_df, n=2): - """Find which n-gram token sequences precede profitable moves.""" - close = original_df["close"].values - tokens = enc_df["encoded_str"].values - - ngram_stats = defaultdict(lambda: { - "occurrences": 0, - "returns_1d": [], - "returns_3d": [], - "returns_5d": [], - "returns_10d": [], - }) - - for i in range(n - 1, len(tokens) - 10): - ngram = " ".join(tokens[i - n + 1 : i + 1]) - stats = ngram_stats[ngram] - stats["occurrences"] += 1 - - for fwd in FORWARD_RETURNS: - if i + fwd < len(close): - ret = (close[i + fwd] - close[i]) / close[i] - stats[f"returns_{fwd}d"].append(ret) - - return ngram_stats - - -def analyze_regex_patterns(enc_df, original_df): - """Find which regex-defined patterns precede profitable moves.""" - close = original_df["close"].values - tokens = enc_df["encoded_str"].values - full_encoded = " ".join(tokens) - - pattern_stats = defaultdict(lambda: { - "occurrences": 0, - "returns_1d": [], - "returns_3d": [], - "returns_5d": [], - "returns_10d": [], - }) - - # Sliding window analysis - window = 5 - for i in range(window - 1, len(tokens) - 10): - window_str = " ".join(tokens[i - window + 1 : i + 1]) - matches = find_patterns(window_str) - - for m in matches: - stats = pattern_stats[m.name] - stats["occurrences"] += 1 - - for fwd in FORWARD_RETURNS: - if i + fwd < len(close): - ret = (close[i + fwd] - close[i]) / close[i] - stats[f"returns_{fwd}d"].append(ret) - - return pattern_stats - - -def print_top_patterns(ngram_stats, title, min_occurrences=10, sort_by="returns_5d"): - """Print the most profitable repeating patterns.""" - print(f"\n{'='*80}") - print(f" {title}") - print(f"{'='*80}") - - # Filter and compute stats - rows = [] - for pattern, stats in ngram_stats.items(): - if stats["occurrences"] < min_occurrences: - continue - row = {"pattern": pattern, "count": stats["occurrences"]} - for fwd in FORWARD_RETURNS: - rets = stats[f"returns_{fwd}d"] - if rets: - row[f"avg_{fwd}d"] = np.mean(rets) * 100 - row[f"win_{fwd}d"] = (np.array(rets) > 0).mean() * 100 - row[f"sharpe_{fwd}d"] = (np.mean(rets) / max(np.std(rets), 1e-10)) * np.sqrt(252 / fwd) - else: - row[f"avg_{fwd}d"] = 0 - row[f"win_{fwd}d"] = 50 - row[f"sharpe_{fwd}d"] = 0 - rows.append(row) - - if not rows: - print(" No patterns with enough occurrences found.") - return [] - - # Sort by average return - sort_key = f"avg_{sort_by.replace('returns_', '').replace('d', '')}d" if "returns" in sort_by else f"avg_{sort_by}" - rows.sort(key=lambda r: r.get(sort_key, 0), reverse=True) - - # Print top bullish - print(f"\n TOP BULLISH PATTERNS (sorted by avg 5d return)") - print(f" {'Pattern':<35} {'Count':>6} {'Avg 1d':>8} {'Avg 5d':>8} {'Win% 5d':>8} {'Sharpe':>8}") - print(f" {'-'*35} {'-'*6} {'-'*8} {'-'*8} {'-'*8} {'-'*8}") - for row in rows[:15]: - print(f" {row['pattern']:<35} {row['count']:>6} {row['avg_1d']:>7.2f}% {row['avg_5d']:>7.2f}% {row['win_5d']:>7.1f}% {row.get('sharpe_5d', 0):>8.2f}") - - # Print top bearish - print(f"\n TOP BEARISH PATTERNS (sorted by avg 5d return)") - print(f" {'Pattern':<35} {'Count':>6} {'Avg 1d':>8} {'Avg 5d':>8} {'Win% 5d':>8} {'Sharpe':>8}") - print(f" {'-'*35} {'-'*6} {'-'*8} {'-'*8} {'-'*8} {'-'*8}") - for row in rows[-15:]: - print(f" {row['pattern']:<35} {row['count']:>6} {row['avg_1d']:>7.2f}% {row['avg_5d']:>7.2f}% {row['win_5d']:>7.1f}% {row.get('sharpe_5d', 0):>8.2f}") - - return rows - - -def main(): - fetcher = MassiveDataFetcher() - - all_ngram_stats = defaultdict(lambda: { - "occurrences": 0, - "returns_1d": [], - "returns_3d": [], - "returns_5d": [], - "returns_10d": [], - }) - all_pattern_stats = defaultdict(lambda: { - "occurrences": 0, - "returns_1d": [], - "returns_3d": [], - "returns_5d": [], - "returns_10d": [], - }) - - print(f"Fetching {LOOKBACK_DAYS} days of daily data for {len(SYMBOLS)} symbols...") - print() - - for i, symbol in enumerate(SYMBOLS): - print(f" [{i+1}/{len(SYMBOLS)}] {symbol}...", end=" ", flush=True) - df = fetch_symbol(fetcher, symbol, LOOKBACK_DAYS) - - if df is None or df.empty or len(df) < 60: - print("insufficient data") - continue - - # Ensure numeric - for col in ["open", "high", "low", "close"]: - df[col] = pd.to_numeric(df[col], errors="coerce") - df["volume"] = pd.to_numeric(df["volume"], errors="coerce").fillna(0) - df = df.dropna(subset=["close"]).reset_index(drop=True) - - print(f"{len(df)} bars", end=" ", flush=True) - - # Encode - enc_df = encode_price_df(df, p=20) - print(f"-> {len(enc_df)} encoded", end=" ", flush=True) - - # N-gram analysis (2-grams and 3-grams) - for n in [2, 3]: - ngram_stats = analyze_ngram_profitability(enc_df, df, n=n) - for pattern, stats in ngram_stats.items(): - merged = all_ngram_stats[pattern] - merged["occurrences"] += stats["occurrences"] - for fwd in FORWARD_RETURNS: - merged[f"returns_{fwd}d"].extend(stats[f"returns_{fwd}d"]) - - # Regex pattern analysis - pattern_stats = analyze_regex_patterns(enc_df, df) - for pattern, stats in pattern_stats.items(): - merged = all_pattern_stats[pattern] - merged["occurrences"] += stats["occurrences"] - for fwd in FORWARD_RETURNS: - merged[f"returns_{fwd}d"].extend(stats[f"returns_{fwd}d"]) - - print("done") - - # Print results - print_top_patterns( - all_ngram_stats, - "N-GRAM TOKEN PATTERNS (across all symbols)", - min_occurrences=50, - sort_by="5d", - ) - - print_top_patterns( - all_pattern_stats, - "REGEX-DEFINED PATTERNS (across all symbols)", - min_occurrences=20, - sort_by="5d", - ) - - # Summary stats - print(f"\n{'='*80}") - print(f" SUMMARY") - print(f"{'='*80}") - total_ngrams = len([k for k, v in all_ngram_stats.items() if v["occurrences"] >= 50]) - total_patterns = len([k for k, v in all_pattern_stats.items() if v["occurrences"] >= 20]) - print(f" N-grams with 50+ occurrences: {total_ngrams}") - print(f" Regex patterns with 20+ occurrences: {total_patterns}") - - # Best overall signals - print(f"\n BEST ACTIONABLE SIGNALS (>=50 occurrences, >55% win rate, >0.3% avg 5d return):") - best = [] - for pattern, stats in {**all_ngram_stats, **all_pattern_stats}.items(): - if stats["occurrences"] < 50: - continue - rets_5d = stats["returns_5d"] - if not rets_5d: - continue - avg = np.mean(rets_5d) * 100 - win = (np.array(rets_5d) > 0).mean() * 100 - if win > 55 and avg > 0.3: - best.append((pattern, stats["occurrences"], avg, win)) - - best.sort(key=lambda x: x[2], reverse=True) - for pat, count, avg, win in best[:20]: - print(f" {pat:<35} count={count:>5} avg_5d={avg:>6.2f}% win={win:>5.1f}%") - - if not best: - print(" (none found — try with more data or lower thresholds)") - - -if __name__ == "__main__": - main() diff --git a/scripts/markets/massive_app.py b/scripts/markets/massive_app.py deleted file mode 100644 index 3ba5640..0000000 --- a/scripts/markets/massive_app.py +++ /dev/null @@ -1,201 +0,0 @@ -from massive import WebSocketClient -from massive.websocket.models import WebSocketMessage, EquityTrade -from typing import List -from typing import Dict -from datetime import datetime -import time -import threading -import os - -# docs -# https://massive.com/docs/stocks/ws_stocks_am - -# This program connects to the Massive WebSocket API, authenticates the -# connection, and subscribes to receive trades. Every 5 seconds, it counts -# the number of trades per symbol and stores the results in a map. The -# program then prints the map, which gives a readout of the top stocks -# traded in the past 5 seconds. - -# Here's what the output looks like after running it for a couple hours: - -""" - --- Past 5 seconds --- - Tickers seen (5s): 1697 - Trades seen (5s): 12335 - Cash traded (5s): 88,849,414.33 - - --- Running Totals --- - Total Tickers seen: 13848 - Total Trades seen: 22775838 - Total Cash traded: 178,499,702,488.96 - ----------------------------------------------------------------------------------------------------- - -Ticker Trades (5s) Cash (5s) Total Trades Total Cash -NVDA 445 6,933,283.61 632550 18,291,747,596.36 -TSLA 279 8,144,556.76 639585 11,319,594,268.07 -NVDL 277 3,748,806.85 10451 99,902,192.88 -TELL 171 78,424.03 6154 3,710,200.38 -AFRM 163 968,984.99 224338 745,895,134.93 -AAPL 134 2,359,278.02 304572 2,932,389,741.58 -QQQ 132 5,788,859.71 246679 11,003,577,730.48 -NVDS 130 598,047.04 7846 48,854,967.44 -SOXL 127 786,026.38 189184 719,639,349.26 -AMD 116 1,549,180.08 304704 3,713,351,432.39 -SPY 113 6,628,554.14 278926 15,435,607,506.98 -MSFT 109 1,600,861.75 148047 2,396,824,971.18 -SQQQ 88 1,006,330.83 173406 2,065,760,858.90 -TQQQ 83 717,574.40 296021 2,580,097,288.27 -PLUG 82 106,542.65 31921 53,825,007.27 -ITB 75 455,902.33 23369 185,892,273.60 -AVGO 71 1,955,826.79 31586 633,629,812.65 -STX 71 273,681.77 8420 34,141,139.17 -XPEV 68 234,765.41 41284 127,781,104.54 -OIH 55 662.12 2964 65,848,514.45 -XEL 54 197,642.42 18524 103,054,857.37 -XLU 53 850,017.20 35963 291,891,266.17 -ARRY 52 164,056.54 11354 23,001,537.49 -META 52 1,457,535.82 150793 2,717,344,906.63 -PLTR 52 147,743.93 86456 396,851,801.06 - -Current Time: 2023-08-25 08:27:14.602075 | App Uptime: 04:49:40 | Time taken: 0.003417 seconds -""" - -app_start_time = time.time() -string_map: Dict[str, int] = {} -cash_map_5s: Dict[str, float] = {} -cash_traded = float(0) - -# totals -total_tickers_seen = 0 -total_trades_seen = 0 -total_cash_traded = 0.0 - -# These dictionaries will keep track of the running total of trades and cash per ticker. -total_string_map: Dict[str, int] = {} -total_cash_map: Dict[str, float] = {} - - -def print_centered(s: str): - term_width = os.get_terminal_size().columns - print(s.center(term_width)) - - -def run_websocket_client(): - # client = WebSocketClient("XXXXXX") # hardcoded api_key is used - client = WebSocketClient() # MASSIVE_API_KEY environment variable is used - client.subscribe("T.*") # all trades - client.run(handle_msg) - - -def handle_msg(msgs: List[WebSocketMessage]): - global cash_traded - global total_tickers_seen, total_trades_seen, total_cash_traded - for m in msgs: - if isinstance(m, EquityTrade): - # Update total trades and cash for the past 5 seconds - if isinstance(m.symbol, str): - string_map[m.symbol] = string_map.get(m.symbol, 0) + 1 - total_string_map[m.symbol] = total_string_map.get(m.symbol, 0) + 1 - - # Update cash traded - if isinstance(m.price, float) and isinstance(m.size, int): - cash_value = m.price * m.size - cash_traded += cash_value - total_cash_map[m.symbol] = ( # type: ignore - total_cash_map.get(m.symbol, 0) + cash_value # type: ignore - ) - - # Update cash for the past 5 seconds - cash_map_5s[m.symbol] = ( # type: ignore - cash_map_5s.get(m.symbol, 0) + cash_value # type: ignore - ) # Okay! - - # Update totals - total_tickers_seen = len(total_string_map) - total_trades_seen += 1 - total_cash_traded += cash_value - - -def top_function(): - # start timer - start_time = time.time() - global cash_traded - - # Only sort the string_map once - sorted_trades_5s = sorted(string_map.items(), key=lambda x: x[1], reverse=True) - - # Clear screen - print("\033c", end="") - - # Print 5-second snapshot - print() - print_centered("--- Past 5 seconds ---") - print_centered(f"Tickers seen (5s): {len(string_map)}") - print_centered(f"Trades seen (5s): {sum(string_map.values())}") - print_centered(f"Cash traded (5s): {cash_traded:,.2f}") - print() - print_centered("--- Running Totals ---") - print_centered(f"Total Tickers seen: {total_tickers_seen}") - print_centered(f"Total Trades seen: {total_trades_seen}") - print_centered(f"Total Cash traded: {total_cash_traded:,.2f}") - - # Separator - print() - print_centered("-" * 100 + "\n") - - # Print table header - print_centered( - "{:<15}{:<20}{:<20}{:<20}{:<20}".format( - "Ticker", "Trades (5s)", "Cash (5s)", "Total Trades", "Total Cash" - ) - ) - - # Print table content - for ticker, trades in sorted(string_map.items(), key=lambda x: x[1], reverse=True)[ - :25 - ]: - cash_5s = cash_map_5s.get(ticker, 0) - total_trades = total_string_map[ticker] - total_cash = total_cash_map.get(ticker, 0.0) - print_centered( - "{:<15}{:<20}{:<20,.2f}{:<20}{:<20,.2f}".format( - ticker, trades, cash_5s, total_trades, total_cash - ) - ) - - # Print times - end_time = time.time() - current_time = datetime.now() - - # Print elapsed the since we started - elapsed_time = time.time() - app_start_time - hours, rem = divmod(elapsed_time, 3600) - minutes, seconds = divmod(rem, 60) - - # Print the time and quick stats - print() - print_centered( - f"Current Time: {current_time} | App Uptime: {int(hours):02}:{int(minutes):02}:{int(seconds):02} | Time taken: {end_time - start_time:.6f} seconds" - ) - - # clear map and cash for next loop - string_map.clear() - cash_map_5s.clear() - cash_traded = 0 - - -def run_function_periodically(): - while True: - top_function() - time.sleep(5) - - -thread1 = threading.Thread(target=run_function_periodically) -thread2 = threading.Thread(target=run_websocket_client) - -thread1.start() -thread2.start() - -thread1.join() -thread2.join() diff --git a/scripts/markets/massive_screener.py b/scripts/markets/massive_screener.py deleted file mode 100644 index b0f0267..0000000 --- a/scripts/markets/massive_screener.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Massive WebSocket Stock Screener (formerly Polygon) -""" - -from massive import WebSocketClient -from massive.websocket.models import WebSocketMessage, EquityTrade -from typing import List -from typing import Dict -from datetime import datetime -import time -import threading -import os - -# docs -# https://massive.com/docs/stocks/ws_stocks_am - -# This program connects to the Massive WebSocket API, authenticates the -# connection, and subscribes to receive trades. Every 5 seconds, it counts -# the number of trades per symbol and stores the results in a map. The -# program then prints the map, which gives a readout of the top stocks -# traded in the past 5 seconds. - -# Here's what the output looks like after running it for a couple hours: - -""" - --- Past 5 seconds --- - Tickers seen (5s): 1697 - Trades seen (5s): 12335 - Cash traded (5s): 88,849,414.33 - - --- Running Totals --- - Total Tickers seen: 13848 - Total Trades seen: 22775838 - Total Cash traded: 178,499,702,488.96 - ----------------------------------------------------------------------------------------------------- - -Ticker Trades (5s) Cash (5s) Total Trades Total Cash -NVDA 445 6,933,283.61 632550 18,291,747,596.36 -TSLA 279 8,144,556.76 639585 11,319,594,268.07 -NVDL 277 3,748,806.85 10451 99,902,192.88 -TELL 171 78,424.03 6154 3,710,200.38 -AFRM 163 968,984.99 224338 745,895,134.93 -AAPL 134 2,359,278.02 304572 2,932,389,741.58 -QQQ 132 5,788,859.71 246679 11,003,577,730.48 -NVDS 130 598,047.04 7846 48,854,967.44 -SOXL 127 786,026.38 189184 719,639,349.26 -AMD 116 1,549,180.08 304704 3,713,351,432.39 -SPY 113 6,628,554.14 278926 15,435,607,506.98 -MSFT 109 1,600,861.75 148047 2,396,824,971.18 -SQQQ 88 1,006,330.83 173406 2,065,760,858.90 -TQQQ 83 717,574.40 296021 2,580,097,288.27 -PLUG 82 106,542.65 31921 53,825,007.27 -ITB 75 455,902.33 23369 185,892,273.60 -AVGO 71 1,955,826.79 31586 633,629,812.65 -STX 71 273,681.77 8420 34,141,139.17 -XPEV 68 234,765.41 41284 127,781,104.54 -OIH 55 662.12 2964 65,848,514.45 -XEL 54 197,642.42 18524 103,054,857.37 -XLU 53 850,017.20 35963 291,891,266.17 -ARRY 52 164,056.54 11354 23,001,537.49 -META 52 1,457,535.82 150793 2,717,344,906.63 -PLTR 52 147,743.93 86456 396,851,801.06 - -Current Time: 2023-08-25 08:27:14.602075 | App Uptime: 04:49:40 | Time taken: 0.003417 seconds -""" - -app_start_time = time.time() -string_map: Dict[str, int] = {} -cash_map_5s: Dict[str, float] = {} -cash_traded = float(0) - -# totals -total_tickers_seen = 0 -total_trades_seen = 0 -total_cash_traded = 0.0 - -# These dictionaries will keep track of the running total of trades and cash per ticker. -total_string_map: Dict[str, int] = {} -total_cash_map: Dict[str, float] = {} - - -def print_centered(s: str): - term_width = os.get_terminal_size().columns - print(s.center(term_width)) - - -def run_websocket_client(): - client = WebSocketClient() # MASSIVE_API_KEY environment variable is used - client.subscribe("T.*") # all trades - client.run(handle_msg) - - -def handle_msg(msgs: List[WebSocketMessage]): - global cash_traded - global total_tickers_seen, total_trades_seen, total_cash_traded - for m in msgs: - if isinstance(m, EquityTrade): - # Update total trades and cash for the past 5 seconds - if isinstance(m.symbol, str): - string_map[m.symbol] = string_map.get(m.symbol, 0) + 1 - total_string_map[m.symbol] = total_string_map.get(m.symbol, 0) + 1 - - # Update cash traded - if isinstance(m.price, float) and isinstance(m.size, int): - cash_value = m.price * m.size - cash_traded += cash_value - total_cash_map[m.symbol] = ( # type: ignore - total_cash_map.get(m.symbol, 0) + cash_value # type: ignore - ) - - # Update cash for the past 5 seconds - cash_map_5s[m.symbol] = ( # type: ignore - cash_map_5s.get(m.symbol, 0) + cash_value # type: ignore - ) # Okay! - - # Update totals - total_tickers_seen = len(total_string_map) - total_trades_seen += 1 - total_cash_traded += cash_value - - -def top_function(): - # start timer - start_time = time.time() - global cash_traded - - # Only sort the string_map once - sorted_trades_5s = sorted(string_map.items(), key=lambda x: x[1], reverse=True) - - # Clear screen - print("\033c", end="") - - # Print 5-second snapshot - print() - print_centered("--- Past 5 seconds ---") - print_centered(f"Tickers seen (5s): {len(string_map)}") - print_centered(f"Trades seen (5s): {sum(string_map.values())}") - print_centered(f"Cash traded (5s): {cash_traded:,.2f}") - print() - print_centered("--- Running Totals ---") - print_centered(f"Total Tickers seen: {total_tickers_seen}") - print_centered(f"Total Trades seen: {total_trades_seen}") - print_centered(f"Total Cash traded: {total_cash_traded:,.2f}") - - # Separator - print() - print_centered("-" * 100 + "\n") - - # Print table header - print_centered( - "{:<15}{:<20}{:<20}{:<20}{:<20}".format( - "Ticker", "Trades (5s)", "Cash (5s)", "Total Trades", "Total Cash" - ) - ) - - # Print table content - for ticker, trades in sorted(string_map.items(), key=lambda x: x[1], reverse=True)[ - :25 - ]: - cash_5s = cash_map_5s.get(ticker, 0) - total_trades = total_string_map[ticker] - total_cash = total_cash_map.get(ticker, 0.0) - print_centered( - "{:<15}{:<20}{:<20,.2f}{:<20}{:<20,.2f}".format( - ticker, trades, cash_5s, total_trades, total_cash - ) - ) - - # Print times - end_time = time.time() - current_time = datetime.now() - - # Print elapsed the since we started - elapsed_time = time.time() - app_start_time - hours, rem = divmod(elapsed_time, 3600) - minutes, seconds = divmod(rem, 60) - - # Print the time and quick stats - print() - print_centered( - f"Current Time: {current_time} | App Uptime: {int(hours):02}:{int(minutes):02}:{int(seconds):02} | Time taken: {end_time - start_time:.6f} seconds" - ) - - # clear map and cash for next loop - string_map.clear() - cash_map_5s.clear() - cash_traded = 0 - - -def run_function_periodically(): - while True: - top_function() - time.sleep(5) - - -thread1 = threading.Thread(target=run_function_periodically) -thread2 = threading.Thread(target=run_websocket_client) - -thread1.start() -thread2.start() - -thread1.join() -thread2.join() diff --git a/scripts/markets/test_massive.py b/scripts/markets/test_massive.py deleted file mode 100644 index 732268b..0000000 --- a/scripts/markets/test_massive.py +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/env python -""" -Quick Massive API Test Script (formerly Polygon.io) -Tests basic Massive API connectivity and functionality -""" - -import os -import time -from datetime import datetime, timedelta -from massive import RESTClient, WebSocketClient -from massive.websocket.models import WebSocketMessage, EquityTrade - -def test_api_key(): - """Check if API key is set""" - api_key = os.getenv("MASSIVE_API_KEY") - if not api_key: - print("❌ MASSIVE_API_KEY environment variable not set") - return False - print(f"✓ API Key found: {api_key[:8]}...") - return True - -def test_rest_client(): - """Test REST client initialization""" - try: - client = RESTClient() - print("✓ REST client initialized") - return client - except Exception as e: - print(f"❌ Failed to initialize REST client: {e}") - return None - -def test_stock_quote(client, symbol="AAPL"): - """Test getting a stock quote""" - try: - # Get last trade - trades = client.list_trades(symbol, limit=1) - for trade in trades: - print(f"✓ Latest trade for {symbol}:") - print(f" Price: ${trade.price}") - print(f" Size: {trade.size}") - print(f" Time: {datetime.fromtimestamp(trade.participant_timestamp / 1000000000)}") - return True - except Exception as e: - print(f"❌ Failed to get quote for {symbol}: {e}") - return False - -def test_historical_data(client, symbol="AAPL", days=5): - """Test fetching historical data""" - try: - end_date = datetime.now() - start_date = end_date - timedelta(days=days) - - # Get aggregates (OHLCV bars) - aggs = client.get_aggs( - symbol, - 1, - "day", - start_date.strftime("%Y-%m-%d"), - end_date.strftime("%Y-%m-%d"), - limit=50000 - ) - - bars = list(aggs) - if bars: - print(f"✓ Historical data for {symbol} (last {days} days):") - print(f" Bars received: {len(bars)}") - latest = bars[-1] - print(f" Latest close: ${latest.close}") - print(f" Volume: {latest.volume:,}") - return True - else: - print(f"❌ No historical data returned for {symbol}") - return False - except Exception as e: - print(f"❌ Failed to get historical data: {e}") - return False - -def test_ticker_details(client, symbol="AAPL"): - """Test getting ticker details""" - try: - details = client.get_ticker_details(symbol) - print(f"✓ Ticker details for {symbol}:") - print(f" Name: {details.name}") - print(f" Market: {details.market}") - print(f" Type: {details.type}") - return True - except Exception as e: - print(f"❌ Failed to get ticker details: {e}") - return False - -def test_websocket(): - """Test WebSocket connection and trade stream""" - print("Testing WebSocket connection (will run for 10 seconds)...") - - trade_count = 0 - symbols_seen = set() - - def handle_msg(msgs): - nonlocal trade_count, symbols_seen - for msg in msgs: - if isinstance(msg, EquityTrade): - trade_count += 1 - symbols_seen.add(msg.symbol) - - try: - client = WebSocketClient() - client.subscribe("T.AAPL", "T.TSLA", "T.NVDA") # Subscribe to specific tickers - - # Run for 10 seconds in background - import threading - ws_thread = threading.Thread(target=lambda: client.run(handle_msg)) - ws_thread.daemon = True - ws_thread.start() - - # Wait 10 seconds - time.sleep(10) - - if trade_count > 0: - print(f"✓ WebSocket working!") - print(f" Trades received: {trade_count}") - print(f" Symbols seen: {', '.join(sorted(symbols_seen))}") - return True - else: - print(f"⚠️ WebSocket connected but no trades received (market may be closed)") - return True - - except Exception as e: - print(f"❌ WebSocket test failed: {e}") - return False - -def main(): - """Run all tests""" - print("=" * 60) - print("Massive API Test Suite") - print("=" * 60) - print() - - # Test 1: API Key - if not test_api_key(): - print("\nTests aborted - no API key found") - return - - print() - - # Test 2: REST Client - client = test_rest_client() - if not client: - print("\nTests aborted - client initialization failed") - return - - print() - - # Test 3: Stock Quote - test_stock_quote(client) - print() - - # Test 4: Historical Data - test_historical_data(client) - print() - - # Test 5: Ticker Details - test_ticker_details(client) - print() - - # Test 6: WebSocket - test_websocket() - print() - - print("=" * 60) - print("All tests completed!") - print("=" * 60) - -if __name__ == "__main__": - main() diff --git a/scripts/markets/test_massive_top_gainers.py b/scripts/markets/test_massive_top_gainers.py deleted file mode 100644 index 6eccd7d..0000000 --- a/scripts/markets/test_massive_top_gainers.py +++ /dev/null @@ -1,200 +0,0 @@ -#!/usr/bin/env python -""" -Massive Top Gainers Test Script v5 (formerly Polygon.io) -Uses the Top Market Movers endpoint: /v2/snapshot/locale/us/markets/stocks/{direction} -""" - -import os -from massive import RESTClient - -def test_api_key(): - """Check if API key is set""" - api_key = os.getenv("MASSIVE_API_KEY") - if not api_key: - print("❌ MASSIVE_API_KEY environment variable not set") - return False - print(f"✓ API Key found: {api_key[:8]}...") - return True - -def get_top_gainers(client, limit=20): - """Get top gainers using top market movers endpoint""" - try: - print(f"\nFetching top {limit} gainers...") - - # Try the direct method call - try: - resp = client.get_top_market_movers(direction="gainers") - except AttributeError: - # If that doesn't work, try the raw request - import requests - api_key = os.getenv("MASSIVE_API_KEY") - url = f"https://api.massive.com/v2/snapshot/locale/us/markets/stocks/gainers?apiKey={api_key}" - response = requests.get(url) - response.raise_for_status() - data = response.json() - - if data.get('status') != 'OK': - print(f"❌ API returned status: {data.get('status')}") - return False - - tickers = data.get('tickers', []) - - gainers = [] - for ticker_data in tickers: - symbol = ticker_data.get('ticker', '') - today_change_pct = ticker_data.get('todaysChangePerc', 0) - price = 0 - volume = 0 - - day = ticker_data.get('day', {}) - if day: - price = day.get('c', 0) - volume = day.get('v', 0) - - gainers.append({ - 'symbol': symbol, - 'price': price, - 'change_pct': today_change_pct, - 'volume': volume - }) - - # Sort by percent change (descending) - gainers.sort(key=lambda x: x['change_pct'], reverse=True) - - # Get top N - top_gainers = gainers[:limit] - - print(f"Total gainers received: {len(gainers)}") - print() - - if top_gainers: - print(f"{'Symbol':<10} {'Price':<12} {'Change %':<12} {'Volume':<15}") - print("=" * 49) - - for gainer in top_gainers: - print( - f"{gainer['symbol']:<10} " - f"${gainer['price']:<11.2f} " - f"{gainer['change_pct']:>10.2f}% " - f"{int(gainer['volume']):>14,}" - ) - - return True - else: - print("❌ No gainers data available") - return False - - except Exception as e: - print(f"❌ Failed to get gainers: {e}") - import traceback - traceback.print_exc() - return False - -def get_top_losers(client, limit=20): - """Get top losers using top market movers endpoint""" - try: - print(f"\nFetching top {limit} losers...") - - # Use raw request approach - import requests - api_key = os.getenv("MASSIVE_API_KEY") - url = f"https://api.massive.com/v2/snapshot/locale/us/markets/stocks/losers?apiKey={api_key}" - response = requests.get(url) - response.raise_for_status() - data = response.json() - - if data.get('status') != 'OK': - print(f"❌ API returned status: {data.get('status')}") - return False - - tickers = data.get('tickers', []) - - losers = [] - for ticker_data in tickers: - symbol = ticker_data.get('ticker', '') - today_change_pct = ticker_data.get('todaysChangePerc', 0) - price = 0 - volume = 0 - - day = ticker_data.get('day', {}) - if day: - price = day.get('c', 0) - volume = day.get('v', 0) - - losers.append({ - 'symbol': symbol, - 'price': price, - 'change_pct': today_change_pct, - 'volume': volume - }) - - # Sort by percent change (ascending for losers) - losers.sort(key=lambda x: x['change_pct']) - - # Get top N - top_losers = losers[:limit] - - print(f"Total losers received: {len(losers)}") - print() - - if top_losers: - print(f"{'Symbol':<10} {'Price':<12} {'Change %':<12} {'Volume':<15}") - print("=" * 49) - - for loser in top_losers: - print( - f"{loser['symbol']:<10} " - f"${loser['price']:<11.2f} " - f"{loser['change_pct']:>10.2f}% " - f"{int(loser['volume']):>14,}" - ) - - return True - else: - print("❌ No losers data available") - return False - - except Exception as e: - print(f"❌ Failed to get losers: {e}") - import traceback - traceback.print_exc() - return False - -def main(): - """Run gainers/losers test""" - print("=" * 80) - print("Massive Top Gainers & Losers Test (v5 - Using Top Market Movers Endpoint)") - print("=" * 80) - print() - - # Test API Key - if not test_api_key(): - print("\nTest aborted - no API key found") - return - - print() - - # Initialize REST Client - try: - client = RESTClient() - print("✓ REST client initialized") - except Exception as e: - print(f"❌ Failed to initialize REST client: {e}") - return - - # Get top gainers - get_top_gainers(client, limit=20) - - print() - print("-" * 80) - - # Get top losers - get_top_losers(client, limit=20) - - print() - print("=" * 80) - print("Test completed!") - print("=" * 80) - -if __name__ == "__main__": - main() diff --git a/scripts/run_agent.py b/scripts/run_agent.py deleted file mode 100644 index 026f357..0000000 --- a/scripts/run_agent.py +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env python -"""Entry point for the trading agent.""" - -import argparse -import asyncio -import logging -import sys -from pathlib import Path - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from agent.trading_agent import TradingAgent, AgentConfig - - -def setup_logging(verbose: bool = False) -> None: - """Configure logging.""" - level = logging.DEBUG if verbose else logging.INFO - logging.basicConfig( - level=level, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - handlers=[ - logging.StreamHandler(), - logging.FileHandler("agent/logs/agent.log", mode="a"), - ], - ) - - # Reduce noise from libraries - logging.getLogger("httpx").setLevel(logging.WARNING) - logging.getLogger("anthropic").setLevel(logging.WARNING) - - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser( - description="Run the AlphaPy trading agent" - ) - parser.add_argument( - "--config", - "-c", - default="agent/config/agent.yml", - help="Path to agent configuration file", - ) - parser.add_argument( - "--verbose", - "-v", - action="store_true", - help="Enable verbose output", - ) - parser.add_argument( - "--once", - action="store_true", - help="Run a single cycle and exit", - ) - parser.add_argument( - "--chat", - action="store_true", - help="Run in interactive chat mode", - ) - - args = parser.parse_args() - - # Ensure logs directory exists - Path("agent/logs").mkdir(parents=True, exist_ok=True) - - # Setup logging - setup_logging(args.verbose) - logger = logging.getLogger(__name__) - - # Load config - config_path = Path(args.config) - if config_path.exists(): - logger.info(f"Loading config from {config_path}") - config = AgentConfig.from_yaml(str(config_path)) - else: - logger.warning(f"Config file not found: {config_path}, using defaults") - config = AgentConfig() - - # Create agent - agent = TradingAgent(config=config, verbose=args.verbose) - - if args.chat: - # Interactive chat mode - asyncio.run(interactive_mode(agent)) - elif args.once: - # Single cycle mode - logger.info("Running single cycle...") - result = asyncio.run(agent.run_cycle()) - print(result) - else: - # Continuous trading mode - logger.info("Starting continuous trading mode...") - try: - asyncio.run(agent.run()) - except KeyboardInterrupt: - logger.info("Interrupted by user") - agent.stop() - - -async def interactive_mode(agent: TradingAgent) -> None: - """Run interactive chat mode.""" - print("\nTrading Agent Interactive Mode") - print("Type 'quit' or 'exit' to stop") - print("Type 'cycle' to run a trading cycle") - print("-" * 40) - - while True: - try: - user_input = input("\nYou: ").strip() - - if not user_input: - continue - - if user_input.lower() in ["quit", "exit", "q"]: - print("Goodbye!") - break - - if user_input.lower() == "cycle": - print("\nRunning trading cycle...") - result = await agent.run_cycle() - print(f"\nAgent: {result}") - continue - - # Regular chat - response = await agent.chat(user_input) - print(f"\nAgent: {response}") - - except KeyboardInterrupt: - print("\nGoodbye!") - break - except Exception as e: - print(f"\nError: {e}") - - -if __name__ == "__main__": - main() diff --git a/tests/conftest.py b/tests/conftest.py index 05526aa..0a4606b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,16 +2,11 @@ Pytest configuration and shared fixtures for AlphaPy tests. """ import asyncio -import json -import os import shutil import tempfile -from datetime import datetime, timedelta from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch import numpy as np -import polars as pl import pytest @@ -59,263 +54,32 @@ def memory_temp_dir(tmp_path): return memory_dir -# ============================================================================ -# Sample Data Fixtures -# ============================================================================ - -@pytest.fixture -def sample_ohlcv_df(): - """Create a sample OHLCV Polars DataFrame for testing.""" - n_rows = 100 - base_price = 150.0 - dates = [datetime.now() - timedelta(minutes=5 * i) for i in range(n_rows)][::-1] - - # Generate realistic price data - np.random.seed(42) - returns = np.random.normal(0, 0.01, n_rows) - prices = base_price * np.cumprod(1 + returns) - - return pl.DataFrame({ - "datetime": dates, - "open": prices * (1 + np.random.uniform(-0.005, 0.005, n_rows)), - "high": prices * (1 + np.random.uniform(0, 0.02, n_rows)), - "low": prices * (1 - np.random.uniform(0, 0.02, n_rows)), - "close": prices, - "volume": np.random.randint(100000, 1000000, n_rows), - }) - - -@pytest.fixture -def sample_ohlcv_pandas_df(sample_ohlcv_df): - """Convert Polars OHLCV to Pandas for model tests.""" - return sample_ohlcv_df.to_pandas() - - -@pytest.fixture -def sample_bars_dict(sample_ohlcv_df): - """Create bar data dict for market data tool tests.""" - return { - "AAPL": { - "bars": sample_ohlcv_df.to_dicts(), - "count": len(sample_ohlcv_df), - "latest_close": float(sample_ohlcv_df["close"][-1]), - "latest_time": str(sample_ohlcv_df["datetime"][-1]), - }, - "_summary": { - "symbols_requested": 1, - "symbols_returned": 1, - "timeframe": "5Min", - "lookback_bars": 100, - } - } - - -@pytest.fixture -def empty_ohlcv_df(): - """Create an empty OHLCV DataFrame for edge case testing.""" - return pl.DataFrame({ - "datetime": [], - "open": [], - "high": [], - "low": [], - "close": [], - "volume": [], - }).cast({ - "datetime": pl.Datetime, - "open": pl.Float64, - "high": pl.Float64, - "low": pl.Float64, - "close": pl.Float64, - "volume": pl.Int64, - }) - - -@pytest.fixture -def small_ohlcv_df(): - """Create a small OHLCV DataFrame (5 rows) for testing indicator warm-up.""" - return pl.DataFrame({ - "datetime": [datetime.now() - timedelta(minutes=5 * i) for i in range(5)][::-1], - "open": [100.0, 101.0, 102.0, 101.5, 103.0], - "high": [101.5, 102.5, 103.0, 102.5, 104.0], - "low": [99.5, 100.5, 101.0, 100.5, 102.0], - "close": [101.0, 102.0, 101.5, 102.5, 103.5], - "volume": [100000, 120000, 110000, 130000, 140000], - }) - - -# ============================================================================ -# Mock External Clients -# ============================================================================ - -@pytest.fixture -def mock_polygon_agg(): - """Create a mock Polygon aggregate bar.""" - mock_agg = MagicMock() - mock_agg.timestamp = int(datetime.now().timestamp() * 1000) - mock_agg.open = 150.0 - mock_agg.high = 152.0 - mock_agg.low = 149.0 - mock_agg.close = 151.0 - mock_agg.volume = 500000 - mock_agg.vwap = 150.5 - mock_agg.transactions = 1000 - return mock_agg - - -@pytest.fixture -def mock_polygon_client(mock_polygon_agg): - """Mock Polygon REST client.""" - with patch("alphapy.data_sources.polygon.RESTClient") as mock: - client = MagicMock() - - # Mock list_aggs - return 100 bars - client.list_aggs.return_value = [mock_polygon_agg] * 100 - - # Mock list_trades - client.list_trades.return_value = [] - - # Mock get_snapshot_ticker - client.get_snapshot_ticker.return_value = None - - mock.return_value = client - yield client - - -@pytest.fixture -def mock_alpaca_account(): - """Create a mock Alpaca account.""" - account = MagicMock() - account.equity = 100000.0 - account.buying_power = 80000.0 - account.cash = 50000.0 - account.portfolio_value = 100000.0 - account.last_equity = 99500.0 - account.long_market_value = 45000.0 - account.short_market_value = 0.0 - account.initial_margin = 0.0 - account.maintenance_margin = 0.0 - account.daytrade_count = 0 - account.pattern_day_trader = False - account.trading_blocked = False - account.account_blocked = False - return account - - -@pytest.fixture -def mock_alpaca_position(): - """Create a mock Alpaca position.""" - position = MagicMock() - position.symbol = "AAPL" - position.qty = 100 - position.side.value = "long" - position.market_value = 15000.0 - position.cost_basis = 14500.0 - position.unrealized_pl = 500.0 - position.unrealized_plpc = 0.0345 - position.current_price = 150.0 - position.avg_entry_price = 145.0 - position.change_today = 0.02 - return position - - -@pytest.fixture -def mock_alpaca_order(): - """Create a mock Alpaca order.""" - order = MagicMock() - order.id = "test-order-123" - order.client_order_id = "client-123" - order.symbol = "AAPL" - order.side.value = "buy" - order.qty = 10 - order.filled_qty = 0 - order.type.value = "market" - order.status.value = "accepted" - order.limit_price = None - order.stop_price = None - order.filled_avg_price = None - order.created_at = datetime.now() - return order - - -@pytest.fixture -def mock_alpaca_client(mock_alpaca_account, mock_alpaca_order): - """Mock Alpaca Trading Client.""" - with patch("agent.utils.alpaca_client.TradingClient") as mock: - client = MagicMock() - - # Mock account - client.get_account.return_value = mock_alpaca_account - - # Mock positions (empty by default) - client.get_all_positions.return_value = [] - client.get_open_position.side_effect = Exception("Position not found") - - # Mock order submission - client.submit_order.return_value = mock_alpaca_order - - # Mock cancel order - client.cancel_order_by_id.return_value = None - - # Mock get orders - client.get_orders.return_value = [] - - # Mock close position - client.close_position.return_value = mock_alpaca_order - - # Mock close all positions - client.close_all_positions.return_value = [] - - mock.return_value = client - yield client - - -@pytest.fixture -def mock_anthropic_client(): - """Mock Anthropic Claude API client.""" - with patch("anthropic.Anthropic") as mock: - client = MagicMock() - - # Mock messages.create response - response = MagicMock() - response.content = [MagicMock(type="text", text="Test response from Claude")] - response.stop_reason = "end_turn" - client.messages.create.return_value = response - - mock.return_value = client - yield client - - # ============================================================================ # Mock Model Fixtures # ============================================================================ @pytest.fixture def mock_xgb_model(tmp_path): - """Create a mock XGBoost model for testing.""" + """Create a mock model with the on-disk layout AlphaPy expects.""" from sklearn.ensemble import GradientBoostingClassifier import joblib - # Create a simple model model = GradientBoostingClassifier(n_estimators=10, random_state=42) X = np.random.randn(100, 10) y = np.random.randint(0, 2, 100) model.fit(X, y) - # Create run directory structure model_dir = tmp_path / "model" config_dir = tmp_path / "config" model_dir.mkdir() config_dir.mkdir() - # Save model joblib.dump(model, model_dir / "xgb_predictor.pkl") - # Save feature map feature_names = [f"feature_{i}" for i in range(10)] feature_map = {name: i for i, name in enumerate(feature_names)} joblib.dump(feature_map, model_dir / "feature_map.pkl") - # Save config config = {"target": "target", "algorithms": ["xgb"]} import yaml with open(config_dir / "model.yml", "w") as f: @@ -328,45 +92,6 @@ def mock_xgb_model(tmp_path): } -# ============================================================================ -# Environment Fixtures -# ============================================================================ - -@pytest.fixture -def mock_alpaca_env(): - """Set mock Alpaca environment variables.""" - with patch.dict(os.environ, { - "ALPACA_API_KEY": "test-api-key", - "ALPACA_API_SECRET": "test-api-secret", - "ALPACA_PAPER": "true", - }): - yield - - -@pytest.fixture -def mock_polygon_env(): - """Set mock Polygon environment variable.""" - with patch.dict(os.environ, { - "POLYGON_API_KEY": "test-polygon-key", - }): - yield - - -@pytest.fixture -def mock_anthropic_env(): - """Set mock Anthropic environment variable.""" - with patch.dict(os.environ, { - "ANTHROPIC_API_KEY": "test-anthropic-key", - }): - yield - - -@pytest.fixture -def mock_all_env(mock_alpaca_env, mock_polygon_env, mock_anthropic_env): - """Set all mock environment variables.""" - yield - - # ============================================================================ # Configuration Fixtures # ============================================================================ @@ -374,7 +99,7 @@ def mock_all_env(mock_alpaca_env, mock_polygon_env, mock_anthropic_env): @pytest.fixture def mock_alphapy_config(temp_dir): """Create a mock AlphaPy configuration for testing.""" - config = { + return { 'directory': temp_dir, 'file_extension': 'csv', 'separator': ',', @@ -390,142 +115,3 @@ def mock_alphapy_config(temp_dir): 'test_size': 0.2, 'validation_size': 0.2, } - return config - - -@pytest.fixture -def sample_agent_config(): - """Sample agent configuration.""" - return { - "model": "claude-sonnet-4-20250514", - "max_tokens": 4096, - "temperature": 0.7, - "symbols_stocks": ["AAPL", "TSLA"], - "symbols_crypto": ["BTC/USD"], - "timeframe": "5Min", - "bar_lookback": 100, - "run_dir": "projects/test/runs/latest", - "algo": "xgb", - "prob_min": 0.55, - "max_position_value": 5000.0, - "max_portfolio_exposure": 25000.0, - "max_positions": 5, - "daily_loss_limit": 0.02, - "loop_interval_seconds": 300, - } - - -@pytest.fixture -def sample_risk_config(): - """Sample risk configuration.""" - return { - "max_position_value": 5000.0, - "max_portfolio_exposure": 25000.0, - "max_positions": 5, - "max_symbol_pct": 0.25, - "daily_loss_limit": 0.02, - "position_stop_loss": 0.01, - "min_order_value": 100.0, - } - - -# ============================================================================ -# Indicator Test Fixtures -# ============================================================================ - -@pytest.fixture -def indicator_test_data(): - """Create test data specifically for indicator validation.""" - # Use deterministic data for indicator tests - n = 200 - np.random.seed(42) - - # Trending data with noise - trend = np.linspace(100, 150, n) - noise = np.random.normal(0, 2, n) - prices = trend + noise - - # Add some volatility spikes - prices[50:60] += np.random.normal(0, 5, 10) - prices[150:160] -= np.random.normal(0, 5, 10) - - return pl.DataFrame({ - "datetime": [datetime.now() - timedelta(minutes=5 * i) for i in range(n)][::-1], - "open": prices * (1 + np.random.uniform(-0.002, 0.002, n)), - "high": prices * (1 + np.abs(np.random.normal(0, 0.01, n))), - "low": prices * (1 - np.abs(np.random.normal(0, 0.01, n))), - "close": prices, - "volume": np.random.randint(100000, 1000000, n), - }) - - -# ============================================================================ -# Portfolio Test Fixtures -# ============================================================================ - -@pytest.fixture -def sample_positions_dict(): - """Sample positions dictionary for portfolio tests.""" - return [ - { - "symbol": "AAPL", - "qty": 100.0, - "side": "long", - "avg_entry_price": 145.0, - "current_price": 150.0, - "market_value": 15000.0, - "cost_basis": 14500.0, - "unrealized_pl": 500.0, - "unrealized_plpc": 0.0345, - "change_today": 0.02, - }, - { - "symbol": "TSLA", - "qty": 50.0, - "side": "long", - "avg_entry_price": 200.0, - "current_price": 210.0, - "market_value": 10500.0, - "cost_basis": 10000.0, - "unrealized_pl": 500.0, - "unrealized_plpc": 0.05, - "change_today": -0.01, - }, - ] - - -# ============================================================================ -# Trading Test Fixtures -# ============================================================================ - -@pytest.fixture -def sample_signals(): - """Sample trading signals for testing.""" - return { - "AAPL": { - "prediction": 1, - "probability": 0.72, - "signal": "long", - "latest_close": 150.0, - "latest_time": str(datetime.now()), - }, - "TSLA": { - "prediction": 0, - "probability": 0.45, - "signal": "none", - "latest_close": 210.0, - "latest_time": str(datetime.now()), - }, - } - - -@pytest.fixture -def sample_order_params(): - """Sample order parameters for testing.""" - return { - "symbol": "AAPL", - "side": "buy", - "quantity": 10, - "order_type": "market", - "time_in_force": "day", - } diff --git a/tests/test_imports.py b/tests/test_imports.py index 67d6d4c..53dc78f 100644 --- a/tests/test_imports.py +++ b/tests/test_imports.py @@ -52,13 +52,19 @@ def test_features_import(self): raise -class TestOptionalModules: - """Test optional modules that might have external dependencies.""" +class TestSupportingModules: + """Test supporting modules that are still part of the package.""" - def test_market_flow_import(self): - """Test market flow module import.""" + def test_variables_import(self): + """Test variables module import.""" try: - from alphapy import market_flow - assert market_flow is not None + from alphapy import variables + assert variables is not None except ImportError as e: - pytest.skip(f"Market flow module not available: {e}") \ No newline at end of file + pytest.skip(f"Skipping variables import due to optional dependency issue: {e}") + + def test_calendrical_import(self): + """Test calendrical module import (now hosts dateparts/timeparts/bizday).""" + from alphapy import calendrical + assert calendrical is not None + assert hasattr(calendrical, "dateparts")