Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion alphapy/alphapy_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def training_pipeline(alphapy_specs, model):
drop = model.specs['drop']
extension = model.specs['extension']
fs_lofo = model.specs['fs_lofo']
fs_lofo_algorithms = model.specs.get('fs_lofo_algorithms', [])
fs_univariate = model.specs['fs_univariate']
group_id = model.specs['rank_group_id']
grid_search = model.specs['grid_search']
Expand Down Expand Up @@ -358,7 +359,7 @@ def training_pipeline(alphapy_specs, model):
failed_algos.append(algo)
if est is not None:
# select LOFO features
if fs_lofo:
if fs_lofo and (not fs_lofo_algorithms or algo in fs_lofo_algorithms):
select_features_lofo(model, algo, est)
# run classic train/test model pipeline
model = first_fit(model, algo, est)
Expand Down
58 changes: 52 additions & 6 deletions alphapy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,7 @@ def get_model_config(directory='.'):
specs['iso_neighbors'] = features_section['isomap']['neighbors']
# LOFO
specs['fs_lofo'] = features_section['lofo']['option']
specs['fs_lofo_algorithms'] = features_section['lofo'].get('algorithms', [])
# log transformation
specs['log_transform'] = features_section['log_transform']['option']
specs['pvalue_level'] = features_section['log_transform']['pvalue_level']
Expand Down Expand Up @@ -455,6 +456,7 @@ def get_model_config(directory='.'):
logger.info('factors = %s', specs['factors'])
logger.info('features [X] = %s', specs['features'])
logger.info('fs_lofo = %r', specs['fs_lofo'])
logger.info('fs_lofo_algorithms= %r', specs['fs_lofo_algorithms'])
logger.info('fs_univariate = %r', specs['fs_univariate'])
logger.info('fs_uni_grid = %s', specs['fs_uni_grid'])
logger.info('fs_uni_pct = %d', specs['fs_uni_pct'])
Expand Down Expand Up @@ -674,13 +676,52 @@ def save_feature_map(model):
# Save model object

logger.info("Writing feature map to %s", full_path)
if model.lofo_df:
model.feature_map['lofo_importances'] = model.lofo_df.copy()
joblib.dump(model.feature_map, full_path)


#
# Functions get_feature_selection_support, apply_feature_selection_support, first_fit
#

def get_feature_selection_support(model, algo):
    r"""Return the feature mask to apply before the base estimator runs.

    Two entries of ``model.feature_map`` are consulted, in priority order:
    the algorithm-specific LOFO mask stored under ``('support_lofo', algo)``
    and then the global univariate mask stored under ``'support_uni'``.
    The first entry that exists and selects at least one feature wins, so a
    LOFO mask that rejects every feature falls through to the univariate
    mask rather than yielding an empty matrix.

    Parameters
    ----------
    model : object
        Object exposing a ``feature_map`` mapping.
    algo : str
        Algorithm key used for the LOFO lookup.

    Returns
    -------
    support : object or None
        The stored mask exactly as found in ``model.feature_map``, or
        ``None`` when neither mask exists or both are all-false.

    """

    lookup_order = (('support_lofo', algo), 'support_uni')
    for map_key in lookup_order:
        try:
            mask = model.feature_map[map_key]
            # An all-false (or empty) mask is treated like a missing one.
            if np.any(mask):
                return mask
        except KeyError:
            continue

    return None


def apply_feature_selection_support(model, algo, X):
    r"""Restrict the columns of ``X`` to the effective feature mask.

    The mask is obtained from ``get_feature_selection_support``. When no
    mask is available, ``X`` is returned unchanged; otherwise the result is
    ``X[:, mask]``.

    Parameters
    ----------
    model : object
        Model passed through to ``get_feature_selection_support``.
    algo : str
        Algorithm key passed through to ``get_feature_selection_support``.
    X : array-like
        Two-dimensional data supporting ``X[:, mask]`` column indexing.

    Returns
    -------
    X_selected : array-like
        ``X`` itself, or its column subset.

    """

    mask = get_feature_selection_support(model, algo)
    return X if mask is None else X[:, mask]


def first_fit(model, algo, est):
r"""Fit the model before optimization.

Expand Down Expand Up @@ -725,7 +766,7 @@ def first_fit(model, algo, est):

# Extract model data.

X_train = model.X_train
X_train = apply_feature_selection_support(model, algo, model.X_train)
y_train = model.y_train
groups_train = model.groups_train

Expand Down Expand Up @@ -784,7 +825,11 @@ def first_fit(model, algo, est):
model.estimators[algo] = est

# Copy feature name master into feature names per algorithm
model.fnames_algo[algo] = model.feature_names
support = get_feature_selection_support(model, algo)
if support is None:
model.fnames_algo[algo] = model.feature_names
else:
model.fnames_algo[algo] = list(np.array(model.feature_names)[support])

# Record importances and coefficients if necessary.

Expand Down Expand Up @@ -840,13 +885,14 @@ def make_predictions(model, algo):

# Extract model data

X_train = apply_feature_selection_support(model, algo, model.X_train)
X_test = apply_feature_selection_support(model, algo, model.X_test)
try:
support = model.support[algo]
X_train = model.X_train[:, support]
X_test = model.X_test[:, support]
X_train = X_train[:, support]
X_test = X_test[:, support]
except:
X_train = model.X_train
X_test = model.X_test
pass

y_train = model.y_train
groups_train = model.groups_train
Expand Down
6 changes: 4 additions & 2 deletions alphapy/optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#

from alphapy.globals import ModelType
from alphapy.model import apply_feature_selection_support

from contextlib import contextmanager
from datetime import datetime
Expand Down Expand Up @@ -292,11 +293,12 @@ def hyper_grid_search(model, estimator):
est = model.estimators[algo]

# Extract model data
X_train = apply_feature_selection_support(model, algo, model.X_train)
try:
support = model.support[algo]
X_train = model.X_train[:, support]
X_train = X_train[:, support]
except:
X_train = model.X_train
pass
y_train = model.y_train

# Clone estimator and set to single-threaded to avoid nested parallelism with Optuna
Expand Down
2 changes: 1 addition & 1 deletion alphapy/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def plot_importances(model, partition):
for algo in model.algolist:
tag = USEP.join([pstring, algo])
# LOFO Feature Importances
if fs_lofo:
if fs_lofo and algo in model.lofo_df:
logger.info("LOFO Importances for Algorithm: %s", algo)
importance_df = model.lofo_df[algo].copy()
importance_df['color'] = (importance_df['importance_mean'] > 0).map({True: 'g', False: 'r'})
Expand Down
79 changes: 79 additions & 0 deletions tests/test_model_feature_selection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import polars as pl
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin

from alphapy.globals import ModelType
from alphapy.model import first_fit
from alphapy.model import Model


class ShapeRecordingClassifier(BaseEstimator, ClassifierMixin):
    """Stub classifier that records how many feature columns it was fit on.

    ``fit`` stores the column count of ``X`` in ``n_features_seen_`` and the
    sorted unique labels in ``classes_``. ``predict`` always answers with the
    first label in ``classes_``, once per row of ``X``.
    """

    _estimator_type = "classifier"

    def fit(self, X, y):
        """Record the width of ``X`` and the sorted distinct labels of ``y``."""
        column_count = X.shape[1]
        self.n_features_seen_ = column_count
        distinct_labels = set(y)
        self.classes_ = sorted(distinct_labels)
        return self

    def predict(self, X):
        """Return the first known class label repeated for every row of ``X``."""
        constant_label = self.classes_[0]
        row_count = X.shape[0]
        return [constant_label] * row_count


def make_model():
    """Build a small classification ``Model`` fixture for the tests.

    The model is configured with a single algorithm named ``"TEST"`` and is
    given a six-row training frame with three feature columns (``a``, ``b``,
    ``c``), a binary ``target`` column, and matching ``feature_names``.
    """
    specs = {
        "algorithms": ["TEST"],
        "cv_folds": 2,
        "esr": 10,
        "model_type": ModelType.classification,
        "n_jobs": 1,
        "scorer": "accuracy",
        "seed": 13,
        "shuffle": False,
        "split": 0.25,
        "ts_option": False,
        "verbosity": 0,
    }
    fixture = Model(specs)

    feature_columns = {
        "a": [0, 1, 2, 3, 4, 5],
        "b": [5, 4, 3, 2, 1, 0],
        "c": [1, 1, 0, 0, 1, 0],
    }
    fixture.X_train = pl.DataFrame(feature_columns)

    target_column = {"target": [0, 1, 0, 1, 0, 1]}
    fixture.y_train = pl.DataFrame(target_column)

    fixture.feature_names = ["a", "b", "c"]
    return fixture


def test_first_fit_applies_univariate_support():
    """With only a univariate mask set, ``first_fit`` trains on the masked columns."""
    algo = "TEST"
    model = make_model()
    model.feature_map["support_uni"] = [True, False, True]

    fitted = first_fit(model, algo, ShapeRecordingClassifier())

    # Two of the three columns survive the univariate mask.
    recorded_estimator = fitted.estimators[algo]
    assert recorded_estimator.n_features_seen_ == 2
    assert fitted.fnames_algo[algo] == ["a", "c"]


def test_first_fit_prefers_lofo_support_over_univariate_support():
    """A non-empty LOFO mask for the algorithm takes precedence over the univariate mask."""
    algo = "TEST"
    model = make_model()
    model.feature_map["support_uni"] = [True, True, True]
    model.feature_map["support_lofo", algo] = [False, True, False]

    fitted = first_fit(model, algo, ShapeRecordingClassifier())

    # Only the single LOFO-selected column reaches the estimator.
    recorded_estimator = fitted.estimators[algo]
    assert recorded_estimator.n_features_seen_ == 1
    assert fitted.fnames_algo[algo] == ["b"]


def test_first_fit_uses_univariate_support_when_lofo_selects_no_features():
    """An all-false LOFO mask is ignored and the univariate mask is used instead."""
    algo = "TEST"
    model = make_model()
    model.feature_map["support_uni"] = [True, False, True]
    model.feature_map["support_lofo", algo] = [False, False, False]

    fitted = first_fit(model, algo, ShapeRecordingClassifier())

    # The fallback keeps the two univariate-selected columns.
    recorded_estimator = fitted.estimators[algo]
    assert recorded_estimator.n_features_seen_ == 2
    assert fitted.fnames_algo[algo] == ["a", "c"]
Loading