From e4657bfcf9227d7ccbb8794ef8c98ad885175810 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Wed, 29 May 2024 21:57:34 -0700
Subject: [PATCH 1/7] raising minimum pandas to 1.5 and Python to 3.10

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index de708f8..a898c17 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "ISLP"
 dependencies = ["numpy>=1.7.1",
                "scipy>=0.9",
-               "pandas>=0.20",
+               "pandas>=1.5",
                "lxml", # pandas needs this for html
                "scikit-learn>=1.2",
                "joblib",
@@ -15,7 +15,7 @@ dependencies = ["numpy>=1.7.1",
                ]
 description  = "Library for ISLP labs"
 readme = "README.md"
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 license = {file = "LICENSE"}
 keywords = []
 authors = [

From a5d54b3c9816b0194bbee119b79bff70ca0b56e7 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 31 Jan 2026 16:27:28 -0800
Subject: [PATCH 2/7] fixes to build requirements

---
 pyproject.toml | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a898c17..aa30d72 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,23 @@ classifiers = ["Development Status :: 3 - Alpha",
 	       ]
 dynamic = ["version"]
 
+[tool.setuptools]
+packages = [
+    "ISLP",
+    "ISLP.models", 
+    "ISLP.bart",
+    "ISLP.torch",
+    "ISLP.data"
+]
+include-package-data = true
+
+[tool.setuptools.package-data]
+ISLP = ["data/*.csv", "data/*.npy", "data/*.data"]
+
+[tool.setuptools.dynamic]
+version = {attr = "ISLP.__version__"}  # the ISLP package must define __version__
+
+
 [project.urls]  # Optional
 "Homepage" = "https://github.com/intro-stat-learning/ISLP"
 "Bug Reports" = "https://github.com/intro-stat-learning/ISLP/issues"
@@ -51,8 +68,14 @@ doc = ['Sphinx>=3.0']
 [build-system]
 requires = ["setuptools>=42",
             "wheel",
-	    "versioneer[toml]",
 	    "Sphinx>=1.0"
+            "numpy",
+            "pandas",
+            "scipy",
+            "scikit-learn",
+            "joblib",
+            "statsmodels",
+	    "versioneer[toml]"
 	    ]
 build-backend = "setuptools.build_meta"
 

From 273a72765e9f875753b4de1b8fb318bd9cc6eb24 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 31 Jan 2026 16:28:31 -0800
Subject: [PATCH 3/7] simplifying setup.py; packaging logic now lives in pyproject.toml

---
 setup.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/setup.py b/setup.py
index e068529..e0710d3 100755
--- a/setup.py
+++ b/setup.py
@@ -27,21 +27,21 @@
 
 def main(**extra_args):
     setup(version=versioneer.get_version(),
-          packages     = ['ISLP',
-                          'ISLP.models',
-                          'ISLP.models',
-                          'ISLP.bart',
-                          'ISLP.torch',
-                          'ISLP.data'
-                          ],
-          ext_modules = EXTS,
-          package_data = {"ISLP":["data/*csv", "data/*npy", "data/*data"]},
-          include_package_data=True,
-          data_files=[],
-          scripts=[],
-          long_description=long_description,
-          cmdclass = cmdclass,
-          **extra_args
+          # packages     = ['ISLP',
+          #                 'ISLP.models',
+          #                 'ISLP.models',
+          #                 'ISLP.bart',
+          #                 'ISLP.torch',
+          #                 'ISLP.data'
+          #                 ],
+          # ext_modules = EXTS,
+          # package_data = {"ISLP":["data/*csv", "data/*npy", "data/*data"]},
+          # include_package_data=True,
+          # data_files=[],
+          # scripts=[],
+          # long_description=long_description,
+          # cmdclass = cmdclass,
+          # **extra_args
          )
 
 #simple way to test what setup will do

From 4281ada66688c196aed6a5fcb998585e13fa0688 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Sat, 31 Jan 2026 16:37:11 -0800
Subject: [PATCH 4/7] fixing _estimator_type flags

---
 ISLP/models/generic_selector.py | 40 +++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/ISLP/models/generic_selector.py b/ISLP/models/generic_selector.py
index 37b1f93..7c9329e 100644
--- a/ISLP/models/generic_selector.py
+++ b/ISLP/models/generic_selector.py
@@ -28,7 +28,10 @@
 import scipy as sp
 
 from sklearn.metrics import get_scorer
-from sklearn.base import (clone, MetaEstimatorMixin)
+from sklearn.base import (clone,
+                          MetaEstimatorMixin,
+                          is_classifier,
+                          is_regressor)
 from sklearn.model_selection import cross_val_score
 from joblib import Parallel, delayed
 
@@ -149,13 +152,13 @@ def __init__(self,
         self.scoring = scoring
 
         if scoring is None:
-            if self.est_._estimator_type == 'classifier':
+            if is_classifier(self.est_):
                 scoring = 'accuracy'
-            elif self.est_._estimator_type == 'regressor':
+            elif is_regressor(self.est_):
                 scoring = 'r2'
             else:
-                raise AttributeError('Estimator must '
-                                     'be a Classifier or Regressor.')
+                scoring = None
+                
         if isinstance(scoring, str):
             self.scorer = get_scorer(scoring)
         else:
@@ -166,7 +169,7 @@ def __init__(self,
         # don't mess with this unless testing
         self._TESTING_INTERRUPT_MODE = False
 
-    def fit(self, X, y, groups=None, **params):
+    def fit(self, X, y, groups=None, **fit_params):
         """Perform feature selection and learn model from training data.
 
         Parameters
@@ -183,7 +186,7 @@ def fit(self, X, y, groups=None, **params):
         groups: array-like, with shape (n_samples,), optional
             Group labels for the samples used while splitting the dataset into
             train/test set. Passed to the fit method of the cross-validator.
-        params: various, optional
+        fit_params: various, optional
             Additional parameters that are being passed to the estimator.
             For example, `sample_weights=weights`.
 
@@ -218,7 +221,7 @@ def fit(self, X, y, groups=None, **params):
                                       groups=groups,
                                       cv=self.cv,
                                       pre_dispatch=self.pre_dispatch,
-                                      **params)
+                                      **fit_params)
 
         # keep a running track of the best state
 
@@ -242,7 +245,7 @@ def fit(self, X, y, groups=None, **params):
                                             X,
                                             y,
                                             groups=groups,
-                                            **params)
+                                            **fit_params)
                 iteration += 1
                 cur, best_, self.finished_ = self.update_results_check(results_,
                                                                        self.path_,
@@ -287,7 +290,7 @@ def fit_transform(self,
                       X,
                       y,
                       groups=None,
-                      **params):
+                      **fit_params):
         """Fit to training data then reduce X to its most important features.
 
         Parameters
@@ -304,7 +307,7 @@ def fit_transform(self,
         groups: array-like, with shape (n_samples,), optional
             Group labels for the samples used while splitting the dataset into
             train/test set. Passed to the fit method of the cross-validator.
-        params: various, optional
+        fit_params: various, optional
             Additional parameters that are being passed to the estimator.
             For example, `sample_weights=weights`.
 
@@ -313,7 +316,7 @@ def fit_transform(self,
         Reduced feature subset of X, shape={n_samples, k_features}
 
         """
-        self.fit(X, y, groups=groups, **params)
+        self.fit(X, y, groups=groups, **fit_params)
         return self.transform(X)
 
     def get_metric_dict(self, confidence_interval=0.95):
@@ -368,7 +371,7 @@ def _batch(self,
                X,
                y,
                groups=None,
-               **params):
+               **fit_params):
 
         results = []
 
@@ -388,7 +391,7 @@ def _batch(self,
                              groups=groups,
                              cv=self.cv,
                              pre_dispatch=self.pre_dispatch,
-                             **params)
+                             **fit_params)
                             for state in candidates)
 
             for state, scores in work:
@@ -484,8 +487,11 @@ def _calc_score(estimator,
                 groups=None,
                 cv=None,
                 pre_dispatch='2*n_jobs',
-                **params):
+                **fit_params):
     
+    if scorer is None:
+        scorer = lambda estimator, X, y: estimator.score(X, y)
+
     X_state = build_submodel(X, state)
 
     if cv:
@@ -497,11 +503,11 @@ def _calc_score(estimator,
                                  scoring=scorer,
                                  n_jobs=1,
                                  pre_dispatch=pre_dispatch,
-                                 params=params)
+                                 fit_params=fit_params)
     else:
         estimator.fit(X_state,
                       y,
-                      **params)
+                      **fit_params)
         scores = np.array([scorer(estimator,
                                   X_state,
                                   y)])

From 4d9f0286be6c94834c9a0657a0aa7c42daf0093f Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 2 Feb 2026 12:34:49 -0800
Subject: [PATCH 5/7] feat: Add scikit-learn estimator type tags

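Adds the `__sklearn_tags__` method to the `sklearn_sm` and `sklearn_selected` wrappers. This allows scikit-learn to correctly identify the estimator type (regressor or classifier) based on the statsmodels model: `sm.OLS` is tagged as a regressor, and a `sm.GLM` whose `family` model argument is a `Binomial` instance is tagged as a classifier. Any other model keeps the tags inherited from the parent class.

This change enables the use of scikit-learn's cross-validation and model selection tools with these wrappers.

Tests have been added to verify that OLS and GLM Binomial models are correctly identified.

Also adds the missing comma after "Sphinx>=1.0" in the `[build-system]` requirements in pyproject.toml.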
---
 ISLP/models/sklearn_wrap.py       | 15 +++++++++-
 pyproject.toml                    |  2 +-
 tests/models/test_sklearn_wrap.py | 46 +++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 tests/models/test_sklearn_wrap.py

diff --git a/ISLP/models/sklearn_wrap.py b/ISLP/models/sklearn_wrap.py
index 123130b..121da75 100644
--- a/ISLP/models/sklearn_wrap.py
+++ b/ISLP/models/sklearn_wrap.py
@@ -49,7 +49,17 @@ def __init__(self,
         self.model_type = model_type
         self.model_spec = model_spec
         self.model_args = model_args
-        
+
+    def __sklearn_tags__(self):    
+        tags = super().__sklearn_tags__()
+        if self.model_type == sm.OLS:
+            tags.estimator_type = 'regressor'
+        elif (issubclass(self.model_type, sm.GLM) and
+              'family' in self.model_args and
+              isinstance(self.model_args.get('family', None), sm.families.Binomial)):
+            tags.estimator_type = 'classifier'
+        return tags
+
     def fit(self, X, y):
         """
         Fit a statsmodel model
@@ -171,6 +181,9 @@ def __init__(self,
         self.cv = cv
         self.scoring = scoring
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        return tags
                                      
     def fit(self, X, y):
         """
diff --git a/pyproject.toml b/pyproject.toml
index aa30d72..5fe63fd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,7 @@ doc = ['Sphinx>=3.0']
 [build-system]
 requires = ["setuptools>=42",
             "wheel",
-	    "Sphinx>=1.0"
+	    "Sphinx>=1.0",
             "numpy",
             "pandas",
             "scipy",
diff --git a/tests/models/test_sklearn_wrap.py b/tests/models/test_sklearn_wrap.py
new file mode 100644
index 0000000..c3616bd
--- /dev/null
+++ b/tests/models/test_sklearn_wrap.py
@@ -0,0 +1,46 @@
+
+import numpy as np
+import pandas as pd
+import statsmodels.api as sm
+from sklearn.base import is_classifier, is_regressor
+import pytest
+
+from ISLP.models.sklearn_wrap import sklearn_sm, sklearn_selected
+from ISLP.models.model_spec import ModelSpec
+from ISLP.models.strategy import min_max
+
+@pytest.fixture
+def model_setup():
+    X = pd.DataFrame({'X1': np.random.rand(10), 'X2': np.random.rand(10), 'X3': np.random.rand(10)})
+    y = pd.Series(np.random.randint(0, 2, 10)) # For classifier
+    model_spec_dummy = ModelSpec(['X1', 'X2', 'X3']).fit(X)
+    min_max_strategy_dummy = min_max(model_spec_dummy, min_terms=1, max_terms=2)
+    return X, y, model_spec_dummy, min_max_strategy_dummy
+
+def test_OLS_is_regressor():
+    model = sklearn_sm(sm.OLS)
+    assert model.__sklearn_tags__().estimator_type == 'regressor'
+    assert is_regressor(model)
+
+def test_GLM_binomial_is_classifier():
+    model = sklearn_sm(sm.GLM, model_args={'family': sm.families.Binomial()})
+    assert model.__sklearn_tags__().estimator_type == 'classifier'
+    assert is_classifier(model)
+
+def test_GLM_binomial_probit_is_classifier():
+    model = sklearn_sm(sm.GLM, model_args={'family': sm.families.Binomial(link=sm.families.links.Probit())})
+    assert model.__sklearn_tags__().estimator_type == 'classifier'
+    assert is_classifier(model)
+
+
+def test_selected_OLS_is_regressor(model_setup):
+    X, y, model_spec_dummy, min_max_strategy_dummy = model_setup
+    model = sklearn_selected(sm.OLS, strategy=min_max_strategy_dummy)
+    assert model.__sklearn_tags__().estimator_type == 'regressor'
+    assert is_regressor(model)
+
+def test_selected_GLM_binomial_is_classifier(model_setup):
+    X, y, model_spec_dummy, min_max_strategy_dummy = model_setup
+    model = sklearn_selected(sm.GLM, strategy=min_max_strategy_dummy, model_args={'family': sm.families.Binomial()})
+    assert model.__sklearn_tags__().estimator_type == 'classifier'
+    assert is_classifier(model)

From 2beabce8cc2f027cd3f44eb158f47f1b4a40a9c3 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 2 Feb 2026 12:44:26 -0800
Subject: [PATCH 6/7] removing redundant setup.py

---
 setup.py | 50 --------------------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100755 setup.py

diff --git a/setup.py b/setup.py
deleted file mode 100755
index e0710d3..0000000
--- a/setup.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python
-''' Installation script for ISLP package '''
-
-import os
-import sys
-from os.path import join as pjoin, dirname, exists
-# BEFORE importing distutils, remove MANIFEST. distutils doesn't properly
-# update it when the contents of directories change.
-if exists('MANIFEST'): os.remove('MANIFEST')
-
-# Unconditionally require setuptools
-import setuptools
-
-# Package for getting versions from git tags
-import versioneer
-
-from setuptools import setup
-
-# Define extensions
-EXTS = []
-
-cmdclass = versioneer.get_cmdclass()
-
-# get long_description
-
-long_description = open('README.md', 'rt', encoding='utf-8').read()
-
-def main(**extra_args):
-    setup(version=versioneer.get_version(),
-          # packages     = ['ISLP',
-          #                 'ISLP.models',
-          #                 'ISLP.models',
-          #                 'ISLP.bart',
-          #                 'ISLP.torch',
-          #                 'ISLP.data'
-          #                 ],
-          # ext_modules = EXTS,
-          # package_data = {"ISLP":["data/*csv", "data/*npy", "data/*data"]},
-          # include_package_data=True,
-          # data_files=[],
-          # scripts=[],
-          # long_description=long_description,
-          # cmdclass = cmdclass,
-          # **extra_args
-         )
-
-#simple way to test what setup will do
-#python setup.py install --prefix=/tmp
-if __name__ == "__main__":
-    main()

From bcd85989111f50bca3667cc123708b571614cfc6 Mon Sep 17 00:00:00 2001
From: Jonathan Taylor <jonathan.taylor@stanford.edu>
Date: Mon, 2 Feb 2026 12:49:40 -0800
Subject: [PATCH 7/7] removing unused pkg_resources import

---
 ISLP/torch/imdb.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ISLP/torch/imdb.py b/ISLP/torch/imdb.py
index 617489d..3dfacfe 100644
--- a/ISLP/torch/imdb.py
+++ b/ISLP/torch/imdb.py
@@ -12,7 +12,6 @@
 import torch
 from torch.utils.data import TensorDataset
 from scipy.sparse import load_npz
-from pkg_resources import resource_filename
 from pickle import load as load_pickle
 import urllib