From 147695e3486a5dc3a726aab6075c737e21925b37 Mon Sep 17 00:00:00 2001 From: Ariel Rokem Date: Mon, 4 Mar 2024 15:51:31 -0800 Subject: [PATCH 1/4] Allow sklearn to update to current version. --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 74d704a..3b92e16 100644 --- a/setup.cfg +++ b/setup.cfg @@ -39,7 +39,7 @@ install_requires = pandas==2.1.4 requests seaborn==0.13.0 - scikit-learn==1.2.1 + scikit-learn>=1.2.1 sklearn_pandas>=2.0.0 tables==3.9.1 tqdm From 9c495241bbb73024ee075eac8a416fe0de135ba2 Mon Sep 17 00:00:00 2001 From: Ariel Rokem Date: Mon, 4 Mar 2024 15:59:09 -0800 Subject: [PATCH 2/4] Replaces deprecated utility decorator. --- afqinsight/_serial_bagging.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py index fe06d6e..5d5a85a 100644 --- a/afqinsight/_serial_bagging.py +++ b/afqinsight/_serial_bagging.py @@ -25,7 +25,7 @@ from sklearn.ensemble._base import _partition_estimators from sklearn.utils import check_random_state, check_array, indices_to_mask, resample from sklearn.utils.random import sample_without_replacement -from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import ( check_is_fitted, _check_sample_weight, @@ -610,7 +610,7 @@ def predict_log_proba(self, X): else: return np.log(self.predict_proba(X)) - @if_delegate_has_method(delegate="base_estimator") + @available_if(lambda self: hasattr(self, "base_estimator")) def decision_function(self, X): """Average of the decision functions of the base classifiers. From 09161074fbe198546f1ac1a6696d41e3a1de382f Mon Sep 17 00:00:00 2001 From: Ariel Rokem Date: Mon, 4 Mar 2024 17:12:51 -0800 Subject: [PATCH 3/4] Update: "base_estimator" => "estimator". 
--- afqinsight/_serial_bagging.py | 28 ++++++------ afqinsight/tests/test_bagging.py | 78 ++++++++++++++++---------------- 2 files changed, 52 insertions(+), 54 deletions(-) diff --git a/afqinsight/_serial_bagging.py b/afqinsight/_serial_bagging.py index 5d5a85a..2dd4382 100644 --- a/afqinsight/_serial_bagging.py +++ b/afqinsight/_serial_bagging.py @@ -103,7 +103,7 @@ def _parallel_build_estimators( max_samples = ensemble._max_samples bootstrap = ensemble.bootstrap bootstrap_features = ensemble.bootstrap_features - support_sample_weight = has_fit_parameter(ensemble.base_estimator_, "sample_weight") + support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight") if not support_sample_weight and sample_weight is not None: raise ValueError("The base estimator doesn't support sample weight") @@ -182,7 +182,7 @@ class SerialBaggingClassifier(BaggingClassifier): Parameters ---------- - base_estimator : object, default=None + estimator : object, default=None The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a decision tree. @@ -236,7 +236,7 @@ class SerialBaggingClassifier(BaggingClassifier): Attributes ---------- - base_estimator_ : estimator + estimator_ : estimator The base estimator from which the ensemble is grown. 
n_features_in_ : int @@ -287,7 +287,7 @@ class SerialBaggingClassifier(BaggingClassifier): def __init__( self, - base_estimator=None, + estimator=None, n_estimators=10, *, max_samples=1.0, @@ -301,7 +301,7 @@ def __init__( verbose=0, ): super().__init__( - base_estimator=base_estimator, + estimator=estimator, n_estimators=n_estimators, max_samples=max_samples, max_features=max_features, @@ -367,7 +367,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None): self._validate_estimator() if max_depth is not None: # pragma: no cover - self.base_estimator_.max_depth = max_depth + self.estimator_.max_depth = max_depth # Validate max_samples if max_samples is None: # pragma: no cover @@ -568,7 +568,7 @@ def predict_log_proba(self, X): classes corresponds to that in the attribute :term:`classes_`. """ check_is_fitted(self) - if hasattr(self.base_estimator_, "predict_log_proba"): + if hasattr(self.estimator_, "predict_log_proba"): # Check data X = check_array( X, accept_sparse=["csr", "csc"], dtype=None, force_all_finite=False @@ -610,7 +610,7 @@ def predict_log_proba(self, X): else: return np.log(self.predict_proba(X)) - @available_if(lambda self: hasattr(self, "base_estimator")) + @available_if(lambda self: hasattr(self, "estimator")) def decision_function(self, X): """Average of the decision functions of the base classifiers. @@ -690,7 +690,7 @@ class SerialBaggingRegressor(BaggingRegressor): Parameters ---------- - base_estimator : object, default=None + estimator : object, default=None The base estimator to fit on random subsets of the dataset. If None, then the base estimator is a decision tree. @@ -745,7 +745,7 @@ class SerialBaggingRegressor(BaggingRegressor): Attributes ---------- - base_estimator_ : estimator + estimator_ : estimator The base estimator from which the ensemble is grown. n_features_in_ : int @@ -780,7 +780,7 @@ class SerialBaggingRegressor(BaggingRegressor): >>> X, y = make_regression(n_samples=100, n_features=4, ... 
n_informative=2, n_targets=1, ... random_state=0, shuffle=False) - >>> regr = BaggingRegressor(base_estimator=SVR(), + >>> regr = BaggingRegressor(estimator=SVR(), ... n_estimators=10, random_state=0).fit(X, y) >>> regr.predict([[0, 0, 0, 0]]) array([-2.8720...]) @@ -803,7 +803,7 @@ class SerialBaggingRegressor(BaggingRegressor): def __init__( self, - base_estimator=None, + estimator=None, n_estimators=10, max_samples=1.0, max_features=1.0, @@ -816,7 +816,7 @@ def __init__( verbose=0, ): super().__init__( - base_estimator=base_estimator, + estimator=estimator, n_estimators=n_estimators, max_samples=max_samples, max_features=max_features, @@ -881,7 +881,7 @@ def _fit(self, X, y, max_samples=None, max_depth=None, sample_weight=None): self._validate_estimator() if max_depth is not None: # pragma: no cover - self.base_estimator_.max_depth = max_depth + self.estimator_.max_depth = max_depth # Validate max_samples if max_samples is None: # pragma: no cover diff --git a/afqinsight/tests/test_bagging.py b/afqinsight/tests/test_bagging.py index 09bd10e..354cfa7 100644 --- a/afqinsight/tests/test_bagging.py +++ b/afqinsight/tests/test_bagging.py @@ -69,7 +69,7 @@ def test_classification(): } ) - for base_estimator in [ + for estimator in [ None, DummyClassifier(), Perceptron(), @@ -79,7 +79,7 @@ def test_classification(): ]: for params in grid: SerialBaggingClassifier( - base_estimator=base_estimator, random_state=rng, **params + estimator=estimator, random_state=rng, **params ).fit(X_train, y_train).predict(X_test) @@ -127,7 +127,7 @@ def fit(self, X, y): ]: # Trained on sparse format sparse_classifier = SerialBaggingClassifier( - base_estimator=CustomSVC(decision_function_shape="ovr"), + estimator=CustomSVC(decision_function_shape="ovr"), random_state=1, **params, ).fit(X_train_sparse, y_train) @@ -135,7 +135,7 @@ def fit(self, X, y): # Trained on dense format dense_classifier = SerialBaggingClassifier( - base_estimator=CustomSVC(decision_function_shape="ovr"), + 
estimator=CustomSVC(decision_function_shape="ovr"), random_state=1, **params, ).fit(X_train, y_train) @@ -163,7 +163,7 @@ def test_regression(): } ) - for base_estimator in [ + for estimator in [ None, DummyRegressor(), DecisionTreeRegressor(), @@ -171,9 +171,9 @@ def test_regression(): SVR(), ]: for params in grid: - SerialBaggingRegressor( - base_estimator=base_estimator, random_state=rng, **params - ).fit(X_train, y_train).predict(X_test) + SerialBaggingRegressor(estimator=estimator, random_state=rng, **params).fit( + X_train, y_train + ).predict(X_test) def test_sparse_regression(): @@ -214,15 +214,13 @@ def fit(self, X, y): for params in parameter_sets: # Trained on sparse format sparse_classifier = SerialBaggingRegressor( - base_estimator=CustomSVR(), random_state=1, **params + estimator=CustomSVR(), random_state=1, **params ).fit(X_train_sparse, y_train) sparse_results = sparse_classifier.predict(X_test_sparse) # Trained on dense format dense_results = ( - SerialBaggingRegressor( - base_estimator=CustomSVR(), random_state=1, **params - ) + SerialBaggingRegressor(estimator=CustomSVR(), random_state=1, **params) .fit(X_train, y_train) .predict(X_test) ) @@ -251,33 +249,33 @@ def test_bootstrap_samples(): diabetes.data, diabetes.target, random_state=rng ) - base_estimator = DecisionTreeRegressor().fit(X_train, y_train) + estimator = DecisionTreeRegressor().fit(X_train, y_train) # without bootstrap, all trees are perfect on the training set ensemble = SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + estimator=DecisionTreeRegressor(), max_samples=1.0, bootstrap=False, random_state=rng, ).fit(X_train, y_train) - assert base_estimator.score(X_train, y_train) == ensemble.score(X_train, y_train) + assert estimator.score(X_train, y_train) == ensemble.score(X_train, y_train) # with bootstrap, trees are no longer perfect on the training set ensemble = SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + 
estimator=DecisionTreeRegressor(), max_samples=1.0, bootstrap=True, random_state=rng, ).fit(X_train, y_train) - assert base_estimator.score(X_train, y_train) > ensemble.score(X_train, y_train) + assert estimator.score(X_train, y_train) > ensemble.score(X_train, y_train) # check that each sampling correspond to a complete bootstrap resample. # the size of each bootstrap should be the same as the input data but # the data should be different (checked using the hash of the data). ensemble = SerialBaggingRegressor( - base_estimator=DummySizeEstimator(), bootstrap=True + estimator=DummySizeEstimator(), bootstrap=True ).fit(X_train, y_train) training_hash = [] for estimator in ensemble.estimators_: @@ -294,7 +292,7 @@ def test_bootstrap_features(): ) ensemble = SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + estimator=DecisionTreeRegressor(), max_features=1.0, bootstrap_features=False, random_state=rng, @@ -304,7 +302,7 @@ def test_bootstrap_features(): assert diabetes.data.shape[1] == np.unique(features).shape[0] ensemble = SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + estimator=DecisionTreeRegressor(), max_features=1.0, bootstrap_features=True, random_state=rng, @@ -324,7 +322,7 @@ def test_probability(): with np.errstate(divide="ignore", invalid="ignore"): # Normal case ensemble = SerialBaggingClassifier( - base_estimator=DecisionTreeClassifier(), random_state=rng + estimator=DecisionTreeClassifier(), random_state=rng ).fit(X_train, y_train) assert_array_almost_equal( @@ -337,7 +335,7 @@ def test_probability(): # Degenerate case, where some classes are missing ensemble = SerialBaggingClassifier( - base_estimator=LogisticRegression(), random_state=rng, max_samples=5 + estimator=LogisticRegression(), random_state=rng, max_samples=5 ).fit(X_train, y_train) assert_array_almost_equal( @@ -357,9 +355,9 @@ def test_oob_score_classification(): iris.data, iris.target, random_state=rng ) - for base_estimator in 
[DecisionTreeClassifier(), SVC()]: + for estimator in [DecisionTreeClassifier(), SVC()]: clf = SerialBaggingClassifier( - base_estimator=base_estimator, + estimator=estimator, n_estimators=100, bootstrap=True, oob_score=True, @@ -374,7 +372,7 @@ def test_oob_score_classification(): assert_warns( UserWarning, SerialBaggingClassifier( - base_estimator=base_estimator, + estimator=estimator, n_estimators=1, bootstrap=True, oob_score=True, @@ -394,7 +392,7 @@ def test_oob_score_regression(): ) clf = SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + estimator=DecisionTreeRegressor(), n_estimators=50, bootstrap=True, oob_score=True, @@ -409,7 +407,7 @@ def test_oob_score_regression(): assert_warns( UserWarning, SerialBaggingRegressor( - base_estimator=DecisionTreeRegressor(), + estimator=DecisionTreeRegressor(), n_estimators=1, bootstrap=True, oob_score=True, @@ -428,7 +426,7 @@ def test_single_estimator(): ) clf1 = SerialBaggingRegressor( - base_estimator=KNeighborsRegressor(), + estimator=KNeighborsRegressor(), n_estimators=1, bootstrap=False, bootstrap_features=False, @@ -464,7 +462,7 @@ def test_error(): ) # Test support of decision_function - assert not hasattr(SerialBaggingClassifier(base).fit(X, y), "decision_function") + assert hasattr(SerialBaggingClassifier(base).fit(X, y), "decision_function") def test_parallel_classification(): @@ -555,15 +553,15 @@ def test_gridsearch(): y[y == 2] = 1 # Grid search with scoring based on decision_function - parameters = {"n_estimators": (1, 2), "base_estimator__C": (1, 2)} + parameters = {"n_estimators": (1, 2), "estimator__C": (1, 2)} GridSearchCV(SerialBaggingClassifier(SVC()), parameters, scoring="roc_auc").fit( X, y ) -def test_base_estimator(): - # Check base_estimator and its default values. +def test_estimator(): + # Check estimator and its default values. 
rng = check_random_state(0) # Classification @@ -575,19 +573,19 @@ def test_base_estimator(): X_train, y_train ) - assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier) + assert isinstance(ensemble.estimator_, DecisionTreeClassifier) ensemble = SerialBaggingClassifier( DecisionTreeClassifier(), n_jobs=3, random_state=0 ).fit(X_train, y_train) - assert isinstance(ensemble.base_estimator_, DecisionTreeClassifier) + assert isinstance(ensemble.estimator_, DecisionTreeClassifier) ensemble = SerialBaggingClassifier(Perceptron(), n_jobs=3, random_state=0).fit( X_train, y_train ) - assert isinstance(ensemble.base_estimator_, Perceptron) + assert isinstance(ensemble.estimator_, Perceptron) # Regression X_train, X_test, y_train, y_test = train_test_split( @@ -598,18 +596,18 @@ def test_base_estimator(): X_train, y_train ) - assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor) + assert isinstance(ensemble.estimator_, DecisionTreeRegressor) ensemble = SerialBaggingRegressor( DecisionTreeRegressor(), n_jobs=3, random_state=0 ).fit(X_train, y_train) - assert isinstance(ensemble.base_estimator_, DecisionTreeRegressor) + assert isinstance(ensemble.estimator_, DecisionTreeRegressor) ensemble = SerialBaggingRegressor(SVR(), n_jobs=3, random_state=0).fit( X_train, y_train ) - assert isinstance(ensemble.base_estimator_, SVR) + assert isinstance(ensemble.estimator_, SVR) def test_bagging_with_pipeline(): @@ -802,7 +800,7 @@ def test_estimators_samples_deterministic(): SparseRandomProjection(n_components=2), LogisticRegression() ) clf = SerialBaggingClassifier( - base_estimator=base_pipeline, max_samples=0.5, random_state=0 + estimator=base_pipeline, max_samples=0.5, random_state=0 ) clf.fit(X, y) pipeline_estimator_coef = clf.estimators_[0].steps[-1][1].coef_.copy() @@ -942,7 +940,7 @@ def fit(self, X, y): self._sample_indices = y clf = SerialBaggingRegressor( - base_estimator=MyEstimator(), n_estimators=1, random_state=0 + estimator=MyEstimator(), 
n_estimators=1, random_state=0 ) clf.fit(X, y) From df47b7ff04305e29c07ba4371dbe59a83a112e5e Mon Sep 17 00:00:00 2001 From: Ariel Rokem Date: Mon, 4 Mar 2024 17:20:14 -0800 Subject: [PATCH 4/4] More "base_estimator" => "estimator". --- afqinsight/pipeline.py | 2 +- afqinsight/tests/test_pipelines.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/afqinsight/pipeline.py b/afqinsight/pipeline.py index ad5e8a6..c218051 100755 --- a/afqinsight/pipeline.py +++ b/afqinsight/pipeline.py @@ -273,7 +273,7 @@ def call_with_kwargs(Transformer, kwargs): else: ensembler_kwargs = {} - ensembler_kwargs["base_estimator"] = base_estimator + ensembler_kwargs["estimator"] = base_estimator if isinstance(ensemble_meta_estimator, str): if ensemble_meta_estimator.lower() == "bagging": diff --git a/afqinsight/tests/test_pipelines.py b/afqinsight/tests/test_pipelines.py index 5e27bae..1042192 100644 --- a/afqinsight/tests/test_pipelines.py +++ b/afqinsight/tests/test_pipelines.py @@ -113,9 +113,9 @@ def test_classifier_pipeline_steps( else: assert isinstance(pipeline.named_steps["estimate"], EnsembleStep) # nosec ensemble_params = pipeline.named_steps["estimate"].get_params() - correct_params = EnsembleStep(base_estimator=EstimatorStep()).get_params() - ensemble_base_est = ensemble_params.pop("base_estimator") - correct_params.pop("base_estimator") + correct_params = EnsembleStep(estimator=EstimatorStep()).get_params() + ensemble_base_est = ensemble_params.pop("estimator") + correct_params.pop("estimator") assert ensemble_params == correct_params # nosec assert isinstance(ensemble_base_est, EstimatorStep) # nosec else: @@ -132,9 +132,9 @@ def test_classifier_pipeline_steps( pipeline.named_steps["estimate"].regressor, EnsembleStep ) ensemble_params = pipeline.named_steps["estimate"].regressor.get_params() - correct_params = EnsembleStep(base_estimator=EstimatorStep()).get_params() - ensemble_base_est = ensemble_params.pop("base_estimator") - 
correct_params.pop("base_estimator") + correct_params = EnsembleStep(estimator=EstimatorStep()).get_params() + ensemble_base_est = ensemble_params.pop("estimator") + correct_params.pop("estimator") assert ensemble_params == correct_params # nosec assert isinstance(ensemble_base_est, EstimatorStep) # nosec