monte-flora · monte-flora · Apr 2, 2026 · Apr 2, 2026
diff --git a/skexplain/main/global_explainer.py b/skexplain/main/global_explainer.py
@@ -1228,6 +1228,16 @@ def feature_encoder_func(data):
         xdata = np.array([np.unique(original_feature_values)])
         xdata.sort()
 
+        # Determine the full set of categories from the original data
+        # so all bootstrap iterations produce ALE values aligned to the same categories.
+        X_full = self.X.copy()
+        if (X_full[feature].dtype.name != "category") or (not X_full[feature].cat.ordered):
+            X_full[feature] = X_full[feature].astype("string")
+            full_groups_order = order_groups(X_full, feature)
+            all_categories = full_groups_order.index.values
+        else:
+            all_categories = X_full[feature].cat.categories.values
+
         # Initialize an empty ale array
         ale = []
 
@@ -1355,7 +1365,11 @@ def feature_encoder_func(data):
 
             # Subtract the mean value to get the centered value.
             ale_temp = res_df["ale"] - sum(res_df["ale"] * groups_props)
-            ale.append(ale_temp)
+
+            # Reindex to the full set of categories so all bootstrap
+            # iterations have the same length. Missing categories get NaN.
+            ale_temp = ale_temp.reindex(all_categories, fill_value=np.nan)
+            ale.append(ale_temp.values)
 
         ale = np.array(ale, dtype=float)
 

diff --git a/tests/test_interpret_curves.py b/tests/test_interpret_curves.py
@@ -2,9 +2,10 @@
 # Unit test for the ALE and PD 
 # code in Scikit-Explain.
 #===================================================
-import sys, os 
+import sys, os
 sys.path.insert(0, os.path.dirname(os.getcwd()))
 
+import unittest
 import shap
 import numpy as np
 import skexplain
@@ -239,5 +240,48 @@ def test_mec(self):
         self.assertAlmostEqual(mec, 1., 4)
 
 
+class TestCategoricalALEHighCardinality(unittest.TestCase):
+    """Test categorical ALE with high-cardinality features (issue #86)."""
+
+    def setUp(self):
+        import pandas as pd
+        self.y = pd.DataFrame({"target": range(0, 100)})
+        self.X = pd.DataFrame({
+            "continuous": range(0, 100),
+            "low_card": pd.Categorical([1.1]*25 + [1.2]*25 + [1.3]*25 + [1.4]*25),
+            "high_card": pd.Categorical(np.linspace(0, 10, num=100)),
+            "med_card": pd.Categorical(list(np.linspace(0, 10, num=50)) * 2),
+        })
+        self.rf = RandomForestRegressor(n_estimators=10, random_state=42)
+        self.rf.fit(self.X, self.y)
+
+    def test_high_cardinality_categorical_ale(self):
+        """High cardinality categorical feature should not crash (issue #86)."""
+        explainer = skexplain.ExplainToolkit(
+            estimators=("rf", self.rf), X=self.X, y=self.y,
+        )
+        ale = explainer.ale(features="high_card", n_bootstrap=2, n_bins=5)
+        self.assertIn("high_card__rf__ale", ale.data_vars)
+
+    def test_medium_cardinality_categorical_ale(self):
+        """Medium cardinality categorical with duplicates should work."""
+        explainer = skexplain.ExplainToolkit(
+            estimators=("rf", self.rf), X=self.X, y=self.y,
+        )
+        ale = explainer.ale(features="med_card", n_bootstrap=2, n_bins=5)
+        self.assertIn("med_card__rf__ale", ale.data_vars)
+
+    def test_low_cardinality_categorical_ale(self):
+        """Low cardinality categorical should work as before."""
+        explainer = skexplain.ExplainToolkit(
+            estimators=("rf", self.rf), X=self.X, y=self.y,
+        )
+        ale = explainer.ale(features="low_card", n_bootstrap=2, n_bins=5)
+        self.assertIn("low_card__rf__ale", ale.data_vars)
+        # Should have shape (n_bootstrap, n_categories)
+        self.assertEqual(ale["low_card__rf__ale"].shape[0], 2)
+        self.assertEqual(ale["low_card__rf__ale"].shape[1], 4)
+
+
 if __name__ == "__main__":
     unittest.main()