From d084a67b4c204ce2087642e96bc28d8cdc3ab482 Mon Sep 17 00:00:00 2001 From: Thierry RAMORASOAVINA Date: Thu, 22 Jan 2026 16:54:02 +0100 Subject: [PATCH] Add the `n_feature_parts` parameter to the supervised estimators - KhiopsClassifier, KhiopsRegressor and KhiopsEncoder --- CHANGELOG.md | 5 +++++ khiops/sklearn/estimators.py | 28 +++++++++++++++++++++++++++- tests/test_sklearn.py | 13 +++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc440647..58ed6a29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ - Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1. - Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists. +## Unreleased + +### Added +- (`sklearn`) `n_feature_parts` parameter to the supervised estimators + ## 11.0.0.2 - 2026-01-26 ## Fixed diff --git a/khiops/sklearn/estimators.py b/khiops/sklearn/estimators.py index 3ad6776b..f1605dfc 100644 --- a/khiops/sklearn/estimators.py +++ b/khiops/sklearn/estimators.py @@ -1195,6 +1195,7 @@ def __init__( specific_pairs=None, all_possible_pairs=True, construction_rules=None, + n_feature_parts=0, verbose=False, output_dir=None, auto_sort=True, @@ -1211,6 +1212,7 @@ def __init__( self.specific_pairs = specific_pairs self.all_possible_pairs = all_possible_pairs self.construction_rules = construction_rules + self.n_feature_parts = n_feature_parts self._original_target_dtype = None self._predicted_target_meta_data_tag = None self._khiops_baseline_model_prefix = None @@ -1300,6 +1302,12 @@ def _fit_check_params(self, ds, **kwargs): for rule in self.construction_rules: if not isinstance(rule, str): raise TypeError(type_error_message(rule, rule, str)) + if not isinstance(self.n_feature_parts, int): + raise TypeError( + type_error_message("n_feature_parts", self.n_feature_parts, int) + ) + if self.n_feature_parts < 0: + raise ValueError("'n_feature_parts' must be positive") def _fit_train_model(self, ds, computation_dir, **kwargs): # Train the model with Khiops @@ -1384,6 +1392,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir): kwargs["max_trees"] = kwargs.pop("n_trees") kwargs["max_text_features"] = kwargs.pop("n_text_features") kwargs["text_features"] = kwargs.pop("type_text_features") + kwargs["max_parts"] = kwargs.pop("n_feature_parts") # Add the additional_data_tables parameter kwargs["additional_data_tables"] = additional_data_tables @@ -1513,6 +1522,7 @@ def __init__( specific_pairs=None, all_possible_pairs=True, construction_rules=None, + n_feature_parts=0, verbose=False, output_dir=None, auto_sort=True, @@ -1525,6 +1535,7 @@ def __init__( specific_pairs=specific_pairs, all_possible_pairs=all_possible_pairs, construction_rules=construction_rules, + n_feature_parts=n_feature_parts, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, @@ -1685,7 +1696,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor): construction_rules : list of str, optional Allowed rules for the automatic feature construction. If not set, Khiops uses the multi-table construction rules listed in - `kh.DEFAULT_CONSTRUCTION_RULES ` + `kh.DEFAULT_CONSTRUCTION_RULES `. + n_feature_parts : int, default 0 + Maximum number of variable parts produced by preprocessing methods. If equal + to 0 it is automatically calculated. group_target_value : bool, default ``False`` Allows grouping of the target values in classification. It can substantially increase the training time. @@ -1744,6 +1758,7 @@ def __init__( specific_pairs=None, all_possible_pairs=True, construction_rules=None, + n_feature_parts=0, group_target_value=False, verbose=False, output_dir=None, @@ -1757,6 +1772,7 @@ def __init__( n_selected_features=n_selected_features, n_evaluated_features=n_evaluated_features, construction_rules=construction_rules, + n_feature_parts=n_feature_parts, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, @@ -2086,6 +2102,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor): Allowed rules for the automatic feature construction. If not set, Khiops uses the multi-table construction rules listed in `kh.DEFAULT_CONSTRUCTION_RULES `. + n_feature_parts : int, default 0 + Maximum number of variable parts produced by preprocessing methods. If equal + to 0 it is automatically calculated. verbose : bool, default ``False`` If ``True`` it prints debug information and it does not erase temporary files when fitting, predicting or transforming. @@ -2129,6 +2148,7 @@ def __init__( n_selected_features=0, n_evaluated_features=0, construction_rules=None, + n_feature_parts=0, verbose=False, output_dir=None, auto_sort=True, @@ -2141,6 +2161,7 @@ def __init__( n_selected_features=n_selected_features, n_evaluated_features=n_evaluated_features, construction_rules=construction_rules, + n_feature_parts=n_feature_parts, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, @@ -2296,6 +2317,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator): Allowed rules for the automatic feature construction. If not set, Khiops uses the multi-table construction rules listed in `kh.DEFAULT_CONSTRUCTION_RULES `. + n_feature_parts : int, default 0 + Maximum number of variable parts produced by preprocessing methods. If equal + to 0 it is automatically calculated. informative_features_only : bool, default ``True`` If ``True`` keeps only informative features. group_target_value : bool, default ``False`` @@ -2374,6 +2398,7 @@ def __init__( specific_pairs=None, all_possible_pairs=True, construction_rules=None, + n_feature_parts=0, informative_features_only=True, group_target_value=False, keep_initial_variables=False, @@ -2390,6 +2415,7 @@ def __init__( n_text_features=n_text_features, type_text_features=type_text_features, construction_rules=construction_rules, + n_feature_parts=n_feature_parts, verbose=verbose, output_dir=output_dir, auto_sort=auto_sort, diff --git a/tests/test_sklearn.py b/tests/test_sklearn.py index 1c515652..bfd069d7 100644 --- a/tests/test_sklearn.py +++ b/tests/test_sklearn.py @@ -762,6 +762,7 @@ def setUpClass(cls): "specific_pairs": [("age", "race")], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 0, "group_target_value": False, "additional_data_tables": {}, } @@ -790,6 +791,7 @@ def setUpClass(cls): "max_selected_variables": 1, "max_evaluated_variables": 3, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 5, "additional_data_tables": {}, } }, @@ -818,6 +820,7 @@ def setUpClass(cls): "specific_pairs": [("age", "race")], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 7, "informative_variables_only": True, "group_target_value": False, "keep_initial_categorical_variables": False, @@ -860,6 +863,7 @@ def setUpClass(cls): "specific_pairs": [], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 4, "group_target_value": False, "additional_data_tables": {"SpliceJunctionDNA"}, } @@ -889,6 +893,7 @@ def setUpClass(cls): "max_selected_variables": 1, "max_evaluated_variables": 3, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 6, "additional_data_tables": {"SpliceJunctionDNA"}, } }, @@ -918,6 +923,7 @@ def setUpClass(cls): "specific_pairs": [], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "max_parts": 8, "informative_variables_only": True, "group_target_value": False, "keep_initial_categorical_variables": False, @@ -1480,6 +1486,7 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self): "specific_pairs": [], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 4, "group_target_value": False, }, ) @@ -1517,6 +1524,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self): "specific_pairs": [("age", "race")], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 7, "informative_features_only": True, "group_target_value": False, "keep_initial_variables": False, @@ -1543,6 +1551,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y( "specific_pairs": [("age", "race")], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 7, "informative_features_only": True, "group_target_value": False, "keep_initial_variables": False, @@ -1568,6 +1577,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self): "specific_pairs": [], "all_possible_pairs": False, "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 8, "informative_features_only": True, "group_target_value": False, "keep_initial_variables": False, @@ -1608,6 +1618,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self): "n_text_features": 300000, "type_text_features": "ngrams", "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 5, }, ) @@ -1626,6 +1637,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y( "n_text_features": 300000, "type_text_features": "ngrams", "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 5, }, ) @@ -1644,6 +1656,7 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self): "n_selected_features": 1, "n_evaluated_features": 3, "construction_rules": ["TableMode", "TableSelection"], + "n_feature_parts": 6, }, )