Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
- Example: 10.2.1.4 is the 5th version that supports khiops 10.2.1.
- Internals: Changes in *Internals* sections are unlikely to be of interest for data scientists.

## Unreleased

### Added
- (`sklearn`) `n_feature_parts` parameter to the supervised estimators

## 11.0.0.2 - 2026-01-26

### Fixed
Expand Down
28 changes: 27 additions & 1 deletion khiops/sklearn/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1195,6 +1195,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -1211,6 +1212,7 @@ def __init__(
self.specific_pairs = specific_pairs
self.all_possible_pairs = all_possible_pairs
self.construction_rules = construction_rules
self.n_feature_parts = n_feature_parts
self._original_target_dtype = None
self._predicted_target_meta_data_tag = None
self._khiops_baseline_model_prefix = None
Expand Down Expand Up @@ -1300,6 +1302,12 @@ def _fit_check_params(self, ds, **kwargs):
for rule in self.construction_rules:
if not isinstance(rule, str):
raise TypeError(type_error_message(rule, rule, str))
if not isinstance(self.n_feature_parts, int):
raise TypeError(
type_error_message("n_feature_parts", self.n_feature_parts, int)
)
if self.n_feature_parts < 0:
raise ValueError("'n_feature_parts' must be positive")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

positive -> non-negative

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are totally right, this reformulation would be more precise. Unfortunately, the message is duplicated in many places, and for consistency's sake it would have to be changed everywhere (in a separate PR).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A commit for fixing this would suffice. It is a small change for a PR.


def _fit_train_model(self, ds, computation_dir, **kwargs):
# Train the model with Khiops
Expand Down Expand Up @@ -1384,6 +1392,7 @@ def _fit_prepare_training_function_inputs(self, ds, computation_dir):
kwargs["max_trees"] = kwargs.pop("n_trees")
kwargs["max_text_features"] = kwargs.pop("n_text_features")
kwargs["text_features"] = kwargs.pop("type_text_features")
kwargs["max_parts"] = kwargs.pop("n_feature_parts")

# Add the additional_data_tables parameter
kwargs["additional_data_tables"] = additional_data_tables
Expand Down Expand Up @@ -1513,6 +1522,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -1525,6 +1535,7 @@ def __init__(
specific_pairs=specific_pairs,
all_possible_pairs=all_possible_pairs,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -1685,7 +1696,10 @@ class KhiopsClassifier(ClassifierMixin, KhiopsPredictor):
construction_rules : list of str, optional
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0 it is automatically calculated.
group_target_value : bool, default ``False``
Allows grouping of the target values in classification. It can substantially
increase the training time.
Expand Down Expand Up @@ -1744,6 +1758,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
group_target_value=False,
verbose=False,
output_dir=None,
Expand All @@ -1757,6 +1772,7 @@ def __init__(
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -2086,6 +2102,9 @@ class KhiopsRegressor(RegressorMixin, KhiopsPredictor):
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0 it is automatically calculated.
verbose : bool, default ``False``
If ``True`` it prints debug information and it does not erase temporary files
when fitting, predicting or transforming.
Expand Down Expand Up @@ -2129,6 +2148,7 @@ def __init__(
n_selected_features=0,
n_evaluated_features=0,
construction_rules=None,
n_feature_parts=0,
verbose=False,
output_dir=None,
auto_sort=True,
Expand All @@ -2141,6 +2161,7 @@ def __init__(
n_selected_features=n_selected_features,
n_evaluated_features=n_evaluated_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down Expand Up @@ -2296,6 +2317,9 @@ class KhiopsEncoder(TransformerMixin, KhiopsSupervisedEstimator):
Allowed rules for the automatic feature construction. If not set, Khiops
uses the multi-table construction rules listed in
`kh.DEFAULT_CONSTRUCTION_RULES <khiops.core.api.DEFAULT_CONSTRUCTION_RULES>`.
n_feature_parts : int, default 0
Maximum number of variable parts produced by preprocessing methods. If equal
to 0 it is automatically calculated.
informative_features_only : bool, default ``True``
If ``True`` keeps only informative features.
group_target_value : bool, default ``False``
Expand Down Expand Up @@ -2374,6 +2398,7 @@ def __init__(
specific_pairs=None,
all_possible_pairs=True,
construction_rules=None,
n_feature_parts=0,
informative_features_only=True,
group_target_value=False,
keep_initial_variables=False,
Expand All @@ -2390,6 +2415,7 @@ def __init__(
n_text_features=n_text_features,
type_text_features=type_text_features,
construction_rules=construction_rules,
n_feature_parts=n_feature_parts,
verbose=verbose,
output_dir=output_dir,
auto_sort=auto_sort,
Expand Down
13 changes: 13 additions & 0 deletions tests/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,7 @@ def setUpClass(cls):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 0,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that this test case needs to check that the default value provided to the Core API is correct or unchanged via sklearn. Indeed, the purpose of this test case is to check that the parameters are correctly passed from the sklearn code to the Core API.

"group_target_value": False,
"additional_data_tables": {},
}
Expand Down Expand Up @@ -790,6 +791,7 @@ def setUpClass(cls):
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 5,
"additional_data_tables": {},
}
},
Expand Down Expand Up @@ -818,6 +820,7 @@ def setUpClass(cls):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 7,
"informative_variables_only": True,
"group_target_value": False,
"keep_initial_categorical_variables": False,
Expand Down Expand Up @@ -860,6 +863,7 @@ def setUpClass(cls):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 4,
"group_target_value": False,
"additional_data_tables": {"SpliceJunctionDNA"},
}
Expand Down Expand Up @@ -889,6 +893,7 @@ def setUpClass(cls):
"max_selected_variables": 1,
"max_evaluated_variables": 3,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 6,
"additional_data_tables": {"SpliceJunctionDNA"},
}
},
Expand Down Expand Up @@ -918,6 +923,7 @@ def setUpClass(cls):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"max_parts": 8,
"informative_variables_only": True,
"group_target_value": False,
"keep_initial_categorical_variables": False,
Expand Down Expand Up @@ -1480,6 +1486,7 @@ def test_parameter_transfer_classifier_fit_from_multitable_dataframe(self):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 4,
"group_target_value": False,
},
)
Expand Down Expand Up @@ -1517,6 +1524,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe(self):
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 7,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand All @@ -1543,6 +1551,7 @@ def test_parameter_transfer_encoder_fit_from_monotable_dataframe_with_df_y(
"specific_pairs": [("age", "race")],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 7,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand All @@ -1568,6 +1577,7 @@ def test_parameter_transfer_encoder_fit_from_multitable_dataframe(self):
"specific_pairs": [],
"all_possible_pairs": False,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 8,
"informative_features_only": True,
"group_target_value": False,
"keep_initial_variables": False,
Expand Down Expand Up @@ -1608,6 +1618,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe(self):
"n_text_features": 300000,
"type_text_features": "ngrams",
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 5,
},
)

Expand All @@ -1626,6 +1637,7 @@ def test_parameter_transfer_regressor_fit_from_monotable_dataframe_with_df_y(
"n_text_features": 300000,
"type_text_features": "ngrams",
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 5,
},
)

Expand All @@ -1644,6 +1656,7 @@ def test_parameter_transfer_regressor_fit_from_multitable_dataframe(self):
"n_selected_features": 1,
"n_evaluated_features": 3,
"construction_rules": ["TableMode", "TableSelection"],
"n_feature_parts": 6,
},
)

Expand Down
Loading