From 351577e875c31896d285d923aae76ed6676db3a1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 05:49:07 +0000 Subject: [PATCH 1/5] Initial plan From eb6113cce7f7d43723be25869e5b2fbbdb2ffba1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 05:52:44 +0000 Subject: [PATCH 2/5] Fix transformers API compatibility issues Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/model.py | 2 +- flaml/automl/nlp/huggingface/utils.py | 37 +++++++++++++-------------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/flaml/automl/model.py b/flaml/automl/model.py index 65ff771993..dc222cd6f2 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -1201,7 +1201,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): model_init=self._model_init, train_dataset=train_dataset, eval_dataset=eval_dataset, - tokenizer=self.tokenizer, + processing_class=self.tokenizer, data_collator=self.data_collator, compute_metrics=self._compute_metrics_by_dataset_name, callbacks=[EarlyStoppingCallbackForAuto], diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py index 17e601a8f0..0d04cfa29e 100644 --- a/flaml/automl/nlp/huggingface/utils.py +++ b/flaml/automl/nlp/huggingface/utils.py @@ -211,29 +211,28 @@ def tokenize_onedataframe( hf_args=None, prefix_str=None, ): - with tokenizer.as_target_tokenizer(): - _, tokenized_column_names = tokenize_row( - dict(X.iloc[0]), + _, tokenized_column_names = tokenize_row( + dict(X.iloc[0]), + tokenizer, + prefix=(prefix_str,) if task is SUMMARIZATION else None, + task=task, + hf_args=hf_args, + return_column_name=True, + ) + d = X.apply( + lambda x: tokenize_row( + x, tokenizer, prefix=(prefix_str,) if task is SUMMARIZATION else None, task=task, hf_args=hf_args, - return_column_name=True, - ) - d = X.apply( - 
lambda x: tokenize_row( - x, - tokenizer, - prefix=(prefix_str,) if task is SUMMARIZATION else None, - task=task, - hf_args=hf_args, - ), - axis=1, - result_type="expand", - ) - X_tokenized = pd.DataFrame(columns=tokenized_column_names) - X_tokenized[tokenized_column_names] = d - return X_tokenized + ), + axis=1, + result_type="expand", + ) + X_tokenized = pd.DataFrame(columns=tokenized_column_names) + X_tokenized[tokenized_column_names] = d + return X_tokenized def tokenize_row( From 0e276e8c42ddbe5a853fe0a51485fbdc86369b2b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 12:42:07 +0000 Subject: [PATCH 3/5] Add backward compatibility for transformers v4.26+ by version check Support both tokenizer (v4.26-4.45) and processing_class (v4.46+) parameters based on installed transformers version. Fallback to tokenizer if version check fails. Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/model.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/flaml/automl/model.py b/flaml/automl/model.py index dc222cd6f2..18fdfba20e 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -1196,16 +1196,31 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): control.should_save = True control.should_evaluate = True - self._trainer = TrainerForAuto( - args=self._training_args, - model_init=self._model_init, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - processing_class=self.tokenizer, - data_collator=self.data_collator, - compute_metrics=self._compute_metrics_by_dataset_name, - callbacks=[EarlyStoppingCallbackForAuto], - ) + # Use processing_class for transformers >= 4.46.0, tokenizer for older versions + trainer_kwargs = { + "args": self._training_args, + "model_init": self._model_init, + "train_dataset": train_dataset, + "eval_dataset": eval_dataset, + "data_collator": 
self.data_collator, + "compute_metrics": self._compute_metrics_by_dataset_name, + "callbacks": [EarlyStoppingCallbackForAuto], + } + + # Check if processing_class parameter is supported (transformers >= 4.46.0) + try: + import transformers + from packaging import version + + if version.parse(transformers.__version__) >= version.parse("4.46.0"): + trainer_kwargs["processing_class"] = self.tokenizer + else: + trainer_kwargs["tokenizer"] = self.tokenizer + except Exception: + # Fallback to tokenizer if version check fails + trainer_kwargs["tokenizer"] = self.tokenizer + + self._trainer = TrainerForAuto(**trainer_kwargs) if self._task in NLG_TASKS: setattr(self._trainer, "_is_seq2seq", True) @@ -1602,9 +1617,7 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): self._time_per_iter = ( (self._t2 - self._t1) / (self.params[self.ITER_HP] - 1) if self._t2 > self._t1 - else self._t1 - if self._t1 - else 0.001 + else self._t1 if self._t1 else 0.001 ) self._train_size = X_train.shape[0] if budget is not None and self._t1 + self._t2 >= budget or n_iter == self.params[self.ITER_HP]: From a63f59ebf1c41b0429b270412ab30dc1b140336d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 12:43:46 +0000 Subject: [PATCH 4/5] Improve exception handling specificity Use specific exception types (ImportError, AttributeError, ValueError) instead of broad Exception catch for better error handling. 
Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flaml/automl/model.py b/flaml/automl/model.py index 18fdfba20e..ece0c14d88 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -1216,7 +1216,7 @@ def on_epoch_end(self, args, state, control, **callback_kwargs): trainer_kwargs["processing_class"] = self.tokenizer else: trainer_kwargs["tokenizer"] = self.tokenizer - except Exception: + except (ImportError, AttributeError, ValueError): # Fallback to tokenizer if version check fails trainer_kwargs["tokenizer"] = self.tokenizer From a6e5917b2cff17fc3a6df10a3ff930979e10ddb8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 27 Jan 2026 12:54:45 +0000 Subject: [PATCH 5/5] Run pre-commit formatting on all files Applied black formatting to fix code style across the repository. Co-authored-by: thinkall <3197038+thinkall@users.noreply.github.com> --- flaml/automl/model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flaml/automl/model.py b/flaml/automl/model.py index ece0c14d88..b74137cf6b 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -1617,7 +1617,9 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): self._time_per_iter = ( (self._t2 - self._t1) / (self.params[self.ITER_HP] - 1) if self._t2 > self._t1 - else self._t1 if self._t1 else 0.001 + else self._t1 + if self._t1 + else 0.001 ) self._train_size = X_train.shape[0] if budget is not None and self._t1 + self._t2 >= budget or n_iter == self.params[self.ITER_HP]: