From d880ece5e8a4a7ff7bdc86b7d74341c07a7731ad Mon Sep 17 00:00:00 2001 From: liususan091219 Date: Wed, 30 Nov 2022 19:17:02 -0500 Subject: [PATCH 1/3] add model selection for nlg --- flaml/model.py | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/flaml/model.py b/flaml/model.py index c23c894c04..f0f677d8ad 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -905,18 +905,29 @@ def search_space(cls, data_size, task, **params): If OOM, user should change the search space themselves """ - search_space_dict["model_path"] = { - "domain": tune.choice( - [ - "google/electra-base-discriminator", - "bert-base-uncased", - "roberta-base", - "facebook/muppet-roberta-base", - "google/electra-small-discriminator", - ] - ), - "init_value": "facebook/muppet-roberta-base", - } + if task in NLG_TASKS: + search_space_dict["model_path"] = { + "domain": tune.choice( + [ + "t5-small", + "facebook/bart-base", + ] + ), + "init_value": "t5-small", + } + else: + search_space_dict["model_path"] = { + "domain": tune.choice( + [ + "google/electra-base-discriminator", + "bert-base-uncased", + "roberta-base", + "facebook/muppet-roberta-base", + "google/electra-small-discriminator", + ] + ), + "init_value": "facebook/muppet-roberta-base", + } return search_space_dict From 69b50c51a9c64651446dd3ea52adc9bfb8361b62 Mon Sep 17 00:00:00 2001 From: liususan091219 Date: Fri, 10 Mar 2023 17:52:48 -0500 Subject: [PATCH 2/3] add model selection for nlg --- flaml/automl/model.py | 30 ++++++++------ .../nlp/default/test_autohf_modelselection.py | 41 +++++++++++++++++++ 2 files changed, 59 insertions(+), 12 deletions(-) create mode 100644 test/nlp/default/test_autohf_modelselection.py diff --git a/flaml/automl/model.py b/flaml/automl/model.py index 0d8b32c7d7..86f63f2abd 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -907,18 +907,24 @@ def search_space(cls, data_size, task, **params): If OOM, user should change the search space 
themselves """ - search_space_dict["model_path"] = { - "domain": tune.choice( - [ - "google/electra-base-discriminator", - "bert-base-uncased", - "roberta-base", - "facebook/muppet-roberta-base", - "google/electra-small-discriminator", - ] - ), - "init_value": "facebook/muppet-roberta-base", - } + if task not in NLG_TASKS: + search_space_dict["model_path"] = { + "domain": tune.choice( + [ + "google/electra-base-discriminator", + "bert-base-uncased", + "roberta-base", + "facebook/muppet-roberta-base", + "google/electra-small-discriminator", + ] + ), + "init_value": "facebook/muppet-roberta-base", + } + else: + search_space_dict["model_path"] = { + "domain": tune.choice(["t5-small", "facebook/bart-base"]), + "init_value": "t5-small", + } return search_space_dict diff --git a/test/nlp/default/test_autohf_modelselection.py b/test/nlp/default/test_autohf_modelselection.py new file mode 100644 index 0000000000..97669aca39 --- /dev/null +++ b/test/nlp/default/test_autohf_modelselection.py @@ -0,0 +1,41 @@ +import sys +import pytest +import requests +from utils import get_toy_data_summarization, get_automl_settings +import os +import shutil + + +@pytest.mark.skipif( + sys.platform == "darwin" or sys.version_info < (3, 7), + reason="do not run on mac os or py<3.7", +) +def test_hf_ms(): + from flaml import AutoML + + X_train, y_train, X_val, y_val, X_test = get_toy_data_summarization() + + automl = AutoML() + + automl_settings = get_automl_settings() + automl_settings["estimator_list"] = ["transformer_ms"] + automl_settings["task"] = "summarization" + automl_settings["metric"] = "rouge1" + automl_settings["time_budget"] = 2 * automl_settings["time_budget"] + + try: + automl.fit( + X_train=X_train, + y_train=y_train, + X_val=X_val, + y_val=y_val, + **automl_settings + ) + automl.score(X_val, y_val, **{"metric": "accuracy"}) + automl.pickle("automl.pkl") + except requests.exceptions.HTTPError: + return + + +if __name__ == "__main__": + test_hf_ms() From
b7261940662a066fe6db3be8e7c52c43b708d099 Mon Sep 17 00:00:00 2001 From: liususan091219 Date: Sat, 11 Mar 2023 20:27:56 -0500 Subject: [PATCH 3/3] addressing error --- .../{default => }/test_autohf_modelselection.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) rename test/nlp/{default => }/test_autohf_modelselection.py (73%) diff --git a/test/nlp/default/test_autohf_modelselection.py b/test/nlp/test_autohf_modelselection.py similarity index 73% rename from test/nlp/default/test_autohf_modelselection.py rename to test/nlp/test_autohf_modelselection.py index 97669aca39..a08267c9fd 100644 --- a/test/nlp/default/test_autohf_modelselection.py +++ b/test/nlp/test_autohf_modelselection.py @@ -17,11 +17,16 @@ def test_hf_ms(): automl = AutoML() - automl_settings = get_automl_settings() - automl_settings["estimator_list"] = ["transformer_ms"] - automl_settings["task"] = "summarization" - automl_settings["metric"] = "rouge1" - automl_settings["time_budget"] = 2 * automl_settings["time_budget"] + automl_settings = { + "gpu_per_trial": 0, + "max_iter": 3, + "time_budget": 20, + "task": "summarization", + "metric": "rouge1", + "log_file_name": "seqclass.log", + "use_ray": False, + "estimator_list": ["transformer_ms"], + } try: automl.fit(