From 5acb208ae0ddee1f30619a5822d51d9b0591be48 Mon Sep 17 00:00:00 2001 From: naomi-simumba Date: Thu, 14 Aug 2025 13:19:51 +0100 Subject: [PATCH 01/11] automatically plot Signed-off-by: naomi-simumba --- benchmark/utils.py | 139 +++++++++++++++++++++++++-------------------- 1 file changed, 78 insertions(+), 61 deletions(-) diff --git a/benchmark/utils.py b/benchmark/utils.py index 8c8a7f8..e8dc57c 100644 --- a/benchmark/utils.py +++ b/benchmark/utils.py @@ -28,7 +28,6 @@ "0.01x_train": 1, } - def unflatten(dictionary: Dict[str, Any]): resultDict: Dict = {} for key, value in dictionary.items(): @@ -212,13 +211,15 @@ def extract_repeated_experiment_results( if task in task_info: metric_name = task_info[task] metric_name = 'test_test/' + metric_name.split("/")[-1] - else: + else: continue if metric_name not in run.data.metrics: logger.info(f"{metric_name} not found in task {task}. Skipping") continue score = run.data.metrics[metric_name] + if ("rmse" in metric_name) or ("RMSE" in metric_name): + score = 1-score run_names.append(run.info.run_name) exp_ids.append(experiment_id) exp_names.append(original_experiment_name) @@ -305,21 +306,31 @@ def extract_parameters( if experiment_info is None: continue experiment_id = experiment_info.experiment_id - logger.info(f"\nexperiment_name: {experiment_name} ") + logger.info(f"\n\n\nexperiment_name: {experiment_name}.") logger.info(f"experiment_id: {experiment_info.experiment_id}") + exp_parent_run_name = f"top_run_{experiment_name}" experiment_parent_run_data = client.search_runs( experiment_ids=[experiment_id], filter_string=f'tags."mlflow.runName" LIKE "{exp_parent_run_name}"', ) + + logger.info( + f"experiment_parent_run_data: {len(experiment_parent_run_data)}" + ) + for run in experiment_parent_run_data: + logger.info( + f"{run.info.run_id}: {run.info.run_name}" + ) if (len(experiment_parent_run_data) > 1) or ( len(experiment_parent_run_data) == 0 ): - logger.debug( + logger.info( f"The number of parent runs for each experiment should be 1. \ - It is currently {len(experiment_parent_run_data)}" + It is currently {len(experiment_parent_run_data)}. Skipping." ) - raise RuntimeError + continue + #raise RuntimeError for run in experiment_parent_run_data: exp_parent_run_id = run.info.run_id @@ -333,39 +344,45 @@ def extract_parameters( for task in task_names: logger.info(f"task: {task}") - matching_runs = [run for run in runs if run.info.run_name.endswith(task)] # type: ignore - best_params = matching_runs[0].data.params - - # eval them - best_params = {k: literal_eval(v) for k, v in best_params.items()} - best_params["experiment_name"] = experiment_name - best_params["dataset"] = task - best_params["decoder"] = matching_runs[0].data.tags["decoder"] - best_params["backbone"] = matching_runs[0].data.tags["backbone"] - best_params["early_stop_patience"] = matching_runs[0].data.tags[ - "early_stop_patience" - ] - best_params["n_trials"] = matching_runs[0].data.tags["n_trials"] - best_params["partition_name"] = matching_runs[0].data.tags["partition_name"] - best_params["data_percentages"] = DATA_PARTITIONS[ - best_params["partition_name"] - ] - if 'optimizer_hparams' in best_params: - logger.info( - f"optimizer_hparams: {best_params['optimizer_hparams'].items()}" - ) - optimizer_hparams = { - k: v for k, v in best_params['optimizer_hparams'].items() - } - best_params.update(optimizer_hparams) - del best_params['optimizer_hparams'] - if 'model_args' in best_params: - model_args = {k: v for k, v in best_params['model_args'].items()} - best_params.update(model_args) - del best_params['model_args'] - - best_params = pd.DataFrame(best_params, index=[0]) - all_params.append(best_params) + try: #doing try/except because some tasks are incomplete and will raise an error + matching_runs = [run for run in runs if run.info.run_name.endswith(task)] # type: ignore + best_params = matching_runs[0].data.params + + # eval them + best_params = {k: literal_eval(v) for k, v in best_params.items()} + best_params["experiment_name"] = experiment_name + best_params["dataset"] = task + best_params["decoder"] = matching_runs[0].data.tags["decoder"] if "decoder" in matching_runs[0].data.tags else "N/A" + best_params["backbone"] = matching_runs[0].data.tags["backbone"] + best_params["early_stop_patience"] = matching_runs[0].data.tags[ + "early_stop_patience" + ] + best_params["n_trials"] = matching_runs[0].data.tags["n_trials"] + best_params["partition_name"] = matching_runs[0].data.tags["partition_name"] + best_params["data_percentages"] = DATA_PARTITIONS[ + best_params["partition_name"] + ] + if 'optimizer_hparams' in best_params: + logger.info( + f"optimizer_hparams: {best_params['optimizer_hparams'].items()}" + ) + optimizer_hparams = { + k: v for k, v in best_params['optimizer_hparams'].items() + } + best_params.update(optimizer_hparams) + del best_params['optimizer_hparams'] + if 'model_args' in best_params: + model_args = {k: v for k, v in best_params['model_args'].items()} + best_params.update(model_args) + del best_params['model_args'] + + best_params = pd.DataFrame(best_params, index=[0]) + all_params.append(best_params) + except Exception as e: + logger.info(f"error: {e}.") + continue + + logger.info(f"\n all_params: {len(all_params)}.") all_params = pd.concat(all_params, axis=0) all_params = all_params.reset_index() return all_params @@ -395,11 +412,9 @@ def get_results_and_parameters( pd.DataFrame with results and parameters """ if Path(storage_uri).exists() and Path(storage_uri).is_dir(): - results_dir = ( - Path(storage_uri).parents[0] / "summarized_results" / benchmark_name - ) + results_dir = Path(storage_uri).parents[0] / "summarized_results" / benchmark_name else: - print("Please use a valid directory for storage_uri") + logger.info("Please use a valid directory for storage_uri") raise ValueError if not os.path.exists(results_dir): os.makedirs(results_dir) @@ -430,6 +445,14 @@ def get_results_and_parameters( results_and_parameters.to_csv( f"{str(results_dir)}/results_and_parameters.csv", index=False ) + + visualize_combined_results( + combined_results=results_and_parameters, + storage_uri=storage_uri, + logger=logger, + plot_file_base_name=f"multiple_models_{benchmark_name}", + ) + return results_and_parameters @@ -577,7 +600,7 @@ def check_existing_experiments( exp_parent_run_name: str, task_names: list, n_trials: int, - backbone: str, + backbone: str ) -> Dict[str, Any]: """ checks if experiment has been completed (i.e. both task run and nested individual runs are complete) @@ -700,14 +723,15 @@ def visualize_combined_results( save_folder = Path(storage_uri).parents[0] / "visualizations" tables_folder = save_folder / "tables" plots_folder = save_folder / "plots" + normalizer_folder = save_folder / "normalizer" if not os.path.exists(tables_folder): os.makedirs(tables_folder) if not os.path.exists(plots_folder): os.makedirs(plots_folder) + if not os.path.exists(normalizer_folder): + os.makedirs(normalizer_folder) - combined_results = [] - model_order = [] - experiments = list(set(combined_results["experiment_name"])) + experiments = list(set(combined_results["experiment_name"].tolist())) combined_results = combined_results.rename(columns={"experiment_name": "model"}) num_experiments = len(experiments) fig_size = (num_experiments * 5, 6) if num_experiments >= 3 else (15, 6) @@ -717,12 +741,11 @@ def visualize_combined_results( zip(model_order, sns.color_palette("tab20", n_colors=len(model_order))) ) - try: + if True: # plot raw values plot_tools.plot_per_dataset( combined_results, model_order=model_order, - plot_file_base_name=plot_file_base_name, model_colors=model_colors, metric="test metric", sharey=False, @@ -741,12 +764,13 @@ def visualize_combined_results( combined_results, metrics=("test metric",), benchmark_name=plot_file_base_name, + normalizer_folder= normalizer_folder ) bootstrapped_iqm, normalized_combined_results = ( plot_tools.normalize_bootstrap_and_plot( - combined_results, - plot_file_base_name=plot_file_base_name, + df=combined_results, metric="test metric", + normalizer_folder=normalizer_folder, benchmark_name=plot_file_base_name, model_order=model_order, model_colors=model_colors, @@ -754,7 +778,6 @@ def visualize_combined_results( n_legend_rows=n_legend_rows, ) ) - # dataset_name_map=dataset_name_map) plt.savefig( str( @@ -772,7 +795,7 @@ def visualize_combined_results( tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv" ) ) - except Exception as e: + else: #except Exception as e: logger.info(f"could not visualize due to error: {e}") @@ -797,7 +820,6 @@ def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.Root logging.basicConfig(level=logging.CRITICAL) return logger - def import_custom_modules( logger: logging.RootLogger, custom_modules_path: str | Path | None = None, @@ -819,17 +841,12 @@ def import_custom_modules( module = importlib.import_module(module_dir) logger.info(f"Found {custom_modules_path}") except ImportError: - raise ImportError( - f"It was not possible to import modules from {custom_modules_path}." - ) + raise ImportError(f"It was not possible to import modules from {custom_modules_path}.") else: - raise ValueError( - f"Modules path {custom_modules_path} isn't a directory. Check if you have defined it properly." - ) + raise ValueError(f"Modules path {custom_modules_path} isn't a directory. Check if you have defined it properly.") else: logger.debug("No custom module is being used.") - if __name__ == "__main__": logger = get_logger() storage_uri = "results_folder/hpo" # storage_uri from config From ee23d32c50c59e2396d1fd77fec1bfa5df54721f Mon Sep 17 00:00:00 2001 From: naomi-simumba Date: Thu, 14 Aug 2025 13:29:15 +0100 Subject: [PATCH 02/11] fix reading underscore metrics Signed-off-by: naomi-simumba --- benchmark/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmark/utils.py b/benchmark/utils.py index e8dc57c..4127ee7 100644 --- a/benchmark/utils.py +++ b/benchmark/utils.py @@ -210,7 +210,9 @@ def extract_repeated_experiment_results( seed = int(run.info.run_name.split("_")[-1]) if task in task_info: metric_name = task_info[task] - metric_name = 'test_test/' + metric_name.split("/")[-1] + name_1 = 'test_test/' + metric_name.split("/")[-1] + name_2 = 'test_test_' + task.metric.replace(task.metric.split('_')[0] + "_", '') + metric_name = name_1 if '/' in task.metric else name_2 else: continue From b05ec337bda90e0b775eea403632fe1187c67c69 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:20:23 -0300 Subject: [PATCH 03/11] change input param: list of experiments to experiment name so we can test summarize function Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 22 ++++--- benchmark/utils.py | 151 +++++++++++++++++++++++---------------------- run_tests.py | 1 + 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/benchmark/main.py b/benchmark/main.py index 8d7128f..95b43aa 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -41,7 +41,7 @@ def main(): help="summarize results from repeated experiments", action="store_true", ) - parser.add_argument('--list_of_experiment_names', type=list[str]) + parser.add_argument('--list_of_experiment_names', type=str, nargs=1) parser.add_argument('--task_names', type=list[str]) parser.add_argument('--task_metrics', type=list[str]) parser.add_argument( @@ -52,15 +52,17 @@ def main(): args = parser.parse_args() paths: List[Any] = args.config + assert isinstance(paths, list), f"Error! {paths=} is not a list" + assert len(paths) > 0 path = paths[0] config = parser.parse_path(path) config_init = parser.instantiate_classes(config) - summarize = args.summarize + summarize: bool = args.summarize assert isinstance(summarize, bool), f"Error! {summarize=} is not a bool" - repeat = args.repeat + repeat: bool = args.repeat assert isinstance(repeat, bool), f"Error! {repeat=} is not a bool" - hpo = args.hpo + hpo: bool = args.hpo assert isinstance(hpo, bool), f"Error! {hpo=} is not a bool" storage_uri = config_init.storage_uri @@ -79,12 +81,12 @@ def main(): hpo is False and repeat is False ), f"Error! both {repeat=} and {hpo=} must be False when summarizing results from multiple experiments." - list_of_experiment_names = config_init.list_of_experiment_names + experiment_name = config_init.list_of_experiment_names assert isinstance( - list_of_experiment_names, list - ), f"Error! {list_of_experiment_names=} is not a list" - for exp in list_of_experiment_names: - assert isinstance(exp, str), f"Error! {exp=} is not a str" + experiment_name, str + ), f"Error! {experiment_name=} is not a str" + # for exp in experiment_name: + # assert isinstance(exp, str), f"Error! {exp=} is not a str" task_names = config_init.task_names assert isinstance(task_names, list), f"Error! {task_names=} is not a list" @@ -108,7 +110,7 @@ def main(): benchmark_name=benchmark_name, storage_uri=storage_uri, logger=logger, - experiments=list_of_experiment_names, + experiments=[experiment_name], task_names=task_names, num_repetitions=run_repetitions, task_metrics=task_metrics, diff --git a/benchmark/utils.py b/benchmark/utils.py index 4127ee7..7c494ca 100644 --- a/benchmark/utils.py +++ b/benchmark/utils.py @@ -28,6 +28,7 @@ "0.01x_train": 1, } + def unflatten(dictionary: Dict[str, Any]): resultDict: Dict = {} for key, value in dictionary.items(): @@ -149,7 +150,7 @@ def extract_repeated_experiment_results( task_metrics: list, task_names: list, num_repetitions: int = REPEATED_SEEDS_DEFAULT, -) -> (pd.DataFrame, list): +) -> tuple[pd.DataFrame, list]: """ extracts results of repeated experiments from mlflow logs and saves them in csv save list of incomplete experiments to a txt file @@ -211,9 +212,11 @@ def extract_repeated_experiment_results( if task in task_info: metric_name = task_info[task] name_1 = 'test_test/' + metric_name.split("/")[-1] - name_2 = 'test_test_' + task.metric.replace(task.metric.split('_')[0] + "_", '') + name_2 = 'test_test_' + task.metric.replace( + task.metric.split('_')[0] + "_", '' + ) metric_name = name_1 if '/' in task.metric else name_2 - else: + else: continue if metric_name not in run.data.metrics: @@ -221,7 +224,7 @@ def extract_repeated_experiment_results( continue score = run.data.metrics[metric_name] if ("rmse" in metric_name) or ("RMSE" in metric_name): - score = 1-score + score = 1 - score run_names.append(run.info.run_name) exp_ids.append(experiment_id) exp_names.append(original_experiment_name) @@ -317,13 +320,9 @@ def extract_parameters( filter_string=f'tags."mlflow.runName" LIKE "{exp_parent_run_name}"', ) - logger.info( - f"experiment_parent_run_data: {len(experiment_parent_run_data)}" - ) + logger.info(f"experiment_parent_run_data: {len(experiment_parent_run_data)}") for run in experiment_parent_run_data: - logger.info( - f"{run.info.run_id}: {run.info.run_name}" - ) + logger.info(f"{run.info.run_id}: {run.info.run_name}") if (len(experiment_parent_run_data) > 1) or ( len(experiment_parent_run_data) == 0 ): @@ -332,7 +331,7 @@ def extract_parameters( It is currently {len(experiment_parent_run_data)}. Skipping." ) continue - #raise RuntimeError + # raise RuntimeError for run in experiment_parent_run_data: exp_parent_run_id = run.info.run_id @@ -346,7 +345,7 @@ def extract_parameters( for task in task_names: logger.info(f"task: {task}") - try: #doing try/except because some tasks are incomplete and will raise an error + try: # doing try/except because some tasks are incomplete and will raise an error matching_runs = [run for run in runs if run.info.run_name.endswith(task)] # type: ignore best_params = matching_runs[0].data.params @@ -354,13 +353,19 @@ def extract_parameters( best_params = {k: literal_eval(v) for k, v in best_params.items()} best_params["experiment_name"] = experiment_name best_params["dataset"] = task - best_params["decoder"] = matching_runs[0].data.tags["decoder"] if "decoder" in matching_runs[0].data.tags else "N/A" + best_params["decoder"] = ( + matching_runs[0].data.tags["decoder"] + if "decoder" in matching_runs[0].data.tags + else "N/A" + ) best_params["backbone"] = matching_runs[0].data.tags["backbone"] best_params["early_stop_patience"] = matching_runs[0].data.tags[ "early_stop_patience" ] best_params["n_trials"] = matching_runs[0].data.tags["n_trials"] - best_params["partition_name"] = matching_runs[0].data.tags["partition_name"] + best_params["partition_name"] = matching_runs[0].data.tags[ + "partition_name" + ] best_params["data_percentages"] = DATA_PARTITIONS[ best_params["partition_name"] ] @@ -414,7 +419,9 @@ def get_results_and_parameters( pd.DataFrame with results and parameters """ if Path(storage_uri).exists() and Path(storage_uri).is_dir(): - results_dir = Path(storage_uri).parents[0] / "summarized_results" / benchmark_name + results_dir = ( + Path(storage_uri).parents[0] / "summarized_results" / benchmark_name + ) else: logger.info("Please use a valid directory for storage_uri") raise ValueError @@ -449,11 +456,11 @@ def get_results_and_parameters( ) visualize_combined_results( - combined_results=results_and_parameters, - storage_uri=storage_uri, - logger=logger, - plot_file_base_name=f"multiple_models_{benchmark_name}", - ) + combined_results=results_and_parameters, + storage_uri=storage_uri, + logger=logger, + plot_file_base_name=f"multiple_models_{benchmark_name}", + ) return results_and_parameters @@ -602,7 +609,7 @@ def check_existing_experiments( exp_parent_run_name: str, task_names: list, n_trials: int, - backbone: str + backbone: str, ) -> Dict[str, Any]: """ checks if experiment has been completed (i.e. both task run and nested individual runs are complete) @@ -743,62 +750,54 @@ def visualize_combined_results( zip(model_order, sns.color_palette("tab20", n_colors=len(model_order))) ) - if True: - # plot raw values - plot_tools.plot_per_dataset( - combined_results, + # plot raw values + plot_tools.plot_per_dataset( + combined_results, + model_order=model_order, + model_colors=model_colors, + metric="test metric", + sharey=False, + inner="points", + fig_size=fig_size, + n_legend_rows=n_legend_rows, + ) + plt.savefig( + str(plots_folder / f"violin_{plot_file_base_name}_raw.png"), + bbox_inches="tight", + ) + plt.close() + + # plot normalized, bootstrapped values values + plot_tools.make_normalizer( + combined_results, + metrics=("test metric",), + benchmark_name=plot_file_base_name, + normalizer_folder=normalizer_folder, + ) + bootstrapped_iqm, normalized_combined_results = ( + plot_tools.normalize_bootstrap_and_plot( + df=combined_results, + metric="test metric", + normalizer_folder=normalizer_folder, + benchmark_name=plot_file_base_name, model_order=model_order, model_colors=model_colors, - metric="test metric", - sharey=False, - inner="points", fig_size=fig_size, n_legend_rows=n_legend_rows, ) - plt.savefig( - str(plots_folder / f"violin_{plot_file_base_name}_raw.png"), - bbox_inches="tight", - ) - plt.close() - - # plot normalized, bootstrapped values values - plot_tools.make_normalizer( - combined_results, - metrics=("test metric",), - benchmark_name=plot_file_base_name, - normalizer_folder= normalizer_folder - ) - bootstrapped_iqm, normalized_combined_results = ( - plot_tools.normalize_bootstrap_and_plot( - df=combined_results, - metric="test metric", - normalizer_folder=normalizer_folder, - benchmark_name=plot_file_base_name, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - ) + ) - plt.savefig( - str( - plots_folder - / f"violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), - bbox_inches="tight", - ) - plt.close() - bootstrapped_iqm.to_csv( - str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv") - ) - combined_results.to_csv( - str( - tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv" - ) - ) - else: #except Exception as e: - logger.info(f"could not visualize due to error: {e}") + plt.savefig( + str(plots_folder / f"violin_{plot_file_base_name}_normalized_bootstrapped.png"), + bbox_inches="tight", + ) + plt.close() + bootstrapped_iqm.to_csv( + str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv") + ) + combined_results.to_csv( + str(tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv") + ) def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.RootLogger: @@ -822,6 +821,7 @@ def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.Root logging.basicConfig(level=logging.CRITICAL) return logger + def import_custom_modules( logger: logging.RootLogger, custom_modules_path: str | Path | None = None, @@ -843,12 +843,17 @@ def import_custom_modules( module = importlib.import_module(module_dir) logger.info(f"Found {custom_modules_path}") except ImportError: - raise ImportError(f"It was not possible to import modules from {custom_modules_path}.") + raise ImportError( + f"It was not possible to import modules from {custom_modules_path}." + ) else: - raise ValueError(f"Modules path {custom_modules_path} isn't a directory. Check if you have defined it properly.") + raise ValueError( + f"Modules path {custom_modules_path} isn't a directory. Check if you have defined it properly." + ) else: logger.debug("No custom module is being used.") + if __name__ == "__main__": logger = get_logger() storage_uri = "results_folder/hpo" # storage_uri from config diff --git a/run_tests.py b/run_tests.py index 95f5fc4..203b63c 100644 --- a/run_tests.py +++ b/run_tests.py @@ -32,6 +32,7 @@ def run_tests(test_id: Optional[str] = None): out_file.unlink(missing_ok=True) assert not out_file.exists() jbsub = f"bsub -e {err_file} -o {out_file} -M 40G -gpu \"num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB\" pytest -vv tests/test_benchmark.py::test_run_benchmark[{tc_id}]" + cmd = jbsub.split() result = subprocess.run(cmd, capture_output=True) if result.returncode == 0: From d311041c4a7d6b1ef7a97b55b17d1faa967f9a7f Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:22:48 -0300 Subject: [PATCH 04/11] fix invalid param name Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/main.py b/benchmark/main.py index 95b43aa..2c50aae 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -81,7 +81,7 @@ def main(): hpo is False and repeat is False ), f"Error! both {repeat=} and {hpo=} must be False when summarizing results from multiple experiments." - experiment_name = config_init.list_of_experiment_names + experiment_name = config_init.experiment_name assert isinstance( experiment_name, str ), f"Error! {experiment_name=} is not a str" From b965a5b84d68164d1f9ae36a60939fe32b6a6074 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:28:54 -0300 Subject: [PATCH 05/11] fix task names and task metrics Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 6 +++--- configs/tests/benchmark_v2_simple.yaml | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/benchmark/main.py b/benchmark/main.py index 2c50aae..6975f58 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -41,9 +41,9 @@ def main(): help="summarize results from repeated experiments", action="store_true", ) - parser.add_argument('--list_of_experiment_names', type=str, nargs=1) - parser.add_argument('--task_names', type=list[str]) - parser.add_argument('--task_metrics', type=list[str]) + parser.add_argument('--experiment_name', type=str) + parser.add_argument('--task_names', type=list[str], nargs='+', default=[]) + parser.add_argument('--task_metrics', type=list[str], nargs='+', default=[]) parser.add_argument( '--benchmark_name', type=str, diff --git a/configs/tests/benchmark_v2_simple.yaml b/configs/tests/benchmark_v2_simple.yaml index 608bc64..3d0cb83 100644 --- a/configs/tests/benchmark_v2_simple.yaml +++ b/configs/tests/benchmark_v2_simple.yaml @@ -21,7 +21,6 @@ tasks: - name: chesapeake type: segmentation direction: max - metric: val/Multiclass_Jaccard_Index early_stop_patience: 10 terratorch_task: loss: ce From a36f6dbebf391e617b2e61e8a015db401e6e46d9 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:31:10 -0300 Subject: [PATCH 06/11] handling single tasks Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmark/main.py b/benchmark/main.py index 6975f58..1c1f78c 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -89,11 +89,15 @@ def main(): # assert isinstance(exp, str), f"Error! {exp=} is not a str" task_names = config_init.task_names + if isinstance(task_names, str): + task_names = [task_names] assert isinstance(task_names, list), f"Error! {task_names=} is not a list" for t in task_names: assert isinstance(t, str), f"Error! {t=} is not a str" task_metrics = config_init.task_metrics + if isinstance(task_metrics, str): + task_metrics = [task_metrics] assert isinstance(task_metrics, list), f"Error! {task_metrics=} is not a list" for t in task_metrics: assert isinstance(t, str), f"Error! {t=} is not a str" From e4bcee5fe36a49b6f2972b49d6d28367b96f9a43 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:37:01 -0300 Subject: [PATCH 07/11] remove redundant param Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/main.py b/benchmark/main.py index 1c1f78c..890aafe 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -41,7 +41,7 @@ def main(): help="summarize results from repeated experiments", action="store_true", ) - parser.add_argument('--experiment_name', type=str) + # parser.add_argument('--experiment_name', type=str)s parser.add_argument('--task_names', type=list[str], nargs='+', default=[]) parser.add_argument('--task_metrics', type=list[str], nargs='+', default=[]) parser.add_argument( From 894e71c88f8f9b36073be1de676d840ad524fb91 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:41:10 -0300 Subject: [PATCH 08/11] fix task name param Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/benchmark/main.py b/benchmark/main.py index 890aafe..f31fe5a 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -42,8 +42,12 @@ def main(): action="store_true", ) # parser.add_argument('--experiment_name', type=str)s - parser.add_argument('--task_names', type=list[str], nargs='+', default=[]) - parser.add_argument('--task_metrics', type=list[str], nargs='+', default=[]) + parser.add_argument( + '--task_names', type=list[str], nargs='+', action='append', default=[] + ) + parser.add_argument( + '--task_metrics', type=list[str], nargs='+', action='append', default=[] + ) parser.add_argument( '--benchmark_name', type=str, From 52e95839a8797151dd6a793f6241170f0b7e0ba9 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 20:42:27 -0300 Subject: [PATCH 09/11] fix task name param Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/main.py b/benchmark/main.py index f31fe5a..77b4ebc 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -43,10 +43,10 @@ def main(): ) # parser.add_argument('--experiment_name', type=str)s parser.add_argument( - '--task_names', type=list[str], nargs='+', action='append', default=[] + '--task_names', nargs='+', action='append', default=[] ) parser.add_argument( - '--task_metrics', type=list[str], nargs='+', action='append', default=[] + '--task_metrics', nargs='+', action='append', default=[] ) parser.add_argument( '--benchmark_name', From ad86761b8bc723bff16d92e480b2d4f9a65dfa8d Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 21:53:35 -0300 Subject: [PATCH 10/11] fix num repetitions Signed-off-by: Leonardo P. Tizzei --- benchmark/main.py | 247 +++++++++++++------------ configs/tests/benchmark_v2_simple.yaml | 3 +- 2 files changed, 129 insertions(+), 121 deletions(-) diff --git a/benchmark/main.py b/benchmark/main.py index 77b4ebc..83ef807 100644 --- a/benchmark/main.py +++ b/benchmark/main.py @@ -34,6 +34,11 @@ def main(): parser.add_argument('--report_on_best_val', type=bool, default=True) parser.add_argument('--test_models', type=bool, default=False) parser.add_argument('--bayesian_search', type=bool, default=True) + parser.add_argument( + '--benchmark_name', + type=str, + help="name of summarized results file", + ) parser.add_argument("--hpo", help="optimize hyperparameters", action="store_true") parser.add_argument("--repeat", help="repeat best experiments", action="store_true") parser.add_argument( @@ -42,17 +47,8 @@ def main(): action="store_true", ) # parser.add_argument('--experiment_name', type=str)s - parser.add_argument( - '--task_names', nargs='+', action='append', default=[] - ) - parser.add_argument( - '--task_metrics', nargs='+', action='append', default=[] - ) - parser.add_argument( - '--benchmark_name', - type=str, - help="name of summarized results file", - ) + parser.add_argument('--task_names', nargs='+', action='append', default=[]) + parser.add_argument('--task_metrics', nargs='+', action='append', default=[]) args = parser.parse_args() paths: List[Any] = args.config @@ -78,6 +74,9 @@ def main(): else: logging.config.fileConfig(fname=logger_path, disable_existing_loggers=False) logger = logging.getLogger("terratorch-iterate") + + run_repetitions = config_init.run_repetitions + assert isinstance(run_repetitions, int), f"Error! {run_repetitions=} is invalid" # only summarize results from multiple experiments if summarize: @@ -107,12 +106,10 @@ def main(): assert isinstance(t, str), f"Error! {t=} is not a str" benchmark_name = config_init.benchmark_name + if benchmark_name is None: + benchmark_name = "summary.csv" assert isinstance(benchmark_name, str), f"Error! {benchmark_name=} is not a str" - run_repetitions = config_init.run_repetitions - assert ( - isinstance(run_repetitions, int) and run_repetitions > 0 - ), f"Error! {run_repetitions=} is invalid" # get results and parameters from mlflow logs results_and_parameters = get_results_and_parameters( benchmark_name=benchmark_name, @@ -124,119 +121,129 @@ def main(): task_metrics=task_metrics, ) return - - # optimize hyperparameters and/or do repeated runs for single experiments - assert ( - hpo is True or repeat is True - ), f"Error! either {repeat=} or {hpo=} must be True" - parent_run_id = args.parent_run_id - if parent_run_id is not None: - assert isinstance(parent_run_id, str), f"Error! {parent_run_id=} is not a str" - - # validate the objects - experiment_name = config_init.experiment_name - assert isinstance(experiment_name, str), f"Error! {experiment_name=} is not a str" - run_name = config_init.run_name - if run_name is not None: - assert isinstance(run_name, str), f"Error! {run_name=} is not a str" - # validate defaults - defaults = config_init.defaults - assert isinstance(defaults, Defaults), f"Error! {defaults=} is not a Defaults" - - tasks = config_init.tasks - assert isinstance(tasks, list), f"Error! {tasks=} is not a list" - for t in tasks: - assert isinstance(t, Task), f"Error! {t=} is not a Task" - # if there is not specific terratorch_task specified, then use default terratorch_task - if t.terratorch_task is None: - t.terratorch_task = defaults.terratorch_task - # defaults.trainer_args["max_epochs"] = 5 - - optimization_space = config_init.optimization_space - assert isinstance( - optimization_space, dict - ), f"Error! {optimization_space=} is not a dict" - - # ray_storage_path is optional - ray_storage_path = config_init.ray_storage_path - if ray_storage_path is not None: + else: + # optimize hyperparameters and/or do repeated runs for single experiments + assert ( + hpo is True or repeat is True + ), f"Error! either {repeat=} or {hpo=} must be True" + parent_run_id = args.parent_run_id + if parent_run_id is not None: + assert isinstance( + parent_run_id, str + ), f"Error! {parent_run_id=} is not a str" + + # validate the objects + experiment_name = config_init.experiment_name assert isinstance( - ray_storage_path, str - ), f"Error! {ray_storage_path=} is not a str" + experiment_name, str + ), f"Error! {experiment_name=} is not a str" + run_name = config_init.run_name + if run_name is not None: + assert isinstance(run_name, str), f"Error! {run_name=} is not a str" + # validate defaults + defaults = config_init.defaults + assert isinstance(defaults, Defaults), f"Error! {defaults=} is not a Defaults" + + tasks = config_init.tasks + assert isinstance(tasks, list), f"Error! {tasks=} is not a list" + for t in tasks: + assert isinstance(t, Task), f"Error! {t=} is not a Task" + # if there is not specific terratorch_task specified, then use default terratorch_task + if t.terratorch_task is None: + t.terratorch_task = defaults.terratorch_task + # defaults.trainer_args["max_epochs"] = 5 + + optimization_space = config_init.optimization_space + assert isinstance( + optimization_space, dict + ), f"Error! {optimization_space=} is not a dict" - n_trials = config_init.n_trials - assert isinstance(n_trials, int) and n_trials > 0, f"Error! {n_trials=} is invalid" - run_repetitions = config_init.run_repetitions + # ray_storage_path is optional + ray_storage_path = config_init.ray_storage_path + if ray_storage_path is not None: + assert isinstance( + ray_storage_path, str + ), f"Error! {ray_storage_path=} is not a str" - report_on_best_val = config_init.report_on_best_val - assert isinstance( - report_on_best_val, bool - ), f"Error! {ray_storage_path=} is not a bool" + n_trials = config_init.n_trials + assert ( + isinstance(n_trials, int) and n_trials > 0 + ), f"Error! {n_trials=} is invalid" + run_repetitions = config_init.run_repetitions - save_models = config_init.save_models - assert isinstance(save_models, bool), f"Error! {save_models=} is not a bool" + report_on_best_val = config_init.report_on_best_val + assert isinstance( + report_on_best_val, bool + ), f"Error! {ray_storage_path=} is not a bool" - test_models = config_init.test_models - assert isinstance(test_models, bool), f"Error! {test_models=} is not a bool" + save_models = config_init.save_models + assert isinstance(save_models, bool), f"Error! {save_models=} is not a bool" - bayesian_search = config_init.bayesian_search - assert isinstance(bayesian_search, bool), f"Error! {bayesian_search=} is not a bool" + test_models = config_init.test_models + assert isinstance(test_models, bool), f"Error! {test_models=} is not a bool" - # custom_modules_path is optional - custom_modules_path = config_init.custom_modules_path - if custom_modules_path is not None: + bayesian_search = config_init.bayesian_search assert isinstance( - custom_modules_path, str - ), f"Error! {custom_modules_path=} is not a str" - import_custom_modules(logger=logger, custom_modules_path=custom_modules_path) - - if repeat and not hpo: - output = config_init.output_path - if output is None: - storage_uri_path = Path(storage_uri) - assert ( - storage_uri_path.exists() and storage_uri_path.is_dir() - ), f"Error! Unable to create new output_path based on storage_uri_path because the latter does not exist: {storage_uri_path}" - output_path = storage_uri_path.parents[0] / "repeated_exp_output_csv" - output_path.mkdir(parents=True, exist_ok=True) - output_path = output_path / f"{experiment_name}_repeated_exp_mlflow.csv" - output = str(output_path) - - logger.info("Rerun best experiments...") - rerun_best_from_backbone( - logger=logger, - parent_run_id=parent_run_id, - output_path=str(output_path), - defaults=defaults, - tasks=tasks, - experiment_name=experiment_name, - storage_uri=storage_uri, - optimization_space=optimization_space, - run_repetitions=run_repetitions, - save_models=save_models, - report_on_best_val=report_on_best_val, - ) - else: - if not repeat and hpo: - run_repetitions = 0 - - # run_repetitions is an optional parameter - benchmark_backbone( - defaults=defaults, - tasks=tasks, - experiment_name=experiment_name, - storage_uri=storage_uri, - ray_storage_path=ray_storage_path, - run_name=run_name, - optimization_space=optimization_space, - n_trials=n_trials, - run_repetitions=run_repetitions, - save_models=save_models, - report_on_best_val=report_on_best_val, - test_models=test_models, - bayesian_search=bayesian_search, - logger=logger, - ) + bayesian_search, bool + ), f"Error! {bayesian_search=} is not a bool" + + # custom_modules_path is optional + custom_modules_path = config_init.custom_modules_path + if custom_modules_path is not None: + assert isinstance( + custom_modules_path, str + ), f"Error! {custom_modules_path=} is not a str" + import_custom_modules( + logger=logger, custom_modules_path=custom_modules_path + ) + + if repeat and not hpo: + output = config_init.output_path + if output is None: + storage_uri_path = Path(storage_uri) + assert ( + storage_uri_path.exists() and storage_uri_path.is_dir() + ), f"Error! Unable to create new output_path based on storage_uri_path because the latter does not exist: {storage_uri_path}" + output_path = storage_uri_path.parents[0] / "repeated_exp_output_csv" + output_path.mkdir(parents=True, exist_ok=True) + output_path = output_path / f"{experiment_name}_repeated_exp_mlflow.csv" + output = str(output_path) + + logger.info("Rerun best experiments...") + rerun_best_from_backbone( + logger=logger, + parent_run_id=parent_run_id, + output_path=str(output_path), + defaults=defaults, + tasks=tasks, + experiment_name=experiment_name, + storage_uri=storage_uri, + optimization_space=optimization_space, + run_repetitions=run_repetitions, + save_models=save_models, + report_on_best_val=report_on_best_val, + ) + else: + if not repeat and hpo: + run_repetitions = 0 + + # run_repetitions is an optional parameter + benchmark_backbone( + defaults=defaults, + tasks=tasks, + experiment_name=experiment_name, + storage_uri=storage_uri, + ray_storage_path=ray_storage_path, + run_name=run_name, + optimization_space=optimization_space, + n_trials=n_trials, + run_repetitions=run_repetitions, + save_models=save_models, + report_on_best_val=report_on_best_val, + test_models=test_models, + bayesian_search=bayesian_search, + logger=logger, + ) if __name__ == "__main__": diff --git a/configs/tests/benchmark_v2_simple.yaml b/configs/tests/benchmark_v2_simple.yaml index 3d0cb83..9e29926 100644 --- a/configs/tests/benchmark_v2_simple.yaml +++ b/configs/tests/benchmark_v2_simple.yaml @@ -78,7 +78,8 @@ tasks: - "NIR" n_trials: 2 save_models: False -storage_uri: /dccstor/geofm-finetuning/terratorch-iterate-test/ +storage_uri: /Users/ltizzei/Projects/Orgs/IBM/terratorch-iterate +# storage_uri: /dccstor/geofm-finetuning/terratorch-iterate-test/ ray_storage_path: /dccstor/geofm-finetuning/terratorch-iterate-test/ray_storage run_repetitions: 0 optimization_space: From 22c5f0db9a10ed194278edfd2265fffb7bfafc1f Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 18 Aug 2025 21:57:46 -0300 Subject: [PATCH 11/11] fix benchmark config Signed-off-by: Leonardo P. Tizzei --- configs/tests/benchmark_v2_simple.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configs/tests/benchmark_v2_simple.yaml b/configs/tests/benchmark_v2_simple.yaml index 9e29926..3d0cb83 100644 --- a/configs/tests/benchmark_v2_simple.yaml +++ b/configs/tests/benchmark_v2_simple.yaml @@ -78,8 +78,7 @@ tasks: - "NIR" n_trials: 2 save_models: False -storage_uri: /Users/ltizzei/Projects/Orgs/IBM/terratorch-iterate -# storage_uri: /dccstor/geofm-finetuning/terratorch-iterate-test/ +storage_uri: /dccstor/geofm-finetuning/terratorch-iterate-test/ ray_storage_path: /dccstor/geofm-finetuning/terratorch-iterate-test/ray_storage run_repetitions: 0 optimization_space: