From 3143f01505d8b74b0ddb08c679870ba073eadc24 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 11 Jul 2025 13:00:03 +0000 Subject: [PATCH 01/16] generalise monitoring --- terratorch_iterate/model_fitting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/terratorch_iterate/model_fitting.py b/terratorch_iterate/model_fitting.py index 93a367b..9a25ed3 100644 --- a/terratorch_iterate/model_fitting.py +++ b/terratorch_iterate/model_fitting.py @@ -286,10 +286,10 @@ def launch_training( client = mlflow.tracking.MlflowClient( tracking_uri=storage_uri, ) - - if not metric.startswith("val/"): + pdb.set_trace() + if not metric.startswith("val"): raise Exception( - f"Metric {metric} does not start with `val/`. Please choose a validation metric" + f"Metric {metric} does not start with `val`. Please choose a validation metric" ) for_pd_collect = [] val_metrics_names = [] From 1640513e17f2c83d0ae953c24061761dc70bdcf1 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 11 Jul 2025 13:01:03 +0000 Subject: [PATCH 02/16] generalise modelling --- terratorch_iterate/model_fitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terratorch_iterate/model_fitting.py b/terratorch_iterate/model_fitting.py index 9a25ed3..82ea59d 100644 --- a/terratorch_iterate/model_fitting.py +++ b/terratorch_iterate/model_fitting.py @@ -286,7 +286,7 @@ def launch_training( client = mlflow.tracking.MlflowClient( tracking_uri=storage_uri, ) - pdb.set_trace() + if not metric.startswith("val"): raise Exception( f"Metric {metric} does not start with `val`. Please choose a validation metric" From 8bc4c5ec071e69d94bd8e5c99d6d4b7fa3b05563 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Sun, 13 Jul 2025 09:53:34 +0000 Subject: [PATCH 03/16] add type for Geobench --- benchmark/benchmark_types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark_types.py b/benchmark/benchmark_types.py index bf8ccab..a3d9610 100644 --- a/benchmark/benchmark_types.py +++ b/benchmark/benchmark_types.py @@ -15,6 +15,7 @@ ObjectDetectionTask, ) from torchgeo.datamodules import BaseDataModule +from geobench_v2.datamodules import GeoBenchDataModule valid_task_types = type[ SemanticSegmentationTask @@ -116,7 +117,7 @@ class Task: name (str): Name for this task type (TaskTypeEnum): Type of task. terratorch_task (dict): Arguments for the Terratorch Task. - datamodule (BaseDataModule): Datamodule to be used. + datamodule (BaseDataModule | GeoBenchDataModule): Datamodule to be used. direction (str): One of min or max. Direction to optimize the metric in. metric (str): Metric to be optimized. Defaults to "val/loss". early_prune (bool): Whether to prune unpromising runs early. Defaults to False. @@ -128,7 +129,7 @@ class Task: name: str type: TaskTypeEnum - datamodule: BaseDataModule + datamodule: BaseDataModule | GeoBenchDataModule direction: str terratorch_task: Optional[dict[str, Any]] = None metric: str = "val/loss" From dcf06479b8281b3204420aa97c926320b87c8c4d Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Sun, 13 Jul 2025 09:55:35 +0000 Subject: [PATCH 04/16] remove unused tt iterate folder --- terratorch_iterate/__init__.py | 0 terratorch_iterate/backbone_benchmark.py | 395 -------- terratorch_iterate/benchmark_ray.py | 256 ------ terratorch_iterate/benchmark_types.py | 175 ---- terratorch_iterate/main.py | 215 ----- terratorch_iterate/model_fitting.py | 671 -------------- terratorch_iterate/module.py | 66 -- terratorch_iterate/plot_tools.py | 256 ------ terratorch_iterate/py.typed | 0 terratorch_iterate/repeat_best_experiment.py | 468 ---------- .../resources/dataset_specifications/agb.yaml | 64 -- .../dataset_specifications/eurosat.yaml | 28 - .../dataset_specifications/fire_scars.yaml | 56 -- .../multi_temporal_crop.yaml | 57 -- .../dataset_specifications/sen1floods11.yaml | 59 -- .../sen1floods11_transforms.yaml | 67 -- terratorch_iterate/tests/__init__.py | 0 terratorch_iterate/utils.py | 866 ------------------ 18 files changed, 3699 deletions(-) delete mode 100644 terratorch_iterate/__init__.py delete mode 100644 terratorch_iterate/backbone_benchmark.py delete mode 100644 terratorch_iterate/benchmark_ray.py delete mode 100644 terratorch_iterate/benchmark_types.py delete mode 100644 terratorch_iterate/main.py delete mode 100644 terratorch_iterate/model_fitting.py delete mode 100644 terratorch_iterate/module.py delete mode 100644 terratorch_iterate/plot_tools.py delete mode 100644 terratorch_iterate/py.typed delete mode 100644 terratorch_iterate/repeat_best_experiment.py delete mode 100644 terratorch_iterate/resources/dataset_specifications/agb.yaml delete mode 100644 terratorch_iterate/resources/dataset_specifications/eurosat.yaml delete mode 100644 terratorch_iterate/resources/dataset_specifications/fire_scars.yaml delete mode 100644 terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml delete mode 100644 terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml delete mode 100644 terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml delete mode 100644 terratorch_iterate/tests/__init__.py delete mode 100644 terratorch_iterate/utils.py diff --git a/terratorch_iterate/__init__.py b/terratorch_iterate/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/terratorch_iterate/backbone_benchmark.py b/terratorch_iterate/backbone_benchmark.py deleted file mode 100644 index c586311..0000000 --- a/terratorch_iterate/backbone_benchmark.py +++ /dev/null @@ -1,395 +0,0 @@ -""" -This module contains the high level functions for benchmarking on a single node. -""" - -# import argparse -import os -import importlib -from functools import partial -from typing import Any -from pathlib import Path -import mlflow -import optuna -import pandas as pd -import torch -import logging -from optuna.pruners import HyperbandPruner -from optuna.samplers import BaseSampler, RandomSampler -from tabulate import tabulate -import pickle -from benchmark.benchmark_types import ( - Defaults, - ParameterBounds, - Task, - combine_with_defaults, - optimization_space_type, -) -from benchmark.model_fitting import fit_model, fit_model_with_hparams -from benchmark.repeat_best_experiment import rerun_best_from_backbone -from benchmark.utils import ( - check_existing_task_parent_runs, - check_existing_experiments, - unflatten, - get_logger, - sync_mlflow_optuna, - REPEATED_SEEDS_DEFAULT, -) - -direction_type_to_optuna = {"min": "minimize", "max": "maximize"} - - -def benchmark_backbone_on_task( - logger, - defaults: Defaults, - task: Task, - storage_uri: str, - experiment_name: str, - experiment_run_id: str, - task_run_id: str | None = None, - optimization_space: optimization_space_type | None = None, - n_trials: int = 1, - save_models: bool = False, - sampler: BaseSampler | None = None, - test_models: bool = False, -) -> tuple[float, str | list[str] | None, dict[str, Any]]: - - optuna_db_path = Path(storage_uri).parents[0] / "optuna_db" - if not os.path.exists(optuna_db_path): - os.makedirs(optuna_db_path) - optuna_db_path = optuna_db_path / f"{experiment_name}_{experiment_run_id}" - optuna_db_path = str(optuna_db_path) - - task_run_id = sync_mlflow_optuna( - optuna_db_path=optuna_db_path, - storage_uri=storage_uri, - experiment_name=experiment_name, - task_run_id=task_run_id, - task=task, - n_trials=n_trials, - logger=logger, - ) - - with mlflow.start_run(run_name=task.name, nested=True, run_id=task_run_id) as run: - logger.info(f"starting task run with id: {run.info.run_id}") - training_spec = combine_with_defaults(task, defaults) - if "max_epochs" not in training_spec.trainer_args: - raise Exception("Must specify max_epochs for the trainer") - task = training_spec.task - lightning_task_class = training_spec.task.type.get_class_from_enum() - - # if no optimization params, just run it - if optimization_space is None: - return ( - *fit_model( - training_spec, - lightning_task_class, - run.info.run_name, - experiment_name, - storage_uri, - run.info.run_id, - save_models=save_models, - test_models=test_models, - ), - {}, - ) - - # if optimization parameters specified, do hyperparameter tuning - study = optuna.create_study( - sampler=sampler, - direction=direction_type_to_optuna[ - training_spec.task.direction - ], # in the future may want to allow user to specify this - pruner=HyperbandPruner(), - study_name=task.name, - storage="sqlite:///{}.db".format(optuna_db_path), - load_if_exists=True, - ) - - objective = partial( - fit_model_with_hparams, - training_spec, - lightning_task_class, - task.name, - experiment_name, - optimization_space, - storage_uri, - run.info.run_id, - save_models, - test_models, - ) - - n_trials = n_trials - len(study.trials) - for trial in study.trials: - if (trial.state == optuna.trial.TrialState.FAIL) | ( - trial.state == optuna.trial.TrialState.RUNNING - ): - n_trials = n_trials + 1 - - study.optimize( - objective, - n_trials=n_trials, - # callbacks=[champion_callback], - catch=[torch.cuda.OutOfMemoryError], - ) - - tags = { - "early_stop_patience": str(training_spec.task.early_stop_patience), - "partition_name": str(training_spec.task.datamodule.partition) if hasattr(training_spec.task.datamodule, 'partition') else 'default', - "decoder": str(training_spec.task.terratorch_task["model_args"]["decoder"]), - "backbone": str( - training_spec.task.terratorch_task["model_args"]["backbone"] - ), - "n_trials": str(n_trials), - } - mlflow.set_tags(tags) - - best_params = unflatten(study.best_trial.params) - mlflow.log_params(best_params) # unflatten - mlflow.log_metric(f"best_{task.metric}", study.best_value) - return study.best_value, task.metric, best_params - - -# Custom function to parse the optimization space argument -def parse_optimization_space(space: dict | None) -> optimization_space_type | None: - if space is None: - return None - parsed_space: optimization_space_type = {} - for key, value in space.items(): - if isinstance(value, dict): - try: - bounds = ParameterBounds(**value) - parsed_space[key] = bounds - except TypeError: - # Recursively parse nested optimization spaces - parsed_space[key] = parse_optimization_space(value) - elif isinstance(value, list): - # If it's a list, leave it as is - parsed_space[key] = value - else: - raise ValueError(f"Invalid type for {key}: {value}") - return parsed_space - - -def benchmark_backbone( - defaults: Defaults, - tasks: list[Task], - experiment_name: str, - storage_uri: str, - logger: logging.RootLogger | None, - ray_storage_path: str | None = None, - backbone_import: str | None = None, - run_name: str | None = None, - n_trials: int = 1, - optimization_space: dict | None = None, - save_models: bool = False, - run_id: str | None = None, - description: str = "No description provided", - bayesian_search: bool = True, - continue_existing_experiment: bool = True, - test_models: bool = False, - run_repetitions: int = REPEATED_SEEDS_DEFAULT, - report_on_best_val: bool = True, -) -> str: - """Highest level function to benchmark a backbone using a single node - - Args: - defaults (Defaults): Defaults that are set for all tasks - tasks (list[Task]): List of Tasks to benchmark over. Will be combined with defaults to get the final parameters of the task. - experiment_name (str): Name of the MLFlow experiment to be used. - storage_uri (str): Path to MLFLow storage location. - ray_storage_path (str | None): Ignored. Exists for compatibility with ray configs. - backbone_import (str | None): Path to module that will be imported to register a potential new backbone. Defaults to None. - run_name (str | None, optional): Name of highest level mlflow run. Defaults to None. - n_trials (int, optional): Number of hyperparameter optimization trials to run. Defaults to 1. - optimization_space (dict | None): Parameters to optimize over. Should be a dictionary (may be nested) - of strings (parameter name) to list (discrete set of possibilities) or ParameterBounds, defining a range to optimize over. The structure should be the same as would be passed under tasks.terratorch_task. Defaults to None. - save_models (bool, optional): Whether to save the model. Defaults to False. - run_id (str | None): id of existing mlflow run to use as top-level run. Useful to add more experiments to a previous benchmark run. Defaults to None. - description (str): Optional description for mlflow parent run. - bayesian_search (bool): Whether to use bayesian optimization for the hyperparameter search. False uses random sampling. Defaults to True. - run_repetitions (int): Number of times that the experiment will be repeated. Defaults to 1. - """ - base = Path(storage_uri).parents[0] - PATH_TO_JOB_TRACKING = base / "job_progress_tracking" - REPEATED_EXP_FOLDER = base / "repeated_exp_output_csv" - - if logger is None: - logger = get_logger(log_folder=str(base / "job_logs")) - - if not os.path.exists(REPEATED_EXP_FOLDER): - os.makedirs(REPEATED_EXP_FOLDER) - if not os.path.exists(PATH_TO_JOB_TRACKING): - os.makedirs(PATH_TO_JOB_TRACKING) - - if backbone_import: - importlib.import_module(backbone_import) - - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - - if bayesian_search: - sampler: BaseSampler | None = None # take the default - else: - sampler = RandomSampler() - - optimization_space = parse_optimization_space(optimization_space) - table_columns = ["Task", "Metric", "Best Score", "Hyperparameters"] - table_entries = [] - - backbone: str = defaults.terratorch_task["model_args"]["backbone"] - task_names = [task.name for task in tasks] - run_name = f"top_run_{experiment_name}" if run_name is None else run_name - - completed_task_run_names = [] - run_hpo = True - task_run_to_id_match = {} - if continue_existing_experiment: - # find status of existing runs, and delete incomplete runs except one with the most complete tasks - existing_experiments = check_existing_experiments( - logger=logger, - storage_uri=storage_uri, - experiment_name=experiment_name, - exp_parent_run_name=run_name, - task_names=task_names, - n_trials=n_trials, - backbone=backbone, - ) - if existing_experiments["no_existing_runs"]: - logger.info("\nStarting new experiment from scratch") - else: - if (existing_experiments["incomplete_run_to_finish"] is not None) and ( - run_id is None - ): - logger.info("Continuing previous experiment parent run") - run_id = existing_experiments["incomplete_run_to_finish"] - experiment_id = existing_experiments["experiment_id"] - run_hpo = True - - if existing_experiments["finished_run"] is not None: - run_hpo = False - finished_run_id = existing_experiments["finished_run"] - run_id = existing_experiments["finished_run"] - - # get previously completed tasks - completed_task_run_names, _, task_run_to_id_match = ( - check_existing_task_parent_runs( - logger, run_id, storage_uri, experiment_name, n_trials - ) - ) - - table_entries_filename = str( - PATH_TO_JOB_TRACKING / f"{experiment_name}-{run_id}_table_entries.pkl" - ) - if os.path.exists(table_entries_filename): - with open(table_entries_filename, 'rb') as handle: - table_entries = pickle.load(handle) - else: - logger.info("Starting new experiment from scratch") - - # only run hyperparameter optimization (HPO) if there are no experiments with finished HPO - if run_hpo: - logger.info("Running hyperparameter optimization") - with mlflow.start_run( - run_name=run_name, run_id=run_id, description=description - ) as run: - for task in tasks: - # only run task if it was not completed before - task_run_name = task.name - if task_run_name in completed_task_run_names: - logger.info(f"{task_run_name} already completed") - continue - else: - logger.info(f"{task_run_name} not completed. starting now") - - task_run_id = ( - task_run_to_id_match[task_run_name] - if task_run_name in task_run_to_id_match - else None - ) - best_value, metric_name, hparams = benchmark_backbone_on_task( - logger, - defaults, - task, - storage_uri, - experiment_name, - experiment_run_id=run.info.run_id, - task_run_id=task_run_id, - optimization_space=optimization_space, - n_trials=n_trials, - save_models=save_models, - sampler=sampler, - test_models=test_models, - ) - table_entries.append([task.name, metric_name, best_value, hparams]) - table_entries_filename = str( - PATH_TO_JOB_TRACKING - / f"{experiment_name}-{run.info.run_id}_table_entries.pkl" - ) - with open(table_entries_filename, 'wb') as handle: - pickle.dump(table_entries, handle, protocol=pickle.HIGHEST_PROTOCOL) - - table = tabulate(table_entries, headers=table_columns) - logger.info(table) - df = pd.DataFrame(data=table_entries, columns=table_columns) - df.set_index("Task") - logger.info("Starting to save results") - mlflow.log_table( - df, - "results_table.json", - run.info.run_id, - ) - experiment_id = run.info.experiment_id - - # check completion of HPO for all tasks before proceeding to next stage - existing_experiments = check_existing_experiments( - logger=logger, - storage_uri=storage_uri, - experiment_name=experiment_name, - exp_parent_run_name=run_name, - task_names=task_names, - n_trials=n_trials, - backbone=backbone - ) - if existing_experiments["finished_run"] is not None: - finished_run_id = existing_experiments["finished_run"] - else: - logger.info("HPO is not complete. Please re-run this experiment") - raise RuntimeError - logger.info("HPO complete") - - logger.info(f"run_repetitions: {run_repetitions}") - - if run_repetitions >= 1: - # run repeated experiments - logger.info( - f"Now running {run_repetitions} repeats per experiment \n\ - Parent run: {finished_run_id} \n\ - Experiment name: {experiment_name} \n\ - " - ) - path_to_final_results = str( - REPEATED_EXP_FOLDER / f"{experiment_name}_repeated_exp_mlflow.csv" - ) - - rerun_best_from_backbone( - logger=logger, - parent_run_id=finished_run_id, - output_path=path_to_final_results, - defaults=defaults, - tasks=tasks, - experiment_name=experiment_name, - storage_uri=storage_uri, - tmp_dir=ray_storage_path, - backbone_import=backbone_import, - run_name=run_name, - n_trials=n_trials, - ray_storage_path=ray_storage_path, - optimization_space=optimization_space, - save_models=save_models, - description=description, - use_ray=False, - run_repetitions=run_repetitions, - report_on_best_val=report_on_best_val, - ) - - return finished_run_id diff --git a/terratorch_iterate/benchmark_ray.py b/terratorch_iterate/benchmark_ray.py deleted file mode 100644 index 81eed60..0000000 --- a/terratorch_iterate/benchmark_ray.py +++ /dev/null @@ -1,256 +0,0 @@ -""" -This module contains the high level functions for benchmarking on a single node. -""" - -import importlib -import os - -import mlflow -import pandas as pd -import ray -from jsonargparse import CLI -from ray.tune.search import SearchAlgorithm, Searcher -from ray.tune.search.basic_variant import BasicVariantGenerator -from ray.tune.search.optuna import OptunaSearch -from tabulate import tabulate - -from benchmark.backbone_benchmark import parse_optimization_space -from benchmark.benchmark_types import ( - Defaults, - Task, - TrainingSpec, - combine_with_defaults, - optimization_space_type, -) -from benchmark.model_fitting import fit_model, ray_tune_model, valid_task_types - - -def benchmark_backbone_on_task( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - storage_uri: str, - experiment_name: str, - ray_storage_path: str, - optimization_space: optimization_space_type | None = None, - n_trials: int = 1, - save_models: bool = False, - backbone_import: str | None = None, - searcher: SearchAlgorithm | None = None, -) -> dict: - if not searcher: - raise ValueError("Searcher must not be None") - with mlflow.start_run( - run_name=training_spec.task.name, - nested=True, - ) as run: - # if no optimization params, just run it - if optimization_space is None: - raise Exception("For no optimization space, run benchmark.py") - - results = ray_tune_model( - training_spec, - lightning_task_class, - optimization_space, - storage_uri, - ray_storage_path, - experiment_name, - save_models, - n_trials, - backbone_import=backbone_import, - searcher=searcher, - ) - - mlflow.log_table( - results.get_dataframe(), - f"results_{run.info.run_name}.json", - run.info.run_id, - ) - if results.get_best_result().metrics is None: - raise Exception("Best result metrics were none") - if results.get_best_result().config is None: - raise Exception("Best result config was none") - - mlflow.log_params(results.get_best_result().config) - mlflow.log_metric( - f"best_{training_spec.task.metric}", - results.get_best_result().metrics[training_spec.task.metric], - ) - return { - "best_result": results.get_best_result().metrics[training_spec.task.metric], - "metric": training_spec.task.metric, - "best_config": results.get_best_result().config, - } - - -@ray.remote(num_cpus=8, num_gpus=1) -def remote_fit( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - run_name: str, - storage_uri: str, - experiment_name: str, - parent_run_id: str, - save_models: bool, - backbone_import: str | None, -) -> float: - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - if backbone_import: - importlib.import_module(backbone_import) - return fit_model( - training_spec, - lightning_task_class, - run_name, - experiment_name, - storage_uri, - parent_run_id, - save_models=save_models, - )[0] - - -def benchmark_backbone( - defaults: Defaults, - tasks: list[Task], - experiment_name: str, - storage_uri: str, - tmp_dir: str | None = None, - backbone_import: str | None = None, - run_name: str | None = None, - n_trials: int = 1, - ray_storage_path: str | None = None, - optimization_space: dict | None = None, - save_models: bool = False, - run_id: str | None = None, - description: str = "No description provided", - bayesian_search: bool = True, -): - """Highest level function to benchmark a backbone using a ray cluster - - Args: - tmp_dir (str): Path to temporary directory to be used for ray - defaults (Defaults): Defaults that are set for all tasks - tasks (list[Task]): List of Tasks to benchmark over. Will be combined with defaults to get the final parameters of the task. - experiment_name (str): Name of the MLFlow experiment to be used. - storage_uri (str): Path to MLFlow storage location. - ray_storage_path (str | None): Path to storage of ray outputs, including saved models, when using ray tune. Required if optimization_space is specified - backbone_import (str | None): Path to module that will be imported to register a potential new backbone. Defaults to None. - run_name (str | None, optional): Name of highest level mlflow run. Defaults to None. - n_trials (int, optional): Number of hyperparameter optimization trials to run. Defaults to 1. - optimization_space (dict | None): Parameters to optimize over. Should be a dictionary (may be nested) - of strings (parameter name) to list (discrete set of possibilities) or ParameterBounds, defining a range to optimize over. The structure should be the same as would be passed under tasks.terratorch_task. Defaults to None. - save_models (bool, optional): Whether to save the models. Defaults to False. - run_id (str | None): id of existing mlflow run to use as top-level run. Useful to add more experiments to a previous benchmark run. Defaults to None. - description (str): Optional description for mlflow parent run. - bayesian_search (bool): Whether to use bayesian optimization for the hyperparameter search. False uses random sampling. Defaults to True. - """ - if tmp_dir is None: - raise Exception("tmp_dir must be specified for runs with ray.") - os.environ["RAY_TMPDIR"] = tmp_dir - ray.init(_temp_dir=tmp_dir) - if backbone_import: - importlib.import_module(backbone_import) - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - # mlflow.pytorch.autolog(log_datasets=False) - - if bayesian_search: - searcher: Searcher | SearchAlgorithm = OptunaSearch() - else: - searcher = BasicVariantGenerator() - - optimization_space = parse_optimization_space(optimization_space) - - table_columns = ["Task", "Metric", "Best Score", "Hyperparameters"] - table_entries = [] - - with mlflow.start_run( - run_name=run_name, run_id=run_id, description=description - ) as run: - - if optimization_space is None: - # no hparams, parallelize over tasks - ray_tasks = [] - for task in tasks: - training_spec = combine_with_defaults(task, defaults) - if "max_epochs" not in training_spec.trainer_args: - raise Exception("Must specify max_epochs for the trainer") - task = training_spec.task - lightning_task_class = training_spec.task.type.get_class_from_enum() - ray_tasks.append( - remote_fit.remote( - training_spec, - lightning_task_class, - run.info.run_name, - storage_uri, - experiment_name, - run.info.run_id, - save_models, - backbone_import, - ) - ) - results = ray.get(ray_tasks) - table_entries = [ - [ - task.name, - task.metric, - result, - None, - ] - for task, result in zip(tasks, results) - ] - else: - if ray_storage_path is None: - raise Exception( - "`ray_storage_path` must be specified if `optimization_space` is specified." - ) - # hparams, parallelize within tasks, run one task at a time. - results = [] - for task in tasks: - training_spec = combine_with_defaults(task, defaults) - if "max_epochs" not in training_spec.trainer_args: - raise Exception("Must specify max_epochs for the trainer") - task = training_spec.task - lightning_task_class = training_spec.task.type.get_class_from_enum() - results.append( - benchmark_backbone_on_task( - training_spec, - lightning_task_class, - storage_uri, - experiment_name, - ray_storage_path, - optimization_space=optimization_space, - n_trials=n_trials, - save_models=save_models, - backbone_import=backbone_import, - searcher=searcher, - ) - ) - - table_entries = [ - [ - task.name, - result["metric"], - result["best_result"], - str(result["best_config"]), - ] - for task, result in zip(tasks, results) - ] - - table = tabulate(table_entries, headers=table_columns) - print(table) - df = pd.DataFrame(data=table_entries, columns=table_columns) - df.set_index("Task") - mlflow.log_table( - df, - "results_table.json", - run.info.run_id, - ) - ray.shutdown() - - -def main(): - CLI(benchmark_backbone, fail_untyped=False) - - -if __name__ == "__main__": - main() diff --git a/terratorch_iterate/benchmark_types.py b/terratorch_iterate/benchmark_types.py deleted file mode 100644 index 4188d76..0000000 --- a/terratorch_iterate/benchmark_types.py +++ /dev/null @@ -1,175 +0,0 @@ -""" -This module defines all the types expected at input. Used for type checking by jsonargparse. -""" - -from ast import Dict -import copy -import enum -from dataclasses import dataclass, field, replace -from typing import Any, Optional, Union - -from terratorch.tasks import ( - ClassificationTask, - MultiLabelClassificationTask, - PixelwiseRegressionTask, - SemanticSegmentationTask, -) -from torchgeo.datamodules import BaseDataModule - -valid_task_types = type[ - SemanticSegmentationTask | ClassificationTask | PixelwiseRegressionTask -] - - -class TaskTypeEnum(enum.Enum): - """ - Enum for the type of task to be performed. segmentation, regression or classification. - """ - - segmentation = "segmentation" - regression = "regression" - classification = "classification" - multilabel_classification = "multilabel_classification" - - def get_class_from_enum( - self, - ) -> valid_task_types: - match self: - case TaskTypeEnum.segmentation: - return SemanticSegmentationTask - case TaskTypeEnum.regression: - return PixelwiseRegressionTask - case TaskTypeEnum.classification: - return ClassificationTask - case TaskTypeEnum.multilabel_classification: - return MultiLabelClassificationTask - case _: - raise TypeError("Task type does not exist") - - -class ParameterTypeEnum(enum.Enum): - """ - Enum for the type of parameter allowed in ParameterBounds. integer or real. - """ - - integer = "int" - real = "real" - - -@dataclass -class ParameterBounds: - """ - Dataclass defining a numerical range to search over. - - Args: - min (float | int): Minimum. - max (float | int): Maximum. - type (ParameterTypeEnum): Whether the range is in the space of integers or real numbers. - log (bool): Whether to search over the log space (useful for parameters that vary wildly in scale, e.g. learning rate) - """ - - min: float | int - max: float | int - type: ParameterTypeEnum - log: bool = False - - def __post_init__(self): - if not isinstance(self.type, ParameterTypeEnum): - self.type = ParameterTypeEnum(self.type) - - -optimization_space_type = dict[ - str, Union[list, ParameterBounds, 'optimization_space_type'] -] - - -@dataclass -class Defaults: - """ - Default parameters set for each of the tasks. - - These parameters will be combined with task specific ones to form the final parameters for the Terratorch training. - - Args: - trainer_args (dict): Arguments passed to Lightning Trainer. - terratorch_task (dict): Arguments for the Terratorch Task. - """ - - trainer_args: dict[str, Any] = field(default_factory=dict) - terratorch_task: dict[str, Any] = field(default_factory=dict) - - -@dataclass -class Task: - """ - Parameters passed to define each of the tasks. - - These parameters are combined with any specified defaults to generate the final task parameters. - - Args: - name (str): Name for this task - type (TaskTypeEnum): Type of task. - terratorch_task (dict): Arguments for the Terratorch Task. - datamodule (BaseDataModule): Datamodule to be used. - direction (str): One of min or max. Direction to optimize the metric in. - metric (str): Metric to be optimized. Defaults to "val/loss". - early_prune (bool): Whether to prune unpromising runs early. Defaults to False. - early_stop_patience (int, None): Whether to use Lightning early stopping of runs. Defaults to None, which does not do early stopping. - optimization_except (str[str]): HyperParameters from the optimization space to be ignored for this task. - max_run_duration (str, None): maximum allowed run duration in the form DD:HH:MM:SS; will stop a run after this - amount of time. Defaults to None, which doesn't stop runs by time. - """ - - name: str - type: TaskTypeEnum - datamodule: BaseDataModule - direction: str - terratorch_task: Optional[dict[str, Any]] = None - metric: str = "val/loss" - early_prune: bool = False - early_stop_patience: int | None = None - optimization_except: set[str] = field(default_factory=set) - max_run_duration: str | None = None - - -@dataclass -class TrainingSpec: - task: Task - trainer_args: dict[str, Any] = field(default_factory=dict) - - -def recursive_merge(first_dict: dict[str, Any], second_dict: dict[str, Any]): - - # consider using deepmerge instead of this - for key, val in second_dict.items(): - if key not in first_dict: - first_dict[key] = val - else: - # if it is a dictionary, recurse deeper - if isinstance(val, dict): - recursive_merge(first_dict[key], val) - # if it is not further nested, just replace the value - else: - first_dict[key] = val - - -def combine_with_defaults(task: Task, defaults: Defaults) -> TrainingSpec: - """ - Combine task-specific parameters with default parameters. - - Args: - task (Task): Task object containing task-specific parameters. - defaults (Defaults): Defaults object containing default parameters. - - Returns: - TrainingSpec: TrainingSpec object containing combined parameters. - """ - terratorch_task: Optional[Dict[str, Any]] = copy.deepcopy(defaults.terratorch_task) - if terratorch_task is None: - terratorch_task = {} - if task.terratorch_task is None: - task.terratorch_task = {} - # merge task specific args with default args - recursive_merge(terratorch_task, task.terratorch_task) - task_with_defaults = replace(task, terratorch_task=terratorch_task) - return TrainingSpec(task_with_defaults, defaults.trainer_args) diff --git a/terratorch_iterate/main.py b/terratorch_iterate/main.py deleted file mode 100644 index ebad90c..0000000 --- a/terratorch_iterate/main.py +++ /dev/null @@ -1,215 +0,0 @@ -import logging -import uuid -from pathlib import Path -from typing import Any, List -from jsonargparse import ArgumentParser -from benchmark.backbone_benchmark import benchmark_backbone -from benchmark.benchmark_types import Defaults, Task -from benchmark.repeat_best_experiment import rerun_best_from_backbone -from benchmark.utils import (get_logger, import_custom_modules, - get_results_and_parameters, extract_parameters) - -def main(): - print("Running terratorch-iterate...") - parser = ArgumentParser() - - parser.add_argument('--defaults', type=Defaults) # to ignore model - parser.add_argument('--optimization_space', type=dict) # to ignore model - parser.add_argument('--experiment_name', type=str) # to ignore model - parser.add_argument('--run_name', type=str) # to ignore model - parser.add_argument('--save_models', type=bool) # to ignore model - parser.add_argument('--storage_uri', type=str) # to ignore model - parser.add_argument('--ray_storage_path', type=str) # to ignore model - parser.add_argument('--n_trials', type=int) # to ignore model - parser.add_argument('--run_repetitions', type=int) # to ignore model - parser.add_argument('--tasks', type=list[Task]) - parser.add_argument("--parent_run_id", type=str) - parser.add_argument("--output_path", type=str) - parser.add_argument("--logger", type=str) - parser.add_argument("--config", action="config") - parser.add_argument('--custom_modules_path', type=str) - parser.add_argument('--report_on_best_val', type=bool, default=True) - parser.add_argument('--test_models', type=bool, default=False) - parser.add_argument('--bayesian_search', type=bool, default=True) - parser.add_argument("--hpo", help="optimize hyperparameters", action="store_true") - parser.add_argument("--repeat", help="repeat best experiments", action="store_true") - parser.add_argument("--summarize", help="summarize results from repeated experiments", action="store_true") - - - - args = parser.parse_args() - paths: List[Any] = args.config - path = paths[0] - config = parser.parse_path(path) - config_init = parser.instantiate_classes(config) - - #summarize results from multiple experiments - summarize = args.summarize - assert isinstance(summarize, bool), f"Error! {summarize=} is not a bool" - if summarize: - assert ( - hpo is False and repeat is False - ), f"Error! both {repeat=} and {hpo=} must be False when summarizing results from multiple experiments." - storage_uri = config_init.storage_uri - assert isinstance(storage_uri, str), f"Error! {storage_uri=} is not a str" - - list_of_experiment_names = config_init.list_of_experiment_names - assert isinstance(list_of_experiment_names, list), f"Error! {list_of_experiment_names=} is not a list" - for exp in list_of_experiment_names: - assert isinstance(exp, str), f"Error! {exp=} is not a str" - - task_names = config_init.task_names - assert isinstance(task_names, list), f"Error! {task_names=} is not a list" - for t in task_names: - assert isinstance(t, str), f"Error! {t=} is not a str" - - run_repetitions = config_init.run_repetitions - assert isinstance(run_repetitions, int) and run_repetitions > 0, f"Error! {run_repetitions=} is invalid" - #get results and parameters from mlflow logs - results_and_parameters = get_results_and_parameters( - storage_uri = storage_uri, - logger = logger, - experiments = list_of_experiment_names, - task_names = task_names, - num_repetitions = run_repetitions - ) - return - - #optimize hyperparameters and/or do repeated runs for single experiments - repeat = args.repeat - assert isinstance(repeat, bool), f"Error! {repeat=} is not a bool" - hpo = args.hpo - assert isinstance(hpo, bool), f"Error! {hpo=} is not a bool" - assert ( - hpo is True or repeat is True - ), f"Error! either {repeat=} or {hpo=} must be True" - parent_run_id = args.parent_run_id - if parent_run_id is not None: - assert isinstance(parent_run_id, str), f"Error! {parent_run_id=} is not a str" - - - # validate the objects - experiment_name = config_init.experiment_name - assert isinstance(experiment_name, str), f"Error! {experiment_name=} is not a str" - run_name = config_init.run_name - if run_name is not None: - assert isinstance(run_name, str), f"Error! {run_name=} is not a str" - # validate defaults - defaults = config_init.defaults - assert isinstance(defaults, Defaults), f"Error! {defaults=} is not a Defaults" - - tasks = config_init.tasks - assert isinstance(tasks, list), f"Error! {tasks=} is not a list" - for t in tasks: - assert isinstance(t, Task), f"Error! {t=} is not a Task" - # if there is not specific terratorch_task specified, then use default terratorch_task - if t.terratorch_task is None: - t.terratorch_task = defaults.terratorch_task - # defaults.trainer_args["max_epochs"] = 5 - storage_uri = config_init.storage_uri - assert isinstance(storage_uri, str), f"Error! {storage_uri=} is not a str" - - optimization_space = config_init.optimization_space - assert isinstance( - optimization_space, dict - ), f"Error! {optimization_space=} is not a dict" - - # ray_storage_path is optional - ray_storage_path = config_init.ray_storage_path - if ray_storage_path is not None: - assert isinstance( - ray_storage_path, str - ), f"Error! {ray_storage_path=} is not a str" - - n_trials = config_init.n_trials - assert isinstance(n_trials, int) and n_trials > 0, f"Error! {n_trials=} is invalid" - run_repetitions = config_init.run_repetitions - - report_on_best_val = config_init.report_on_best_val - assert isinstance( - report_on_best_val, bool - ), f"Error! {ray_storage_path=} is not a bool" - - save_models = config_init.save_models - assert isinstance( - save_models, bool - ), f"Error! {save_models=} is not a bool" - - test_models = config_init.test_models - assert isinstance( - test_models, bool - ), f"Error! {test_models=} is not a bool" - - bayesian_search = config_init.bayesian_search - assert isinstance( - bayesian_search, bool - ), f"Error! {bayesian_search=} is not a bool" - - - logger_path = config_init.logger - if logger_path is None: - storage_uri_path = Path(storage_uri) - logger = get_logger(log_folder=f"{str(storage_uri_path.parents[0])}/job_logs") - else: - logging.config.fileConfig(fname=logger_path, disable_existing_loggers=False) - logger = logging.getLogger("terratorch-iterate") - - #custom_modules_path is optional - custom_modules_path = config_init.custom_modules_path - if custom_modules_path is not None: - assert isinstance( - custom_modules_path, str - ), f"Error! {custom_modules_path=} is not a str" - import_custom_modules(logger=logger, custom_modules_path=custom_modules_path) - - if repeat and not hpo: - output = config_init.output_path - if output is None: - storage_uri_path = Path(storage_uri) - assert ( - storage_uri_path.exists() and storage_uri_path.is_dir() - ), f"Error! Unable to create new output_path based on storage_uri_path because the latter does not exist: {storage_uri_path}" - output_path = storage_uri_path.parents[0] / "repeated_exp_output_csv" - output_path.mkdir(parents=True, exist_ok=True) - output_path = output_path / f"{experiment_name}_repeated_exp_mlflow.csv" - output = str(output_path) - - logger.info("Rerun best experiments...") - rerun_best_from_backbone( - logger=logger, - parent_run_id=parent_run_id, - output_path=str(output_path), - defaults=defaults, - tasks=tasks, - experiment_name=experiment_name, - storage_uri=storage_uri, - optimization_space=optimization_space, - run_repetitions=run_repetitions, - save_models=save_models, - report_on_best_val=report_on_best_val, - ) - else: - if not repeat and hpo: - run_repetitions = 0 - - # run_repetitions is an optional parameter - benchmark_backbone( - defaults=defaults, - tasks=tasks, - experiment_name=experiment_name, - storage_uri=storage_uri, - ray_storage_path=ray_storage_path, - run_name=run_name, - optimization_space=optimization_space, - n_trials=n_trials, - run_repetitions=run_repetitions, - save_models=save_models, - report_on_best_val=report_on_best_val, - test_models=test_models, - bayesian_search=bayesian_search, - logger=logger, - ) - - -if __name__ == "__main__": - main() diff --git a/terratorch_iterate/model_fitting.py b/terratorch_iterate/model_fitting.py deleted file mode 100644 index 82ea59d..0000000 --- a/terratorch_iterate/model_fitting.py +++ /dev/null @@ -1,671 +0,0 @@ -""" -This module contains all the logic for fitting models -""" - -import abc -import copy -import dataclasses -import importlib -import os -import shutil -import types -import uuid -import warnings -from abc import abstractmethod -from functools import wraps -from typing import Callable -import pandas as pd -import lightning.pytorch as pl -import mlflow -import optuna -from lightning import Callback, Trainer -from lightning.pytorch.callbacks import ( - EarlyStopping, - LearningRateMonitor, - ModelCheckpoint, - Timer, -) -from lightning.pytorch.loggers.mlflow import MLFlowLogger - -# from ray.air.integrations.mlflow import -from optuna.integration import PyTorchLightningPruningCallback -from ray import tune -from ray.air import CheckpointConfig, RunConfig -from ray.train._internal.storage import StorageContext -from ray.tune.experiment import Trial -import pdb -# for ddp in the future if required -# import ray -# from ray.train import report -# from ray import train -# from ray.air import CheckpointConfig, ScalingConfig -# from ray.train.lightning import ( -# RayDeepSpeedStrategy, -# RayLightningEnvironment, -# RayTrainReportCallback, -# prepare_trainer, -# ) -# from ray.train.torch import TorchTrainer -from ray.tune.integration.pytorch_lightning import TuneReportCheckpointCallback -from ray.tune.schedulers import FIFOScheduler, TrialScheduler -from ray.tune.schedulers.hb_bohb import HyperBandForBOHB -from ray.tune.search import SearchAlgorithm, Searcher -from ray.tune.search.bohb import TuneBOHB -from terratorch.tasks import PixelwiseRegressionTask, SemanticSegmentationTask -from torchgeo.datamodules import BaseDataModule -from torchgeo.trainers import BaseTask - -from benchmark.benchmark_types import ( - ParameterBounds, - ParameterTypeEnum, - TrainingSpec, - optimization_space_type, - recursive_merge, - valid_task_types, -) - -os.environ["TUNE_DISABLE_AUTO_CALLBACK_LOGGERS"] = ( - "1" # disable tune loggers, will add csv and json manually. If this is not here, it will log to tensorboard automatically -) - -SEED = 42 - - -class ParameterPicker(abc.ABC): - @abstractmethod - def pick_categorical(self, variable, choices): - pass - - @abstractmethod - def pick_int(self, variable, low, high): - pass - - @abstractmethod - def pick_float(self, variable, low, high, log=False): - pass - - -class OptunaParameterPicker(ParameterPicker): - def __init__(self, trial: optuna.Trial): - super().__init__() - self.trial = trial - - def pick_categorical(self, variable, choices): - return self.trial.suggest_categorical(variable, choices) - - def pick_int(self, variable, low, high): - return self.trial.suggest_int(variable, low, high) - - def pick_float(self, variable, low, high, log=False): - return self.trial.suggest_float(variable, low, high, log=log) - - -class RayTuneParameterPicker(ParameterPicker): - def __init__(self): - super().__init__() - - def pick_categorical(self, variable, choices): - return tune.choice(choices) - - def pick_int(self, variable, low, high): - return tune.quniform(low, high, 1) - - def pick_float(self, variable, low, high, log=False): - if log: - return tune.loguniform(low, high) - return tune.uniform(low, high) - - -class _TuneReportCallback(TuneReportCheckpointCallback, pl.Callback): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - -def inject_hparams(training_spec: TrainingSpec, config: dict): - # treat batch size specially - config_without_batch_size = copy.deepcopy(config) - assert isinstance( - config_without_batch_size, dict - ), f"Error! Unexpected config type: {config_without_batch_size}" - batch_size: int | None = config_without_batch_size.pop("batch_size", None) # type: ignore - datamodule_with_generated_hparams = copy.deepcopy(training_spec.task.datamodule) - if batch_size: - datamodule_with_generated_hparams.batch_size = batch_size - - terratorch_task_with_generated_hparams = copy.deepcopy( - training_spec.task.terratorch_task - ) - if terratorch_task_with_generated_hparams is None: - terratorch_task_with_generated_hparams = {} - - recursive_merge(terratorch_task_with_generated_hparams, config_without_batch_size) - task_with_generated_hparams = dataclasses.replace( - training_spec.task, - terratorch_task=terratorch_task_with_generated_hparams, - datamodule=datamodule_with_generated_hparams, - ) - training_spec_with_generated_hparams = dataclasses.replace( - training_spec, task=task_with_generated_hparams - ) - return training_spec_with_generated_hparams - - -def get_default_callbacks( - early_stop_patience: int | None, max_run_duration: str | None -) -> list[Callback]: - default_callbacks: list[Callback] = [ - LearningRateMonitor(logging_interval="epoch"), - ] - if early_stop_patience is not None: - default_callbacks.append( - EarlyStopping("val/loss", patience=early_stop_patience) - ) - if max_run_duration is not None: - default_callbacks.append(Timer(duration=max_run_duration)) - return default_callbacks - - -def generate_parameters( - parameter_picker: ParameterPicker, - current_hparams: dict, - hparam_space: dict, - ignore_keys: set[str] | None = None, - dictionary_position: list[str] | None = None, -): - if ignore_keys is None: - ignore_keys = set() - if dictionary_position is None: - dictionary_position = [] - _generate_parameters( - parameter_picker, - current_hparams, - hparam_space, - ignore_keys, - dictionary_position, - ) - - -def _generate_parameters( - parameter_picker: ParameterPicker, - current_hparams: dict, - hparam_space: dict, - ignore_keys: set[str], - dictionary_position: list[str], -): - for parameter, space in hparam_space.items(): - if parameter in ignore_keys: - continue - # if its a dictionary, continue to recurse - if isinstance(space, dict): - if parameter not in current_hparams: - current_hparams[parameter] = {} - dictionary_position.append(parameter) - _generate_parameters( - parameter_picker, - current_hparams[parameter], - hparam_space[parameter], - ignore_keys, - dictionary_position, - ) - dictionary_position.pop() - # if not, get a value from the parameter_picker and insert it with the name prepended by the dictionary position - # this is important so that the full path of the parameter is used - # this will avoid confusion between parameters with the same name but from different components - else: - full_parameter_name = ".".join(dictionary_position + [parameter]) - if isinstance(space, list): - suggestion = parameter_picker.pick_categorical( - full_parameter_name, space - ) - current_hparams[parameter] = suggestion - elif isinstance(space, ParameterBounds): - match space.type: - case ParameterTypeEnum.integer: - current_hparams[parameter] = parameter_picker.pick_int( - full_parameter_name, - int(space.min), - int(space.max), - ) - case ParameterTypeEnum.real: - current_hparams[parameter] = parameter_picker.pick_float( - full_parameter_name, space.min, space.max, log=space.log - ) - case _: - raise Exception( - f"Type {space.type} not recognized. Suggest one of {[e.value for e in ParameterTypeEnum]}" - ) - else: - raise Exception( - "Leaves of optimization space must be lists or ParameterBounds" - ) - - -""" -single node - optuna -""" - - -def launch_training( - trainer: Trainer, - task: BaseTask, - datamodule: BaseDataModule, - run_name: str, - experiment_name: str, - metric: str, - storage_uri: str, - parent_run_id: str, - direction: str, - test_models: bool, - delete_models_after_testing: bool, -) -> float: - - with mlflow.start_run(run_name=run_name, nested=True) as run: - mlflow.set_tag("mlflow.parentRunId", parent_run_id) - # explicitly log batch_size. Since it is not a model param, it will not be logged - mlflow.log_param("batch_size", datamodule.batch_size) - - trainer.logger = MLFlowLogger( - experiment_name=experiment_name, - run_id=run.info.run_id, - save_dir=storage_uri, - log_model=not delete_models_after_testing, - ) - trainer.fit(task, datamodule=datamodule) - if test_models: - trainer.test(ckpt_path="best", datamodule=datamodule) - if delete_models_after_testing: - # delete the checkpoints folder in the run - ckpts_folder = os.path.join( - trainer.logger.save_dir, - str(trainer.logger.name), - trainer.logger.version, - "checkpoints", - ) - shutil.rmtree(ckpts_folder) - - client = mlflow.tracking.MlflowClient( - tracking_uri=storage_uri, - ) - - if not metric.startswith("val"): - raise Exception( - f"Metric {metric} does not start with `val`. Please choose a validation metric" - ) - for_pd_collect = [] - val_metrics_names = [] - for metric_name in client.get_run(run.info.run_id).data.metrics: - if metric_name.startswith("val/"): - val_metrics_names.append(metric_name) - val_metric_history = client.get_metric_history( - run.info.run_id, metric_name - ) - pd_convertible_metric_history = [ - { - "metric_name": mm.key, - "step": mm.step, - "value": mm.value, - } - for mm in val_metric_history - ] - for_pd_collect += pd_convertible_metric_history - df_val_metrics = pd.DataFrame.from_records(for_pd_collect) - df_val_metrics = df_val_metrics.set_index( - ["metric_name", "step"], verify_integrity=True - ) - series_val_metrics = df_val_metrics["value"] - if direction == "max": - best_step = series_val_metrics[metric].idxmax() - elif direction == "min": - best_step = series_val_metrics[metric].idxmin() - else: - raise Exception(f"Direction must be `max` or `min` but got {direction}") - - for val_metric_name in val_metrics_names: - mlflow.log_metric( - f"best_step_{val_metric_name}", - series_val_metrics[(val_metric_name, best_step)], - ) - - return series_val_metrics[(metric, best_step)] - - -def fit_model( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - run_name: str, - experiment_name: str, - storage_uri: str, - parent_run_id: str, - trial: optuna.Trial | None = None, - save_models: bool = False, - test_models: bool = False, -) -> tuple[float, str]: - pl.seed_everything(SEED, workers=True) - training_spec_copy = copy.deepcopy(training_spec) - task = training_spec_copy.task - - if lightning_task_class in [ - SemanticSegmentationTask, - PixelwiseRegressionTask, - ]: - task.terratorch_task["plot_on_val"] = False - assert isinstance( - task.terratorch_task, dict - ), f"Error! Invalid type: {task.terratorch_task}" - - lightning_task = lightning_task_class(**task.terratorch_task) - - if len(training_spec.trainer_args.get("callbacks", [])) > 0: - warnings.warn( - "Callbacks passed to trainer. Make sure these are stateless, as they will not be reinitialized for each task!" - ) - default_callbacks: list[Callback] = get_default_callbacks( - task.early_stop_patience, task.max_run_duration - ) - - if task.early_prune and trial is not None: - default_callbacks.append( - PyTorchLightningPruningCallback(trial, monitor="val/loss") - ) - - delete_models_after_testing = False - if test_models and not save_models: - # we need to save the models during training to be able to test but can be deleted afterwards - save_models = True - delete_models_after_testing = True - - if save_models: - default_callbacks.append( - ModelCheckpoint(monitor=task.metric, mode=task.direction) - ) - if "enable_checkpointing" in training_spec_copy.trainer_args: - warnings.warn( - f"enable_checkpointing found. Will be overwritten to the value of save_models {save_models}" - ) - training_spec_copy.trainer_args["enable_checkpointing"] = save_models - training_spec_copy.trainer_args["enable_progress_bar"] = ( - training_spec_copy.trainer_args.get("enable_progress_bar", True) - ) - # get callbacks (set to empty list if none defined) and extend with default ones - training_spec_copy.trainer_args.setdefault("callbacks", []).extend( - default_callbacks - ) # type: ignore - - trainer = Trainer(**training_spec_copy.trainer_args) - - return ( - launch_training( - trainer, - lightning_task, - task.datamodule, - run_name, - experiment_name, - task.metric, - storage_uri, - parent_run_id, - task.direction, - test_models=test_models, - delete_models_after_testing=delete_models_after_testing, - ), - task.metric, - ) - - -def fit_model_with_hparams( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - run_name: str, - experiment_name: str, - hparam_space: optimization_space_type, - storage_uri: str, - parent_run_id: str, - save_models: bool, - test_models: bool, - trial: optuna.Trial, -) -> float: - """ - Generate parameters using the optuna trial from the given parameters. - Then inject these into the given task. - It is important to make sure to not overwrite the task passed in the arguments, or these updates may affect - subsequent trials. - """ - current_hparams: dict[str, int | float | str | bool] = {} - task = training_spec.task - generate_parameters( - OptunaParameterPicker(trial), - current_hparams, - hparam_space, - ignore_keys=task.optimization_except, - ) - - training_spec_with_generated_hparams = inject_hparams( - training_spec, current_hparams - ) - run_name = f"{run_name}_{trial.number}" - return fit_model( - training_spec_with_generated_hparams, - lightning_task_class, - run_name, - experiment_name, - storage_uri, - parent_run_id, - trial, - save_models=save_models, - test_models=test_models, - )[ - 0 - ] # return only the metric value for optuna - - -""" -multi node - ray -""" - - -def ray_tune_model( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - hparam_space: optimization_space_type, - storage_uri: str, - ray_storage_path: str, - experiment_name: str, - save_models: bool, - num_trials: int, - backbone_import: str | None = None, - searcher: Searcher | SearchAlgorithm | None = None, -) -> tune.ResultGrid: - - if not searcher: - raise ValueError("searcher must be specified") - trainable = tune.with_parameters( - ray_fit_model, - training_spec=training_spec, - lightning_task_class=lightning_task_class, - storage_uri=storage_uri, - experiment_name=experiment_name, - parent_run_id=mlflow.active_run().info.run_id, - save_models=save_models, - backbone_import=backbone_import, - ) - - current_hparams: dict[str, int | float | str | bool] = {} - task = training_spec.task - generate_parameters( - RayTuneParameterPicker(), - current_hparams, - hparam_space, - ignore_keys=task.optimization_except, - ) - - # Early stopping - # It is unclear if this is working properly when checkpoints are disabled - if task.early_prune: - search_alg: Searcher | SearchAlgorithm = TuneBOHB() - scheduler: TrialScheduler = HyperBandForBOHB( - time_attr="training_iteration", - max_t=training_spec.trainer_args["max_epochs"], - reduction_factor=2, - stop_last_trials=False, - ) - if not save_models: - raise RuntimeWarning( - "It is unclear if using `early_prune=True` with `save_models=False` produces correct results." - ) - else: - scheduler = FIFOScheduler() - search_alg = searcher - - # monkey patch scheduler to add trial storage dir - def decorate_to_add_trial_info(fn: Callable): - old_fn = fn - - @wraps(fn) - def new_func(self, tune_controller, trial: Trial): - trial.config["trial_storage"] = trial.storage - return old_fn(tune_controller, trial) - - return new_func - - scheduler.on_trial_add = types.MethodType( - decorate_to_add_trial_info(scheduler.on_trial_add), scheduler - ) - - # for ddp if required in the future - # scaling_config = ScalingConfig( - # use_gpu=True, - # num_workers=1, - # resources_per_worker={"CPU": 4, "GPU": 1}, - # trainer_resources={"CPU": 1, "GPU": 0}, - # ) - # ray_trainer = TorchTrainer( - # trainable, - # scaling_config=scaling_config, - # ) - - trainable_with_resources = tune.with_resources( - trainable, resources={"cpu": 8, "gpu": 1} - ) - - storage_path = os.path.join(ray_storage_path, experiment_name) - tuner = tune.Tuner( - trainable_with_resources, - tune_config=tune.TuneConfig( - metric=task.metric, - mode=task.direction, - num_samples=num_trials, - search_alg=search_alg, - scheduler=scheduler, - reuse_actors=False, - ), - run_config=RunConfig( - name=mlflow.active_run().info.run_name, - storage_path=storage_path, - callbacks=[ - tune.logger.CSVLoggerCallback(), - tune.logger.JsonLoggerCallback(), - # RayLogArtifactsMlFlowCallback(), - ], - checkpoint_config=( - CheckpointConfig( - num_to_keep=1, - checkpoint_score_attribute=task.metric, - checkpoint_score_order=task.direction, - ) - if save_models - else None - ), - # stop={"training_iteration": training_spec.trainer_args["max_epochs"]}, - ), - param_space=current_hparams, - ) - results = tuner.fit() - return results - - -def _generate_random_name(task_name: str): - # needed since the random names from mlflow are affected by the seed - # so they are always the same - return f"{task_name}_{uuid.uuid4().hex[:8]}" - - -def ray_fit_model( - config: dict, - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - storage_uri: str, - experiment_name: str, - parent_run_id: str, - save_models: bool = True, - backbone_import: str | None = None, -) -> None: - if backbone_import: - importlib.import_module(backbone_import) - print(config) - pl.seed_everything(SEED, workers=True) - tune.utils.wait_for_gpu( - target_util=0.07, delay_s=10, retry=50 - ) # sometimes process needs some time to release GPU - - trial_storage: StorageContext = config.pop("trial_storage", None) - - training_spec_with_generated_hparams = inject_hparams(training_spec, config) - task = training_spec_with_generated_hparams.task - - if lightning_task_class in [ - SemanticSegmentationTask, - PixelwiseRegressionTask, - ]: - task.terratorch_task["plot_on_val"] = False - lightning_task = lightning_task_class(**task.terratorch_task) - - if len(training_spec.trainer_args.get("callbacks", [])) > 0: - warnings.warn( - "Callbacks passed to trainer. Make sure these are stateless, as they will not be reinitialized for each task!" - ) - - default_callbacks: list[Callback] = get_default_callbacks( - task.early_stop_patience, task.max_run_duration - ) - default_callbacks.append( - _TuneReportCallback(metrics=[task.metric], save_checkpoints=save_models) - ) - - if "enable_checkpointing" in training_spec_with_generated_hparams.trainer_args: - warnings.warn( - "enable_checkpointing found. Will be overwritten to False as ray will be responsible for saving models." - ) - training_spec_with_generated_hparams.trainer_args["enable_checkpointing"] = False - if "enable_progress_bar" in training_spec_with_generated_hparams.trainer_args: - warnings.warn("enable_progress_bar found. Will be overwritten to False") - training_spec_with_generated_hparams.trainer_args["enable_progress_bar"] = False - - # get callbacks (set to empty list if none defined) and extend with default ones - training_spec_with_generated_hparams.trainer_args.setdefault( - "callbacks", [] - ).extend(default_callbacks) - - trainer = Trainer(**training_spec_with_generated_hparams.trainer_args) - - # trainer = prepare_trainer(trainer) - - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - - with mlflow.start_run( - run_name=_generate_random_name(training_spec.task.name), - parent_run_id=parent_run_id, - ) as run: - trainer.logger = MLFlowLogger( - experiment_name=experiment_name, - run_id=run.info.run_id, - run_name=run.info.run_name, - save_dir=storage_uri, - log_model=save_models, - ) - - # explicitly log batch_size. Since it is not a model param, it will not be logged - mlflow.log_param("batch_size", task.datamodule.batch_size) - trainer.fit(lightning_task, datamodule=task.datamodule) - print("Trial Storage: ", trial_storage.trial_fs_path) - if trial_storage is not None: - mlflow.log_artifacts(trial_storage.trial_fs_path) diff --git a/terratorch_iterate/module.py b/terratorch_iterate/module.py deleted file mode 100644 index 53f8eb3..0000000 --- a/terratorch_iterate/module.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Module functions.""" - -__copyright__ = """ -LICENSED INTERNAL CODE. PROPERTY OF IBM. -IBM Research Licensed Internal Code -(C) Copyright IBM Corp. 2024 -ALL RIGHTS RESERVED -""" - -from dataclasses import dataclass -from typing import Callable - - -def hello_world() -> str: - """Return Hello World.""" - return "Hello World" - - -class Foo: - """An example class.""" - - def __init__(self, a: int) -> None: - """Initialize Foo. - - Args: - a : documentation for argument a. - """ - self.a = a - - def method_that_would_really_waste_your_time_if_it_fails(self) -> str: - """Static typing could help you fix a bug in here before running any test. - - Returns: - documentation for the returned string - """ - self.a_times_1 = [1] * self.a - # example that would trigger a mypy typechecking failure - # return self.a + "When will you find out that this fails?" - return f"{self.a} This works" - - -@dataclass -class Bar: - """An example dataclass.""" - - #: some documentation for attribute b - b: str - - def set_b(self, compute_b: Callable[[], str]) -> None: - """Set b from return of a given function. - - Args: - compute_b (Callable[[], str]): function without arguments to determine b. - """ - self.b = compute_b() - - -if __name__ == "__main__": - # foo = Foo(1.0) # example that would fail (but mypy can tell you in advance) - foo = Foo(1) - - bar = Bar(b="excellent to each other") - print(bar.b) - - bar.set_b(hello_world) - print(bar.b) diff --git a/terratorch_iterate/plot_tools.py b/terratorch_iterate/plot_tools.py deleted file mode 100644 index 5ce6c82..0000000 --- a/terratorch_iterate/plot_tools.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright contributors to the geobench project -# modified from geobench (https://github.com/ServiceNow/geo-bench/blob/main/geobench/plot_tools.py) - - -import numpy as np -from matplotlib import pyplot as plt -import pandas as pd -import seaborn as sns -from matplotlib.ticker import FormatStrFormatter -import json -from scipy.stats import trim_mean - - -sns.set_style("dark", {"grid.color": "0.98", "axes.facecolor": "(0.95, 0.95, 0.97)"}) -GEO_BENCH_DIR = "geobench" - - -def biqm(scores): - """Return a bootstram sample of iqm.""" - b_scores = np.random.choice(scores, size=len(scores), replace=True) - return trim_mean(b_scores, proportiontocut=0.25, axis=None) - - -def iqm(scores): - """Interquantile mean.""" - return trim_mean(scores, proportiontocut=0.25, axis=None) - - -def bootstrap_iqm( - df, - group_keys=("model", "dataset", "partition name"), - metric="test_metric", - repeat=100, -): - """Boostram of seeds for all model and all datasets to comput iqm score distribution.""" - df_list = [] - for i in range(repeat): - series = df.groupby(list(group_keys))[metric].apply(biqm) - df_list.append(series.to_frame().reset_index()) - - return pd.concat(df_list) - - -def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100): - """Stratified bootstrap (by dataset) of all seeds to compute iqm score distribution for each model.""" - group = df.groupby(["model", "dataset", "partition name"]) - - df_list = [] - for i in range(repeat): - new_df = group.sample(frac=1, replace=True) - series = new_df.groupby(["model", "partition name"])[metric].apply(iqm) - df_list.append(series.to_frame().reset_index()) - - new_df = pd.concat(df_list) - new_df.loc[:, "dataset"] = "aggregated" - return new_df - - -def average_seeds( - df, group_keys=("model", "dataset", "partition name"), metric="test metric" -): - """Average seeds for all model and all datasets.""" - df_avg = df.groupby(list(group_keys))[metric].mean() - df_avg = df_avg.unstack(level="dataset") - - df_avg = df_avg.round(3) - return df_avg - - -def extract_1x_data(df_all): - """Extract only resutls trained on 100% of the data""" - return df_all[ - (df_all["partition name"] == "1.00x train") - | (df_all["partition name"] == "default") - ].copy() - - -def normalize_bootstrap_and_plot( - df, - metric, - benchmark_name, - model_order, - model_colors=None, - repeat=100, - fig_size=None, - n_legend_rows=2, -): - """Add aggregated data as a new dataset.""" - - # normalize all the scores based on the benchmark name. - # the normalizing data is expected to be found in the benchmark directory under normalizer.json - if benchmark_name: - normalizer = load_normalizer(benchmark_name=benchmark_name) - new_metric = normalizer.normalize_data_frame(df, metric) - else: - new_metric = metric - - # create a new df containing bootstrapped samples of iqm - bootstrapped_iqm = pd.concat( - ( - bootstrap_iqm_aggregate( - df, metric=new_metric, repeat=repeat - ), # stratified bootstrap across all datasets - bootstrap_iqm( - df, metric=new_metric, repeat=repeat - ), # bootstrapped iqm for each dataset - ) - ) - - # plot results per dataset (aggregated results is an extra dataset) - plot_per_dataset( - bootstrapped_iqm, - model_order, - model_colors=model_colors, - metric=new_metric, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - - -class Normalizer: - """Class used to normalize results beween min and max for each dataset.""" - - def __init__(self, range_dict): - """Initialize a new instance of Normalizer class.""" - self.range_dict = range_dict - - def __call__(self, ds_name, values, scale_only=False): - """Call the Normalizer class.""" - mn, mx = self.range_dict[ds_name] - range = mx - mn - if scale_only: - return values / range - else: - return (values - mn) / range - - def from_row(self, row, scale_only=False): - """Normalize from row.""" - return [ - self(ds_name, val, scale_only=scale_only) for ds_name, val in row.items() - ] - - def normalize_data_frame(self, df, metric): - """Normalize the entire dataframe.""" - new_metric = f"normalized {metric}" - df[new_metric] = df.apply( - lambda row: self.__call__(row["dataset"], row[metric]), axis=1 - ) - return new_metric - - def save(self, benchmark_name): - """Save normalizer to json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "w") as f: - json.dump(self.range_dict, f, indent=2) - - -def load_normalizer(benchmark_name): - """Load normalizer from json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "r") as f: - range_dict = json.load(f) - return Normalizer(range_dict) - - -def make_normalizer(data_frame, metrics=("test metric",), benchmark_name=None): - """Extract min and max from data_frame to build Normalizer object for all datasets.""" - datasets = data_frame["dataset"].unique() - range_dict = {} - - for dataset in datasets: - sub_df = data_frame[data_frame["dataset"] == dataset] - data = [] - for metric in metrics: - data.append(sub_df[metric].to_numpy()) - range_dict[dataset] = (np.min(data), np.max(data)) - - normalizer = Normalizer(range_dict) - - if benchmark_name: - normalizer.save(benchmark_name) - - return normalizer - - -def remove_violin_outline(ax): - """Remove the outline of the violin plot.""" - for pc in ax.collections: - pc.set_edgecolor("none") - - -def plot_per_dataset( - df, - model_order, - metric="test metric", - aggregated_name="aggregated", - sharey=True, - inner="box", - fig_size=None, - n_legend_rows=1, - model_colors=None, -): - """Violin plots for each datasets and each models. - - If a dataset is named `aggregated_name` it will be the first and will be highlighted in light blue. - - """ - datasets = sorted(df["dataset"].unique()) - - if fig_size is None: - fig_width = len(datasets) * 2 - fig_size = (fig_width, 3) - fig, axes = plt.subplots(1, len(datasets), sharey=sharey, figsize=fig_size) - - if model_colors is None: - colors = sns.color_palette("colorblind", n_colors=len(model_order)) - model_colors = dict(zip(model_order, colors)) - - for dataset, ax in zip(datasets, axes): - sub_df = df[df["dataset"] == dataset] - sns.violinplot( - x="dataset", - y=metric, - hue="model", - data=sub_df, - hue_order=model_order, - linewidth=0.5, - saturation=1, - scale="count", - inner=inner, - palette=model_colors, - ax=ax, - ) - remove_violin_outline(ax) - ax.tick_params(axis="y", labelsize=8) - ax.yaxis.set_major_formatter(FormatStrFormatter("%.2f")) - ax.grid(axis="y") - - if dataset == aggregated_name: - ax.set_facecolor("#cff6fc") - - ax.set(xlabel=None) - - if dataset != datasets[int((len(datasets) - 1) / 2)]: - ax.get_legend().remove() - else: - ncols = int(np.ceil(len(model_order) / n_legend_rows)) - sns.move_legend( - ax, loc="lower center", bbox_to_anchor=(0.5, 1), ncol=ncols, title="" - ) - - if dataset != datasets[0]: - ax.set(ylabel=None) - - if sharey: - fig.subplots_adjust(wspace=0.02) - else: - fig.subplots_adjust(wspace=0.3) diff --git a/terratorch_iterate/py.typed b/terratorch_iterate/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/terratorch_iterate/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py deleted file mode 100644 index 4103e99..0000000 --- a/terratorch_iterate/repeat_best_experiment.py +++ /dev/null @@ -1,468 +0,0 @@ -""" -This module contains functions to re-run a best backbone with different seeds -""" - -import copy -import importlib -import os -import glob -import warnings -import logging -from ast import literal_eval -from random import randint - -import mlflow -import mlflow.entities -import pandas as pd -import ray -from jsonargparse import CLI -from lightning import Callback, Trainer -from lightning.pytorch import seed_everything -from lightning.pytorch.callbacks import ModelCheckpoint -import shutil -from tabulate import tabulate -from terratorch.tasks import PixelwiseRegressionTask, SemanticSegmentationTask - -from lightning.pytorch.loggers.mlflow import MLFlowLogger -import time - -from benchmark.benchmark_types import ( - Defaults, - Task, - TrainingSpec, - combine_with_defaults, -) -from benchmark.model_fitting import ( - get_default_callbacks, - inject_hparams, - valid_task_types, -) -import pdb - -@ray.remote(num_cpus=8, num_gpus=1) -def remote_fit( - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - best_params: dict, - seed: int, - backbone_import: str | None = None, -) -> float | None: - seed_everything(seed, workers=True) - if backbone_import: - importlib.import_module(backbone_import) - - with mlflow.start_run( - run_name=f"{lightning_task_class.name}_{seed}", - nested=True, - ): - - training_spec_copy = copy.deepcopy(training_spec) - training_spec_with_generated_hparams = inject_hparams( - training_spec_copy, best_params - ) - task = training_spec_with_generated_hparams.task - - if lightning_task_class in [ - SemanticSegmentationTask, - PixelwiseRegressionTask, - ]: - task.terratorch_task["plot_on_val"] = False - lightning_task = lightning_task_class(**task.terratorch_task) - - if len(training_spec.trainer_args.get("callbacks", [])) > 0: - warnings.warn( - "Callbacks passed to trainer. Make sure these are stateless, as they will not be reinitialized for each task!" - ) - - default_callbacks: list[Callback] = get_default_callbacks( - task.early_stop_patience, task.max_run_duration - ) - # get callbacks (set to empty list if none defined) and extend with default ones - training_spec_with_generated_hparams.trainer_args.setdefault( - "callbacks", [] - ).extend( - default_callbacks - ) # type: ignore - if "enable_checkpointing" in training_spec_with_generated_hparams.trainer_args: - warnings.warn( - "enable_checkpointing found. Will be overwritten to False as ray will be responsible for saving models." - ) - training_spec_with_generated_hparams.trainer_args["enable_checkpointing"] = ( - False - ) - if "enable_progress_bar" in training_spec_with_generated_hparams.trainer_args: - warnings.warn("enable_progress_bar found. Will be overwritten to False") - training_spec_with_generated_hparams.trainer_args["enable_progress_bar"] = False - trainer = Trainer(**training_spec_with_generated_hparams.trainer_args) - try: - trainer.fit(lightning_task, datamodule=task.datamodule) - metrics = trainer.test( - lightning_task, datamodule=task.datamodule, verbose=False - ) - metrics = metrics[0] - except Exception as e: - raise Exception(str(e)) - # warnings.warn(str(e)) - # return None - test_metric = "test/" + task.metric.split("/")[1] - mlflow.log_metric(f"test_{test_metric}", metrics[test_metric]) - return metrics[test_metric] - - -def non_remote_fit( - experiment_name: str, - parent_run_id: str, - storage_uri: str, - task: Task, - training_spec: TrainingSpec, - lightning_task_class: valid_task_types, - best_params: dict, - seed: int, - backbone_import: str | None = None, - save_models: bool = False, - report_on_best_val: bool = True, -) -> float | None: - seed_everything(seed, workers=True) - if backbone_import: - importlib.import_module(backbone_import) - with mlflow.start_run( - run_name=f"{task.name}_{seed}", - nested=True, - ) as run: - mlflow.set_tag("mlflow.parentRunId", parent_run_id) - training_spec_copy = copy.deepcopy(training_spec) - training_spec_with_generated_hparams = inject_hparams( - training_spec_copy, best_params - ) - task = training_spec_with_generated_hparams.task - - if lightning_task_class in [ - SemanticSegmentationTask, - PixelwiseRegressionTask, - ]: - task.terratorch_task["plot_on_val"] = False - lightning_task = lightning_task_class(**task.terratorch_task) - - if len(training_spec.trainer_args.get("callbacks", [])) > 0: - warnings.warn( - "Callbacks passed to trainer. Make sure these are stateless, as they will not be reinitialized for each task!" - ) - - default_callbacks: list[Callback] = get_default_callbacks( - task.early_stop_patience, task.max_run_duration - ) - delete_models_after_testing = False - - if report_on_best_val and not save_models: - # we need to save the models to be able to report results on best validation model - save_models = True - delete_models_after_testing = True - - if save_models: - default_callbacks.append( - ModelCheckpoint(monitor=task.metric, mode=task.direction) - ) - - if "enable_checkpointing" in training_spec_with_generated_hparams.trainer_args: - warnings.warn( - f"enable_checkpointing found. Will be overwritten to the value of save_models {save_models}" - ) - training_spec_with_generated_hparams.trainer_args["enable_checkpointing"] = ( - save_models - ) - if "enable_progress_bar" in training_spec_with_generated_hparams.trainer_args: - warnings.warn("enable_progress_bar found. Will be overwritten to False") - training_spec_with_generated_hparams.trainer_args["enable_progress_bar"] = False - # get callbacks (set to empty list if none defined) and extend with default ones - training_spec_with_generated_hparams.trainer_args.setdefault( - "callbacks", [] - ).extend( - default_callbacks - ) # type: ignore - - trainer = Trainer(**training_spec_with_generated_hparams.trainer_args) - trainer.logger = MLFlowLogger( - experiment_name=experiment_name, - run_id=run.info.run_id, - save_dir=storage_uri, - log_model=False, # don't copy saved checkpoints to artifacts - ) - try: - trainer.fit(lightning_task, datamodule=task.datamodule) - ckpt_path = "best" if report_on_best_val else "last" - metrics = trainer.test( - lightning_task, - datamodule=task.datamodule, - verbose=False, - ckpt_path=ckpt_path, - ) - metrics = metrics[0] - - if delete_models_after_testing: - # delete the checkpoints' folder in the run - ckpts_folder = os.path.join( - trainer.logger.save_dir, # mlflow root dir - str(trainer.logger.name), # experiment_id - trainer.logger.version, # run_id - "checkpoints", - ) - shutil.rmtree(ckpts_folder) - - except Exception as e: - raise Exception(str(e)) - # warnings.warn(str(e)) - # return None - test_metric = "test/" + task.metric.split("/")[1] - mlflow.log_metric(f"test_{test_metric}", metrics[test_metric]) - return metrics[test_metric] - - -def rerun_best_from_backbone( - logger: logging.RootLogger, - parent_run_id: str, - output_path: str, - defaults: Defaults, - tasks: list[Task], - experiment_name: str, - storage_uri: str, - *args, - tmp_dir: str | None = None, - run_repetitions: int = 10, - backbone_import: str | None = None, - run_name: str | None = None, - n_trials: int = 1, - ray_storage_path: str | None = None, - save_models: bool = False, - report_on_best_val: bool = True, - run_id: str | None = None, - optimization_space: dict | None = None, - description: str | None = None, - use_ray=False, - **kwargs, -): - """Repeat best experiments from a benchmark run. Only works with a ray cluster. - - Args: - parent_run_id (str): mlflow id of parent run - output_path (str): path to store the results of the run - tmp_dir (str): Path to temporary directory to be used for ray - run_repetitions (int): How many runs (each with a different seed) to run per task. - - """ - if not os.path.isabs(output_path): - raise Exception( - f"output_path must be absolute. Consider using $(pwd)/{output_path}." - ) - if (tmp_dir is None) & (use_ray == True): - raise Exception("tmp_dir must be specified for runs with ray.") - - if use_ray: - os.environ["RAY_TMPDIR"] = tmp_dir - ray.init(_temp_dir=tmp_dir) - if backbone_import: - importlib.import_module(backbone_import) - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - - runs: list[mlflow.entities.Run] = mlflow.search_runs( - filter_string=f"tags.mlflow.parentRunId='{parent_run_id}'", output_format="list" - ) # type: ignore - logger.info(f"\nparent_run_id {parent_run_id}") - logger.info(f"\nFound runs: {[run.info.run_name for run in runs]}") - - task_names = [task.name for task in tasks] - logger.info(f"Will only run the following: {task_names}") - - table_columns = [ - "Task", - "Metric", - "Score", - "mlflow_run_name", - "mlflow_run_id", - "mlflow_run_status", - ] - table_entries = [] - ray_tasks = [] - - repeated_storage_uri = f"{storage_uri}_repeated_exp" - if not os.path.exists(repeated_storage_uri): - os.makedirs(repeated_storage_uri) - - repeated_experiment_name = f"{experiment_name}_repeated_exp" - mlflow.set_tracking_uri(repeated_storage_uri) - mlflow.set_experiment(repeated_experiment_name) - - #backbone_name = defaults.terratorch_task["model_args"]["backbone"] - with mlflow.start_run(run_name=experiment_name, run_id=None) as run: - for task in tasks: - logger.info(f"\n\ntask: {task.name}") - matching_runs = [run for run in runs if run.info.run_name.endswith(task.name)] # type: ignore - if len(matching_runs) == 0: - msg = f"No runs found for task {task.name}. Skipping." - warnings.warn(msg) - continue - if len(matching_runs) > 1: - msg = f"More than 1 run found for task {task.name}" - raise Exception(msg) - - # check if there are already results for this task and exp in the folder - past_output_path = ( - f"{output_path.split(experiment_name)[0]}{experiment_name}_*" - ) - past_output_path = glob.glob(past_output_path) - if len(sorted(past_output_path)) > 0: - output_path = sorted(past_output_path)[0] - logger.info(f"output path: {output_path}") - if os.path.exists(output_path): - logger.info("there are previous results from repeated experiments") - existing_output = pd.read_csv(output_path) - existing_output = existing_output[table_columns] - existing_task_output = existing_output.loc[ - existing_output["Task"] == task.name - ].copy() - rows, cols = existing_task_output.shape - logger.info(f"rows: {rows} \t cols: {cols}") - if rows > run_repetitions: - logger.info("task has valid results, will not re-run") - continue - past_seeds = [ - int(item.split("_")[-1]) - for item in existing_task_output["mlflow_run_name"].tolist() - ] - else: - past_seeds = [] - logger.info(f"past_seeds for task: {past_seeds}") - - best_params = matching_runs[0].data.params - best_params = {k: literal_eval(v) for k, v in best_params.items()} - training_spec = combine_with_defaults(task, defaults) - lightning_task_class = training_spec.task.type.get_class_from_enum() - - if use_ray: # experimental - successful_seeds = [randint(1, 5000) for i in range(run_repetitions)] - for seed in successful_seeds: - ray_tasks.append( - remote_fit.remote( - training_spec, - lightning_task_class, - best_params, - seed, - backbone_import=backbone_import, - ) - ) - else: - experiment_info = mlflow.get_experiment_by_name( - repeated_experiment_name - ) - seeds = [randint(1, 5000) for i in range(run_repetitions * 3)] - seeds = [seed for seed in seeds if seed not in past_seeds] - - for seed in seeds: - if len(past_seeds) >= run_repetitions: - break - - seed_run_name = f"{task.name}_{seed}" - logger.info(f"now trying: {seed_run_name}") - seed_run_data = mlflow.search_runs( - experiment_ids=[experiment_info.experiment_id], - filter_string=f'tags."mlflow.runName" LIKE "{seed_run_name}"', - output_format="list", - ) # type: ignore - if len(seed_run_data) > 0: - for item in seed_run_data: - logger.info(f"deleting existing run: {item}") - mlflow.delete_run(item.info.run_id) - - score = non_remote_fit( - experiment_name=repeated_experiment_name, - parent_run_id=run.info.run_id, - storage_uri=repeated_storage_uri, - task=task, - training_spec=training_spec, - lightning_task_class=lightning_task_class, - best_params=best_params, - seed=seed, - backbone_import=backbone_import, - save_models=save_models, - report_on_best_val=report_on_best_val, - ) - # check if run with name finished successfully - logger.info(f"score: {score}") - # TODO improve this sleep command - try to get a better estimate than this - time.sleep(60) - seed_run_data = mlflow.search_runs( - experiment_ids=[experiment_info.experiment_id], - filter_string=f'tags."mlflow.runName" LIKE "{seed_run_name}"', - output_format="list", - ) # type: ignore - - logger.info( - f"run for task {task.name} seed {seed} complete" - ) - if len(seed_run_data) > 0: - if seed_run_data[0].info.status != "FINISHED": - mlflow.delete_run(seed_run_data[0].info.run_id) - continue - past_seeds.append(seed) - new_data = pd.DataFrame( - { - "Task": [task.name], - "Metric": [task.metric.split("/")[-1]], - "Score": [score], - "mlflow_run_name": [seed_run_name], - "mlflow_run_id": [seed_run_data[0].info.run_id], - "mlflow_run_status": [seed_run_data[0].info.status], - } - ) - logger.info( - f"completed seeds so far for this task: {len(past_seeds)}" - ) - if os.path.exists(output_path): - logger.info( - "there are previous results from repeated experiments" - ) - existing_output = pd.read_csv(output_path) - existing_output = existing_output[table_columns] - existing_output.reset_index(inplace=True) - existing_task_output = existing_output.loc[ - existing_output["Task"] == task.name - ].copy() - rows, cols = existing_task_output.shape - logger.info(f"rows: {rows} \t cols: {cols}") - if rows == 0: - logger.info("no past results for this task") - existing_output = pd.concat( - [existing_output, new_data], axis=0 - ) - existing_output.reset_index(inplace=True) - existing_output.to_csv(output_path, index=False) - else: - new_data.to_csv(output_path, index=False) - - if use_ray: # experimental - results = ray.get(ray_tasks) - table_entries = [ - [ - task.name, - task.metric.split("/")[-1], - result, - matching_runs[0].info.run_id, - ] - for task, result in zip( - [task for task in tasks for _ in seeds], results - ) # expand tasks - ] - - table = tabulate(table_entries, headers=table_columns) - logger.info(table) - df = pd.DataFrame(data=table_entries, columns=table_columns) - df.to_csv(output_path, index=False) - ray.shutdown() - - -def main(): - CLI(rerun_best_from_backbone, fail_untyped=False) - - -if __name__ == "__main__": - main() diff --git a/terratorch_iterate/resources/dataset_specifications/agb.yaml b/terratorch_iterate/resources/dataset_specifications/agb.yaml deleted file mode 100644 index 33e9c95..0000000 --- a/terratorch_iterate/resources/dataset_specifications/agb.yaml +++ /dev/null @@ -1,64 +0,0 @@ -class_path: terratorch.datamodules.GenericNonGeoPixelwiseRegressionDataModule -init_args: - batch_size: 16 - num_workers: 4 - train_transform: - - class_path: albumentations.HorizontalFlip - init_args: - p: 0.5 - - class_path: albumentations.augmentations.geometric.rotate.Rotate - init_args: - limit: 30 - border_mode: 0 # cv2.BORDER_CONSTANT - # value: 0 - # mask_value: 1 - p: 0.5 - dict_kwargs: - value: 0 - mask_value: 1 - - class_path: ToTensorV2 - dataset_bands: - - 0 - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - - 1 - - 2 - - 3 - - 4 - output_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - rgb_indices: - - 2 - - 1 - - 0 - train_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/train_images - train_label_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/train_labels - val_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/val_images - val_label_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/val_labels - test_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/test_images - test_label_data_root: /dccstor/hhr-weather/latest_filters_all_agb_patches_tts_clipped_0_500/test_labels - # img_grep: "*.tif" - # label_grep: "*.tif" - means: - - 385.88501817 - - 714.60615207 - - 658.96267376 - - 3314.57774238 - - 2238.71812558 - - 1250.00982518 - stds: - - 264.62872 - - 355.62848 - - 504.54855 - - 898.4953 - - 947.22894 - - 828.1297 diff --git a/terratorch_iterate/resources/dataset_specifications/eurosat.yaml b/terratorch_iterate/resources/dataset_specifications/eurosat.yaml deleted file mode 100644 index 029ee51..0000000 --- a/terratorch_iterate/resources/dataset_specifications/eurosat.yaml +++ /dev/null @@ -1,28 +0,0 @@ -class_path: terratorch.datamodules.TorchNonGeoDataModule -init_args: - transforms: - # a possible way to select bands: - # - class_path: SelectBands - # init_args: - # band_indices: - # - 2 - # - 1 - # - 0 - - class_path: albumentations.augmentations.geometric.resize.Resize - dict_kwargs: - height: 224 - width: 224 - - class_path: ToTensorV2 - cls: torchgeo.datamodules.EuroSATDataModule - batch_size: 16 - num_workers: 4 -dict_kwargs: - root: /dccstor/geofm-pre/EuroSat - download: True - bands: - - B02 - - B03 - - B04 - - B08A - - B11 - - B12 diff --git a/terratorch_iterate/resources/dataset_specifications/fire_scars.yaml b/terratorch_iterate/resources/dataset_specifications/fire_scars.yaml deleted file mode 100644 index a2f50a1..0000000 --- a/terratorch_iterate/resources/dataset_specifications/fire_scars.yaml +++ /dev/null @@ -1,56 +0,0 @@ -class_path: terratorch.datamodules.GenericNonGeoSegmentationDataModule -init_args: - batch_size: 4 - num_workers: 8 - dataset_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - output_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - rgb_indices: - - 2 - - 1 - - 0 - train_transform: - - class_path: albumentations.RandomCrop - init_args: - height: 224 - width: 224 - - class_path: albumentations.HorizontalFlip - init_args: - p: 0.5 - - class_path: ToTensorV2 - no_data_replace: 0 - no_label_replace: -1 - train_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/training - train_label_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/training - val_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/validation - val_label_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/validation - test_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/validation - test_label_data_root: /dccstor/geofm-finetuning/fire-scars/finetune-data/6_bands_no_replant_extended/validation - img_grep: "*_merged.tif" - label_grep: "*.mask.tif" - means: - - 0.033349706741586264 - - 0.05701185520536176 - - 0.05889748132001316 - - 0.2323245113436119 - - 0.1972854853760658 - - 0.11944914225186566 - stds: - - 0.02269135568823774 - - 0.026807560223070237 - - 0.04004109844362779 - - 0.07791732423672691 - - 0.08708738838140137 - - 0.07241979477437814 - num_classes: 2 diff --git a/terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml b/terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml deleted file mode 100644 index bc30877..0000000 --- a/terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml +++ /dev/null @@ -1,57 +0,0 @@ -class_path: terratorch.datamodules.GenericNonGeoSegmentationDataModule -init_args: - batch_size: 8 - num_workers: 12 - train_transform: - - class_path: FlattenTemporalIntoChannels - - class_path: albumentations.Flip - - class_path: ToTensorV2 - - class_path: UnflattenTemporalFromChannels - init_args: - n_timesteps: 3 - dataset_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - output_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - rgb_indices: - - 2 - - 1 - - 0 - reduce_zero_label: True - expand_temporal_dimension: True - train_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/training_chips - train_label_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/training_chips - val_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips - val_label_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips - test_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips - test_label_data_root: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips - train_split: /dccstor/geofm-finetuning/hls_cdl_reclassed/training_chips/training_data.txt - test_split: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips/validation_data.txt - val_split: /dccstor/geofm-finetuning/hls_cdl_reclassed/validation_chips/validation_data.txt - img_grep: "*_merged.tif" - label_grep: "*.mask.tif" - means: - - 494.905781 - - 815.239594 - - 924.335066 - - 2968.881459 - - 2634.621962 - - 1739.579917 - stds: - - 284.925432 - - 357.84876 - - 575.566823 - - 896.601013 - - 951.900334 - - 921.407808 - num_classes: 13 diff --git a/terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml b/terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml deleted file mode 100644 index d3201e1..0000000 --- a/terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml +++ /dev/null @@ -1,59 +0,0 @@ -class_path: terratorch.datamodules.GenericNonGeoSegmentationDataModule -init_args: - batch_size: 8 - num_workers: 4 - constant_scale: 0.0001 - dataset_bands: - - COASTAL_AEROSOL - - BLUE - - GREEN - - RED - - RED_EDGE_1 - - RED_EDGE_2 - - RED_EDGE_3 - - NIR_BROAD - - NIR_NARROW - - WATER_VAPOR - - CIRRUS - - SWIR_1 - - SWIR_2 - output_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - rgb_indices: - - 2 - - 1 - - 0 - train_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - train_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - val_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - val_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - test_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - test_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - # these must be obtained by running terratorch/examples/scripts/convert_sen1floods11_splits.py on the original split csv files - train_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_train_data.txt - test_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_test_data.txt - val_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_valid_data.txt - img_grep: "*_S2Hand.tif" - label_grep: "*_LabelHand.tif" - no_label_replace: -1 - no_data_replace: 0 -means: - - 0.1412956 - - 0.13795798 - - 0.12353792 - - 0.30902815 - - 0.2044958 - - 0.11912015 -stds: - - 0.07406382 - - 0.07370365 - - 0.08692279 - - 0.11798815 - - 0.09772074 - - 0.07659938 -num_classes: 2 diff --git a/terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml b/terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml deleted file mode 100644 index ffea683..0000000 --- a/terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml +++ /dev/null @@ -1,67 +0,0 @@ -class_path: terratorch.datamodules.GenericNonGeoSegmentationDataModule -init_args: - batch_size: 8 - num_workers: 4 - constant_scale: 0.0001 - dataset_bands: - - COASTAL_AEROSOL - - BLUE - - GREEN - - RED - - RED_EDGE_1 - - RED_EDGE_2 - - RED_EDGE_3 - - NIR_BROAD - - NIR_NARROW - - WATER_VAPOR - - CIRRUS - - SWIR_1 - - SWIR_2 - output_bands: - - BLUE - - GREEN - - RED - - NIR_NARROW - - SWIR_1 - - SWIR_2 - rgb_indices: - - 2 - - 1 - - 0 - train_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - train_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - val_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - val_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - test_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/S2Hand/ - test_label_data_root: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/data/flood_events/HandLabeled/LabelHand - # these must be obtained by running terratorch/examples/scripts/convert_sen1floods11_splits.py on the original split csv files - train_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_train_data.txt - test_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_test_data.txt - val_split: /dccstor/geofm-finetuning/datasets/sen1floods11/v1.1/splits/flood_handlabeled/flood_valid_data.txt - img_grep: "*_S2Hand.tif" - label_grep: "*_LabelHand.tif" - no_label_replace: -1 - no_data_replace: 0 - train_transform: - - class_path: albumentations.HorizontalFlip - init_args: - p: 0.5 - - class_path: albumentations.VerticalFlip - init_args: - p: 0.5 - - class_path: ToTensorV2 - means: - - 0.1412956 - - 0.13795798 - - 0.12353792 - - 0.30902815 - - 0.2044958 - - 0.11912015 - stds: - - 0.07406382 - - 0.07370365 - - 0.08692279 - - 0.11798815 - - 0.09772074 - - 0.07659938 - num_classes: 2 diff --git a/terratorch_iterate/tests/__init__.py b/terratorch_iterate/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/terratorch_iterate/utils.py b/terratorch_iterate/utils.py deleted file mode 100644 index d6564c4..0000000 --- a/terratorch_iterate/utils.py +++ /dev/null @@ -1,866 +0,0 @@ -import os -from typing import Any, Dict -import mlflow -import datetime -import logging -from pathlib import Path -import pandas as pd -import numpy as np -import seaborn as sns -from matplotlib import pyplot as plt -from ast import literal_eval -import optuna -from benchmark.benchmark_types import Task -from benchmark import plot_tools -import sys -from mlflow.entities.experiment import Experiment -import importlib -import logging - -SEGMENTATION_BASE_TASKS = [ - 'chesapeake', - 'sa_crop_type', - 'pv4ger_seg', - 'cashew', - 'neontree', - 'nz_cattle', -] -CLASSIFICATION_BASE_TASKS = [ - 'pv4ger', - 'so2sat', - 'brick_kiln', - 'big_earth_net', - 'eurosat', - 'forestnet', -] -N_TRIALS_DEFAULT = 16 -REPEATED_SEEDS_DEFAULT = 10 -DATA_PARTITIONS = { - "default": 100, - "1.00x_train": 100, - "0.50x_train": 50, - "0.20x_train": 20, - "0.10x_train": 10, - "0.01x_train": 1, -} - - -def unflatten(dictionary: Dict[str, Any]): - resultDict: Dict = {} - for key, value in dictionary.items(): - parts = key.split(".") - d = resultDict - for part in parts[:-1]: - if part not in d: - d[part] = {} - d = d[part] - d[parts[-1]] = value - return resultDict - - -def sync_mlflow_optuna( - optuna_db_path: str, - storage_uri: str, - experiment_name: str, - task_run_id: str | None, - task: Task, - n_trials: int, - logger: logging.RootLogger, -) -> str | None: - """ - syncs the number of completed trials in mflow and optuna - Args: - optuna_db_path: path to optuna database - storage_uri: path to mlflow storage folder - experiment_name: name on experiment in mlflow - task_run_id: run_id of the task - task: name of the task - logger: logging.RootLogger to save logs to file - Returns: - task_run_id: run id of the task to be continued (if one exists) or None - """ - # check number of successful mlflow runs in task - client = mlflow.tracking.MlflowClient(tracking_uri=storage_uri) - completed_in_mlflow_for_task = [] - all_mlflow_runs_for_task = [] - if task_run_id is not None: - all_mlflow_runs_for_task.append(task_run_id) - logger.info(f"task_run_id : {task_run_id}") - experiment_info = client.get_experiment_by_name(experiment_name) - assert isinstance( - experiment_info, Experiment - ), f"Error! Unexpected type of {experiment_info=}" - individual_run_data = client.search_runs( - experiment_ids=[experiment_info.experiment_id], - filter_string=f'tags."mlflow.parentRunId" LIKE "{task_run_id}"', - ) - for individual_run in individual_run_data: - if individual_run.info.status == "FINISHED": - completed_in_mlflow_for_task.append(individual_run.info.run_id) - all_mlflow_runs_for_task.append(individual_run.info.run_id) - - # check number of successful optuna trials in the database - study_names = optuna.study.get_all_study_names( - storage="sqlite:///{}.db".format(optuna_db_path) - ) - if task.name in study_names: - loaded_study = optuna.load_study( - study_name=task.name, storage="sqlite:///{}.db".format(optuna_db_path) - ) - logger.info(f"loaded_study has : {len(loaded_study.trials)} trials") - incomplete = 0 - for trial in loaded_study.trials: - if (trial.state == optuna.trial.TrialState.FAIL) | ( - trial.state == optuna.trial.TrialState.RUNNING - ): - incomplete += 1 - logger.info(f"{incomplete} trials are incomplete") - successful_optuna_trials = len(loaded_study.trials) - incomplete - too_many_trials = successful_optuna_trials > n_trials - no_existing_task = task_run_id is None - optuna_mlflow_mismatch = ( - len(completed_in_mlflow_for_task) != successful_optuna_trials - ) - logger.info( - f"successful optuna trials {successful_optuna_trials} . mlflow runs {len(completed_in_mlflow_for_task)}" - ) - - if too_many_trials or no_existing_task or optuna_mlflow_mismatch: - logger.info(f"deleting study with name {task.name}") - logger.info(f"too_many_trials {too_many_trials}") - logger.info(f"no_existing_task {no_existing_task}") - - # delete optuna study in database - optuna.delete_study( - study_name=task.name, storage="sqlite:///{}.db".format(optuna_db_path) - ) - - # delete any existing mlflow runs - if len(all_mlflow_runs_for_task) > 0: - for item in all_mlflow_runs_for_task: - logger.info(f"deleting {item}") - client.delete_run(item) - assert isinstance( - experiment_info, Experiment - ), f"Error! Unexpected type of {experiment_info=}" - os.system(f"rm -r {experiment_info.artifact_location}/{item}") - task_run_id = None - else: - # delete any existing mlflow runs - if len(all_mlflow_runs_for_task) > 0: - for item in all_mlflow_runs_for_task: - logger.info(f"deleting {item}") - client.delete_run(item) - assert isinstance( - experiment_info, Experiment - ), f"Error! Unexpected type of {experiment_info=}" - os.system(f"rm -r {experiment_info.artifact_location}/{item}") - task_run_id = None - return task_run_id - - -def extract_repeated_experiment_results( - storage_uri: str, - logger: logging.RootLogger, - experiments: list, - num_repetitions: int = REPEATED_SEEDS_DEFAULT, - task_names: list = SEGMENTATION_BASE_TASKS, -) -> (pd.DataFrame, list): - """ - extracts results of repeated experiments from mlflow logs and saves them in csv - save list of incomplete experiments to a txt file - Args: - storage_uri: path to mlflow storage folder - logger: logging.RootLogger to save logs to file - experiments: list of experiment names - num_repetitions: number of repeated seeds per task - task_names: list of tasks - """ - if Path(storage_uri).exists() and Path(storage_uri).is_dir(): - storage_uri = Path(storage_uri) - repeated_exp_storage_uri = storage_uri.with_name( - f"{storage_uri.name}_repeated_exp" - ) - else: - print("Please use a valid directory for storage_uri") - raise ValueError - logger.info( - f"\n Extracting results of repeated experiments from: {str(repeated_exp_storage_uri)}" - ) - client = mlflow.tracking.MlflowClient(tracking_uri=str(repeated_exp_storage_uri)) - experiments = list(set(experiments)) - incomplete_experiments = [] - num_tasks = len(task_names) - combine_exp_results = [] - - for original_experiment_name in experiments: - experiment_name = f"{original_experiment_name}_repeated_exp" - logger.info(f"\nexperiment_name: {experiment_name}") - experiment_info = client.get_experiment_by_name(experiment_name) - if experiment_info is None: - logger.info( - f"EXPERIMENT {experiment_name} DOES NOT EXIST IN THIS FOLDER: {str(repeated_exp_storage_uri)}" - ) - incomplete_experiments.append(experiment_name) - continue - experiment_id = experiment_info.experiment_id - logger.info(f"experiment_id: {experiment_id}") - logger.info(f"experiment_info: {experiment_info}") - experiment_parent_run_data = client.search_runs(experiment_ids=[experiment_id]) - run_names = [] - run_ids = [] - run_seed = [] - run_task = [] - run_score = [] - run_metric = [] - run_status = [] - exp_ids = [] - exp_names = [] - logger.info(f"experiment_parent_run_data: {len(experiment_parent_run_data)}") - for run in experiment_parent_run_data: - run_name = run.info.run_name - task = "_".join(run_name.split("_")[:-1]) - if (task in task_names) and (run.info.status == "FINISHED"): - seed = int(run.info.run_name.split("_")[-1]) - if task in SEGMENTATION_BASE_TASKS: - metric_name = 'test_test/Multiclass_Jaccard_Index' - else: # conditions for other task types to be added - if task == "big_earth_net": - metric_name = 'test_test/Multilabel_F1_Score' - else: - metric_name = 'test_test/Overall_Accuracy' - - if metric_name not in run.data.metrics: - continue - score = run.data.metrics[metric_name] - run_names.append(run.info.run_name) - exp_ids.append(experiment_id) - exp_names.append(original_experiment_name) - run_ids.append(run.info.run_id) - run_status.append(run.info.status) - run_seed.append(seed) - run_metric.append(metric_name.split("/")[-1]) - run_task.append(task) - run_score.append(score) - - df = pd.DataFrame( - { - "dataset": run_task, - "Metric": run_metric, - "test metric": run_score, - "mlflow_run_name": run_names, - "mlflow_run_id": run_ids, - "mlflow_run_status": run_status, - "Seed": run_seed, - "experiment_id": exp_ids, - "experiment_name": exp_names, - } - ) - if len(run_task) == 0: - logger.info( - f"EXPERIMENT INCOMPLETE: {experiment_name} has no complete tasks." - ) - incomplete_experiments.append(experiment_name) - continue - print(f"\n\n\ndf: {df}") - - # get successful results per task - combine_task_results = [] - for task in task_names: - task_df = df.loc[ - (df["dataset"] == task) & (df["mlflow_run_status"] == "FINISHED") - ].copy() - task_df = task_df.loc[(task_df["test metric"] != 0.0)].copy() - rows, _ = task_df.shape - if (rows >= num_repetitions) and ( - sum(np.isnan(task_df["test metric"])) == 0 - ): - task_df = task_df.iloc[list(range(num_repetitions))].copy() - combine_task_results.append(task_df) - elif rows < num_repetitions: - logger.info(f"TASK INCOMPLETE: {task} only has {rows} seeds") - incomplete_experiments.append(experiment_name) - if len(combine_task_results) > 0: - combine_task_results = pd.concat(combine_task_results, axis=0) - combine_exp_results.append(combine_task_results) - if len(combine_task_results) < num_tasks: - logger.info( - f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only" - ) - incomplete_experiments.append(experiment_name) - combine_exp_results = pd.concat(combine_exp_results, axis=0) - print(f"\n\n\ncombine_exp_results: {combine_exp_results}") - return (combine_exp_results, incomplete_experiments) - - -def extract_parameters( - storage_uri: str, - logger: logging.RootLogger, - experiments: list, - task_names: list = SEGMENTATION_BASE_TASKS, -) -> pd.DataFrame: - """ - extracts hyper-parameter information for each experiment from the mlflow logs - saves this information to a csv file - - Args: - storage_uri: path to mlflow storage folder used in configs - logger: logging.RootLogger to save logs to file - experiment_data: list of experiment names - task_names: list of tasks - """ - logger.info(f"\n Extracting parameters of experiments from: {storage_uri}") - experiments = list(set(experiments)) - all_params = [] - client = mlflow.tracking.MlflowClient(tracking_uri=storage_uri) - for experiment_name in experiments: - # get experiment id - experiment_info = client.get_experiment_by_name(experiment_name) - if experiment_info is None: - continue - experiment_id = experiment_info.experiment_id - logger.info(f"\nexperiment_name: {experiment_name} ") - logger.info(f"experiment_id: {experiment_info.experiment_id}") - exp_parent_run_name = f"top_run_{experiment_name}" - experiment_parent_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.runName" LIKE "{exp_parent_run_name}"', - ) - if (len(experiment_parent_run_data) > 1) or ( - len(experiment_parent_run_data) == 0 - ): - logger.debug( - f"The number of parent runs for each experiment should be 1. \ - It is currently {len(experiment_parent_run_data)}" - ) - raise RuntimeError - for run in experiment_parent_run_data: - exp_parent_run_id = run.info.run_id - - mlflow.set_tracking_uri(storage_uri) - mlflow.set_experiment(experiment_name) - runs: list[mlflow.entities.Run] = mlflow.search_runs( - filter_string=f"tags.mlflow.parentRunId='{exp_parent_run_id}'", - output_format="list", - ) # type: ignore - logger.info(f"Found runs: {[run.info.run_name for run in runs]}") - - for task in task_names: - logger.info(f"task: {task}") - matching_runs = [run for run in runs if run.info.run_name.endswith(task)] # type: ignore - best_params = matching_runs[0].data.params - - # eval them - best_params = {k: literal_eval(v) for k, v in best_params.items()} - best_params["experiment_name"] = experiment_name - best_params["dataset"] = task - best_params["decoder"] = matching_runs[0].data.tags["decoder"] - best_params["backbone"] = matching_runs[0].data.tags["backbone"] - best_params["early_stop_patience"] = matching_runs[0].data.tags[ - "early_stop_patience" - ] - best_params["n_trials"] = matching_runs[0].data.tags["n_trials"] - best_params["partition_name"] = matching_runs[0].data.tags["partition_name"] - best_params["data_percentages"] = DATA_PARTITIONS[ - best_params["partition_name"] - ] - if 'optimizer_hparams' in best_params: - logger.info( - f"optimizer_hparams: {best_params['optimizer_hparams'].items()}" - ) - optimizer_hparams = { - k: v for k, v in best_params['optimizer_hparams'].items() - } - best_params.update(optimizer_hparams) - del best_params['optimizer_hparams'] - if 'model_args' in best_params: - model_args = {k: v for k, v in best_params['model_args'].items()} - best_params.update(model_args) - del best_params['model_args'] - - best_params = pd.DataFrame(best_params, index=[0]) - all_params.append(best_params) - all_params = pd.concat(all_params, axis=0) - all_params = all_params.reset_index() - return all_params - - -def get_results_and_parameters( - storage_uri: str, - logger: logging.RootLogger, - experiments: list, - task_names: list = SEGMENTATION_BASE_TASKS + CLASSIFICATION_BASE_TASKS, - num_repetitions: int = REPEATED_SEEDS_DEFAULT, -) -> pd.DataFrame: - """ - extracts results and parameters for experiments from mlflow logs - - Args: - storage_uri: path to mlflow storage folder used in configs - logger: logging.RootLogger to save logs to file - experiment_data: list of experiment names - task_names: list of tasks - num_repetitions: number of repeated seeds per task - Returns: - pd.DataFrame with results and parameters - """ - if Path(storage_uri).exists() and Path(storage_uri).is_dir(): - results_dir = Path(storage_uri).parents[0] / "summarized_results" - else: - print("Please use a valid directory for storage_uri") - raise ValueError - if not os.path.exists(results_dir): - os.makedirs(results_dir) - - parameters = extract_parameters( - storage_uri=storage_uri, - logger=logger, - experiments=experiments, - task_names=task_names, - ) - - # extract repeated experiment results from mlflow logs - (results, incomplete_experiments) = extract_repeated_experiment_results( - storage_uri=storage_uri, - logger=logger, - experiments=experiments, - num_repetitions=num_repetitions, - task_names=task_names, - ) - - with open(f"{results_dir}/incomplete_experiments.txt", 'w') as f: - for line in incomplete_experiments: - f.write(f"{line}\n") - results_and_parameters = results.merge( - parameters, on=['experiment_name', 'dataset'] - ) - results_and_parameters.to_csv( - f"{str(results_dir)}/results_and_parameters.csv", index=False - ) - return results_and_parameters - - -def delete_nested_experiment_parent_runs( - logger: logging.RootLogger, - delete_runs: list, - experiment_info: mlflow.entities.experiment.Experiment, - client: mlflow.tracking.client.MlflowClient, - leave_one: bool = True, -) -> str | None: - """ - if there are multiple runs for a single experiment, - will delete all runs except the one with the most nested runs (most complete) - Args: - logger: logging.RootLogger to save logs to file - delete_runs: list of runs to delete - experiment_info: info of experiment - client: mlflow client pointing to correct storage uri - leave_one: if True, will not delete the most complete experiment. If False, will delete all experiments - Returns: - run id of the experiment run that was not deleted or None - """ - experiment_id = experiment_info.experiment_id - exp_parent_run_ids = [] - counts = [] - runs_in_experiment = [] - logger.info(f"Deleting from experiment_id:{experiment_id} ") - logger.info(f"delete_runs:{delete_runs} ") - - for exp_parent_run_id in delete_runs: - runs = [] - runs.append(exp_parent_run_id) - task_parent_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.parentRunId" LIKE "{exp_parent_run_id}"', - ) - for task_parent_run in task_parent_run_data: - task_parent_run_id = task_parent_run.info.run_id - runs.append(task_parent_run_id) - individual_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.parentRunId" LIKE "{task_parent_run_id}"', - ) - for individual_run in individual_run_data: - runs.append(individual_run.info.run_id) - exp_parent_run_ids.append(exp_parent_run_id) - counts.append(len(runs)) - runs_in_experiment.append(runs) - - if leave_one and (len(counts) > 0): - index_to_keep = counts.index(max(counts)) - incomplete_run_to_finish = exp_parent_run_ids[index_to_keep] - runs_in_experiment.pop(index_to_keep) - else: - incomplete_run_to_finish = None - - logger.info(f"Deleting runs:{runs_in_experiment} ") - logger.info( - f"experiment_info.artifact_location:{experiment_info.artifact_location}" - ) - for runs in runs_in_experiment: - for run_id in runs: - client.delete_run(run_id) - os.system(f"rm -r {experiment_info.artifact_location}/{run_id}") - return incomplete_run_to_finish - - -def check_existing_task_parent_runs( - logger: logging.RootLogger, - exp_parent_run_id: str, - storage_uri: str, - experiment_name: str, - n_trials: int = N_TRIALS_DEFAULT, -): - """ - checks if tasks have been completed (both task run and nested individual runs are complete) - Args: - logger: logging.RootLogger to save logs to file - exp_parent_run_id: run id of the experiment run being used (top level run id) - storage_uri: folder containing mlflow log data - experiment_name: name of experiment - n_trials: number of trials (runs) expected in HPO of each task - Returns: - complete_task_run_names: list of task names that have been completed - all_tasks_finished: bool showing if all tasks have been completed - task_run_to_id_match: dict matching task names to the task run id - - """ - client = mlflow.tracking.MlflowClient(tracking_uri=storage_uri) - experiment_info = client.get_experiment_by_name(experiment_name) - experiment_id = experiment_info.experiment_id - task_parent_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.parentRunId" LIKE "{exp_parent_run_id}"', - ) - complete_task_run_names = [] - all_tasks_finished = [] - # TO DO: make sure we only have one task_parent_run for each name (needed for repeated exps) - task_run_to_id_match = {} - for task_parent_run in task_parent_run_data: - task_run_statuses = [] - task_run_ids = [] - task_run_statuses.append(task_parent_run.info.status) - task_run_ids.append(task_parent_run.info.run_id) - - individual_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.parentRunId" LIKE "{task_parent_run.info.run_id}"', - ) - for individual_run in individual_run_data: - if (individual_run.info.status == "RUNNING") or ( - individual_run.info.status == "FAILED" - ): - continue - task_run_statuses.append(individual_run.info.status) - task_run_ids.append(individual_run.info.run_id) - - task_run_to_id_match[task_parent_run.info.run_name] = ( - task_parent_run.info.run_id - ) - task_run_statuses = list(set(task_run_statuses)) - - condition_1 = len(task_run_statuses) == 1 - condition_2 = task_run_statuses[0] == "FINISHED" - # condition_3 = len(task_run_ids) == (n_trials+1) - if condition_1 and condition_2: # and condition_3: - complete_task_run_names.append(task_parent_run.info.run_name) - task_parent_status = True - else: - task_parent_status = False - all_tasks_finished.append(task_parent_status) - - if all(all_tasks_finished) and (len(all_tasks_finished) > 0): - all_tasks_finished = True - else: - all_tasks_finished = False - complete_task_run_names = list(set(complete_task_run_names)) - return complete_task_run_names, all_tasks_finished, task_run_to_id_match - - -def check_existing_experiments( - logger: logging.RootLogger, - storage_uri: str, - experiment_name: str, - exp_parent_run_name: str, - task_names: list, - n_trials: int, - backbone: str -) -> Dict[str, Any]: - """ - checks if experiment has been completed (i.e. both task run and nested individual runs are complete) - Args: - logger: logging.RootLogger to save logs to file - storage_uri: folder containing mlflow log data - experiment_name: name of experiment - exp_parent_run_name: run name of the top level experiment run - task_names: list of task names that should be completed - n_trials: number of trials (runs) expected in HPO of each task - Returns: - output: dict with: - no_existing_runs: bool, if True, there are no existing runs - incomplete_run_to_finish: str | None, run id of the experiment run to finish - finished_run: str | None, run id of the finished experiment run - experiment_id: str | None, experiment id it experiment already exists - - """ - client = mlflow.tracking.MlflowClient(tracking_uri=storage_uri) - experiment_info = client.get_experiment_by_name(experiment_name) - - output = { - "no_existing_runs": True, - "incomplete_run_to_finish": None, - "finished_run": None, - "experiment_id": None, - } - if experiment_info is None: - return output - - experiment_id = experiment_info.experiment_id - logger.info(f"\nexperiment_id: {experiment_id}") - logger.info(f"experiment_name: {experiment_name}") - output["experiment_id"] = experiment_id - experiment_parent_run_data = client.search_runs( - experiment_ids=[experiment_id], - filter_string=f'tags."mlflow.runName" LIKE "{exp_parent_run_name}"', - ) - if len(experiment_parent_run_data) >= 1: - logger.info("there is at least one experiment parent run") - finished_run_id = None - incomplete_runs = [] - - # check if one of the runs is complete - for run in experiment_parent_run_data: - completed_task_run_names, all_tasks_in_experiment_finished, _ = ( - check_existing_task_parent_runs( - logger=logger, - exp_parent_run_id=run.info.run_id, - storage_uri=storage_uri, - experiment_name=experiment_name, - n_trials=n_trials, - ) - ) - logger.info(f"tasks that should be completed: {task_names}") - logger.info(f"completed_task_run_names: {completed_task_run_names}") - logger.info( - f"all_tasks_in_experiment_finished: {all_tasks_in_experiment_finished}" - ) - all_expected_tasks_completed = [ - item for item in task_names if item in completed_task_run_names - ] - all_expected_tasks_completed = len(task_names) == len( - all_expected_tasks_completed - ) - if all_expected_tasks_completed: - finished_run_id = run.info.run_id - logger.info( - f"The following run FINISHED and will be used for repeated experiments: {finished_run_id}" - ) - else: - incomplete_tasks = [ - item for item in task_names if item not in completed_task_run_names - ] - logger.info( - f"The following run {run.info.run_id} is incomplete, with status {run.info.status} and missing tasks: {incomplete_tasks}" - ) - incomplete_runs.append(run.info.run_id) - - if finished_run_id is not None: - # delete all incomplete runs - delete_nested_experiment_parent_runs( - logger=logger, - delete_runs=incomplete_runs, - experiment_info=experiment_info, - client=client, - leave_one=False, - ) - output["finished_run"] = finished_run_id - output["no_existing_runs"] = False - else: - # delete all incomplete runs, leave one - logger.info(f"incomplete_runs: {incomplete_runs}") - output["incomplete_run_to_finish"] = delete_nested_experiment_parent_runs( - logger=logger, - delete_runs=incomplete_runs, - experiment_info=experiment_info, - client=client, - leave_one=True, - ) - output["no_existing_runs"] = False - return output - - -def visualize_combined_results( - combined_results: pd.DataFrame, - storage_uri: str, - logger: logging.RootLogger, - plot_file_base_name: str, -): - """ - compiles and visualizes results from experiment - Args: - combined_results: table containing results and parameters for all experiments - storage_uri: storage_uri from config - logger: logging.RootLogger to save logs to file - plot_file_base_name: unique string to be added to all file names - """ - logger.info("Starting to visualize") - save_folder = Path(storage_uri).parents[0] / "visualizations" - tables_folder = save_folder / "tables" - plots_folder = save_folder / "plots" - if not os.path.exists(tables_folder): - os.makedirs(tables_folder) - if not os.path.exists(plots_folder): - os.makedirs(plots_folder) - - combined_results = [] - model_order = [] - experiments = list(set(combined_results["experiment_name"])) - combined_results = combined_results.rename(columns={"experiment_name": "model"}) - num_experiments = len(experiments) - fig_size = (num_experiments * 5, 6) if num_experiments >= 3 else (15, 6) - n_legend_rows = num_experiments // 3 if num_experiments >= 3 else 1 - model_order = sorted(experiments) - model_colors = dict( - zip(model_order, sns.color_palette("tab20", n_colors=len(model_order))) - ) - - try: - # plot raw values - plot_tools.plot_per_dataset( - combined_results, - model_order=model_order, - plot_file_base_name=plot_file_base_name, - model_colors=model_colors, - metric="test metric", - sharey=False, - inner="points", - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - plt.savefig( - str(plots_folder / f"violin_{plot_file_base_name}_raw.png"), - bbox_inches="tight", - ) - plt.close() - - # plot normalized, bootstrapped values values - plot_tools.make_normalizer( - combined_results, - metrics=("test metric",), - benchmark_name=plot_file_base_name, - ) - bootstrapped_iqm, normalized_combined_results = ( - plot_tools.normalize_bootstrap_and_plot( - combined_results, - plot_file_base_name=plot_file_base_name, - metric="test metric", - benchmark_name=plot_file_base_name, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - ) - # dataset_name_map=dataset_name_map) - - plt.savefig( - str( - plots_folder - / f"violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), - bbox_inches="tight", - ) - plt.close() - bootstrapped_iqm.to_csv( - str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv") - ) - combined_results.to_csv( - str( - tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv" - ) - ) - except Exception as e: - logger.info(f"could not visualize due to error: {e}") - - -def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.RootLogger: - # set up logging file - if not os.path.exists(log_folder): - os.makedirs(log_folder) - current_time = datetime.datetime.now() - current_time = ( - str(current_time).replace(" ", "_").replace(":", "-").replace(".", "-") - ) - log_file = f"{log_folder}/{current_time}" - logger = logging.getLogger() - logger.setLevel(log_level) - handler = logging.FileHandler(log_file) - handler.setLevel(log_level) - formatter = logging.Formatter( - '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - ) - handler.setFormatter(formatter) - logger.addHandler(handler) - logging.basicConfig(level=logging.CRITICAL) - return logger - -def import_custom_modules( - logger: logging.RootLogger, - custom_modules_path: str | Path | None = None, -) -> None: - - if custom_modules_path: - - custom_modules_path = Path(custom_modules_path) - - if custom_modules_path.is_dir(): - - # Add 'custom_modules' folder to sys.path - workdir = custom_modules_path.parents[0] - module_dir = custom_modules_path.name - - sys.path.insert(0, str(workdir)) - - try: - module = importlib.import_module(module_dir) - logger.info(f"Found {custom_modules_path}") - except ImportError: - raise ImportError(f"It was not possible to import modules from {custom_modules_path}.") - else: - raise ValueError(f"Modules path {custom_modules_path} isn't a directory. Check if you have defined it properly.") - else: - logger.debug("No custom module is being used.") - -if __name__ == "__main__": - logger = get_logger() - storage_uri = "results_folder/hpo" # storage_uri from config - - list_of_experiments = [ - "early_stopping_10_prithvi_600", - "early_stopping_10_prithvi_600_tl", - "early_stopping_10_dofa_vit_300", - ] - # get results and parameters from mlflow logs - results_and_parameters = get_results_and_parameters( - storage_uri=storage_uri, - logger=logger, - experiments=list_of_experiments, - ) - - settings_per_model = [ - "early_stopping_10_data_100_perc", - "early_stopping_50_data_10_perc", - "early_stopping_50_data_100_perc", - ] - - # create box plots across multiple models - for setting in settings_per_model: - combined_results = results_and_parameters.loc[ - results_and_parameters["experiment_name"].str.contains(setting) - ].copy() - model_order = visualize_combined_results( - combined_results=results_and_parameters, - storage_uri=storage_uri, - logger=logger, - plot_file_base_name=f"multiple_models_{setting}", - ) From 8737cc056680231b95de834fd12d23aa3a610bb0 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 18 Jul 2025 14:12:46 +0000 Subject: [PATCH 05/16] fix continue existing run --- benchmark/backbone_benchmark.py | 3 +- benchmark/repeat_best_experiment.py | 16 +++++++-- benchmark/utils.py | 51 +++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/benchmark/backbone_benchmark.py b/benchmark/backbone_benchmark.py index 54934bc..bb0d6e6 100644 --- a/benchmark/backbone_benchmark.py +++ b/benchmark/backbone_benchmark.py @@ -232,6 +232,7 @@ def benchmark_backbone( mlflow.set_tracking_uri(storage_uri) mlflow.set_experiment(experiment_name) + experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id if bayesian_search: sampler: BaseSampler | None = None # take the default @@ -344,7 +345,7 @@ def benchmark_backbone( "results_table.json", run.info.run_id, ) - experiment_id = run.info.experiment_id + # experiment_id = run.info.experiment_id # check completion of HPO for all tasks before proceeding to next stage existing_experiments = check_existing_experiments( diff --git a/benchmark/repeat_best_experiment.py b/benchmark/repeat_best_experiment.py index 67a8bbc..1f39e5c 100644 --- a/benchmark/repeat_best_experiment.py +++ b/benchmark/repeat_best_experiment.py @@ -37,6 +37,9 @@ valid_task_types, ) +from .utils import get_nested_runs + + @ray.remote(num_cpus=8, num_gpus=1) def remote_fit( training_spec: TrainingSpec, @@ -261,6 +264,7 @@ def rerun_best_from_backbone( if backbone_import: importlib.import_module(backbone_import) mlflow.set_tracking_uri(storage_uri) + mlflow.set_experiment(experiment_name) runs: list[mlflow.entities.Run] = mlflow.search_runs( @@ -290,9 +294,17 @@ def rerun_best_from_backbone( repeated_experiment_name = f"{experiment_name}_repeated_exp" mlflow.set_tracking_uri(repeated_storage_uri) mlflow.set_experiment(repeated_experiment_name) - + experiment_id = mlflow.get_experiment_by_name(repeated_experiment_name).experiment_id + + tmp_runs = get_nested_runs(experiment_id, experiment_name, repeated_storage_uri) + if len(tmp_runs) > 0: + if len(tmp_runs) > 1: tmp_runs = [x for x in runs if x["run_name"] == experiment_name] + run_id = tmp_runs[0]["run_id"] + else: + run_id = None + #backbone_name = defaults.terratorch_task["model_args"]["backbone"] - with mlflow.start_run(run_name=experiment_name, run_id=None) as run: + with mlflow.start_run(run_name=experiment_name, run_id=run_id) as run: for task in tasks: logger.info(f"\n\ntask: {task.name}") matching_runs = [run for run in runs if run.info.run_name.endswith(task.name)] # type: ignore diff --git a/benchmark/utils.py b/benchmark/utils.py index 8d77675..c3b6636 100644 --- a/benchmark/utils.py +++ b/benchmark/utils.py @@ -16,6 +16,11 @@ from mlflow.entities.experiment import Experiment import importlib import logging +from mlflow.tracking import MlflowClient +from mlflow.entities import ViewType +from collections import defaultdict + + N_TRIALS_DEFAULT = 16 REPEATED_SEEDS_DEFAULT = 10 @@ -854,3 +859,49 @@ def import_custom_modules( logger=logger, plot_file_base_name=f"multiple_models_{setting}", ) + + + +### code written with the help of Perplexity platform +def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): + client = MlflowClient(mlflow_uri) + + # Get all runs for the experiment + all_runs = client.search_runs( + experiment_ids=[experiment_id], + run_view_type=ViewType.ACTIVE_ONLY + ) + + # Create a dictionary to store the run hierarchy + run_hierarchy = defaultdict(list) + parent_runs = [] + + # First pass: Identify parent-child relationships + for run in all_runs: + parent_run_id = run.data.tags.get("mlflow.parentRunId") + + if parent_run_id: + run_hierarchy[parent_run_id].append(run) + else: + parent_runs.append(run) + + # Function to create a nested dictionary for a run and its children + def create_nested_dict(run): + + run_dict = { + "run": run, + "run_id": run.info.run_id, + "run_name": run.data.tags.get("mlflow.runName", "Unnamed"), + "status": run.info.status, + "start_time": run.info.start_time, + "end_time": run.info.end_time, + "children": [create_nested_dict(child) for child in run_hierarchy[run.info.run_id]] + } + return run_dict + # Create the final nested structure + if filter_string: + nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) > -1] + else: + nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs] + + return nested_runs From 5007ed42e38ec216667ef0844f74574ce0e750f0 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 10 Oct 2025 11:18:27 +0000 Subject: [PATCH 06/16] fix paths --- benchmark/plot_tools.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/benchmark/plot_tools.py b/benchmark/plot_tools.py index 5ce6c82..184eb0a 100644 --- a/benchmark/plot_tools.py +++ b/benchmark/plot_tools.py @@ -10,7 +10,6 @@ import json from scipy.stats import trim_mean - sns.set_style("dark", {"grid.color": "0.98", "axes.facecolor": "(0.95, 0.95, 0.97)"}) GEO_BENCH_DIR = "geobench" @@ -28,7 +27,7 @@ def iqm(scores): def bootstrap_iqm( df, - group_keys=("model", "dataset", "partition name"), + group_keys=("model", "dataset", "partition_name"), metric="test_metric", repeat=100, ): @@ -43,12 +42,13 @@ def bootstrap_iqm( def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100): """Stratified bootstrap (by dataset) of all seeds to compute iqm score distribution for each model.""" - group = df.groupby(["model", "dataset", "partition name"]) + + group = df.groupby(["model", "dataset", "partition_name"]) df_list = [] for i in range(repeat): new_df = group.sample(frac=1, replace=True) - series = new_df.groupby(["model", "partition name"])[metric].apply(iqm) + series = new_df.groupby(["model", "partition_name"])[metric].apply(iqm) df_list.append(series.to_frame().reset_index()) new_df = pd.concat(df_list) @@ -57,7 +57,7 @@ def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100): def average_seeds( - df, group_keys=("model", "dataset", "partition name"), metric="test metric" + df, group_keys=("model", "dataset", "partition_name"), metric="test metric" ): """Average seeds for all model and all datasets.""" df_avg = df.groupby(list(group_keys))[metric].mean() @@ -70,8 +70,8 @@ def average_seeds( def extract_1x_data(df_all): """Extract only resutls trained on 100% of the data""" return df_all[ - (df_all["partition name"] == "1.00x train") - | (df_all["partition name"] == "default") + (df_all["partition_name"] == "1.00x train") + | (df_all["partition_name"] == "default") ].copy() @@ -150,13 +150,14 @@ def normalize_data_frame(self, df, metric): def save(self, benchmark_name): """Save normalizer to json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "w") as f: + + with open(f"{benchmark_name}/normalizer.json", "w") as f: json.dump(self.range_dict, f, indent=2) def load_normalizer(benchmark_name): """Load normalizer from json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "r") as f: + with open(f"{benchmark_name}/normalizer.json", "r") as f: range_dict = json.load(f) return Normalizer(range_dict) From c579d27b22ba59be5f5200dbca2f587f0a1b54bf Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 10 Oct 2025 11:19:27 +0000 Subject: [PATCH 07/16] fix_plots --- benchmark/utils.py | 117 +++++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 57 deletions(-) diff --git a/benchmark/utils.py b/benchmark/utils.py index c3b6636..77bc9c4 100644 --- a/benchmark/utils.py +++ b/benchmark/utils.py @@ -19,7 +19,7 @@ from mlflow.tracking import MlflowClient from mlflow.entities import ViewType from collections import defaultdict - +import pdb N_TRIALS_DEFAULT = 16 @@ -215,7 +215,7 @@ def extract_repeated_experiment_results( seed = int(run.info.run_name.split("_")[-1]) if task in task_info: metric_name = task_info[task] - metric_name = 'test_test/' + metric_name.split("/")[-1] + metric_name = 'test_test/' + metric_name.split("/")[-1] if '/' in metric_name else 'test_test_' + metric_name.replace(metric_name.split('_')[0] + "_", '') else: continue @@ -278,7 +278,10 @@ def extract_repeated_experiment_results( f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only" ) incomplete_experiments.append(experiment_name) - combine_exp_results = pd.concat(combine_exp_results, axis=0) + if len(combine_exp_results) > 0: + combine_exp_results = pd.concat(combine_exp_results, axis=0) + else: + combine_exp_results = pd.DataFrame() print(f"\n\n\ncombine_exp_results: {combine_exp_results}") return (combine_exp_results, incomplete_experiments) @@ -383,6 +386,7 @@ def get_results_and_parameters( task_metrics: list, task_names: list, num_repetitions: int = REPEATED_SEEDS_DEFAULT, + visualise: bool = True, ) -> pd.DataFrame: """ extracts results and parameters for experiments from mlflow logs @@ -395,6 +399,7 @@ def get_results_and_parameters( task_metrics: metrics used to evaluate each task task_names: list of tasks num_repetitions: number of repeated seeds per task + visualise: whether to visualise the summarised results or not Returns: pd.DataFrame with results and parameters """ @@ -432,6 +437,16 @@ def get_results_and_parameters( results_and_parameters.to_csv( f"{str(results_dir)}/results_and_parameters.csv", index=False ) + + if visualise: + + model_order = visualize_combined_results( + combined_results=results_and_parameters, + storage_uri=storage_uri, + logger=logger, + plot_file_base_name=f"summary_plot", + ) + return results_and_parameters @@ -707,7 +722,6 @@ def visualize_combined_results( if not os.path.exists(plots_folder): os.makedirs(plots_folder) - combined_results = [] model_order = [] experiments = list(set(combined_results["experiment_name"])) combined_results = combined_results.rename(columns={"experiment_name": "model"}) @@ -719,63 +733,54 @@ def visualize_combined_results( zip(model_order, sns.color_palette("tab20", n_colors=len(model_order))) ) - try: - # plot raw values - plot_tools.plot_per_dataset( + plot_tools.plot_per_dataset( + combined_results, + model_order=model_order, + aggregated_name=plot_file_base_name, + model_colors=model_colors, + metric="test metric", + sharey=False, + inner="points", + fig_size=fig_size, + n_legend_rows=n_legend_rows, + ) + plt.savefig( + str(f"{plots_folder}/violin_{plot_file_base_name}_raw.png"), + bbox_inches="tight", + ) + plt.close() + + # plot normalized, bootstrapped values values + plot_tools.make_normalizer( + combined_results, + metrics=("test metric",), + benchmark_name=plots_folder, + ) + + tmp = ( + plot_tools.normalize_bootstrap_and_plot( combined_results, + # plot_file_base_name=plot_file_base_name, + metric="test metric", + benchmark_name=plots_folder, model_order=model_order, - plot_file_base_name=plot_file_base_name, model_colors=model_colors, - metric="test metric", - sharey=False, - inner="points", fig_size=fig_size, n_legend_rows=n_legend_rows, ) - plt.savefig( - str(plots_folder / f"violin_{plot_file_base_name}_raw.png"), - bbox_inches="tight", - ) - plt.close() + ) - # plot normalized, bootstrapped values values - plot_tools.make_normalizer( - combined_results, - metrics=("test metric",), - benchmark_name=plot_file_base_name, - ) - bootstrapped_iqm, normalized_combined_results = ( - plot_tools.normalize_bootstrap_and_plot( - combined_results, - plot_file_base_name=plot_file_base_name, - metric="test metric", - benchmark_name=plot_file_base_name, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - ) - # dataset_name_map=dataset_name_map) - - plt.savefig( - str( - plots_folder - / f"violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), - bbox_inches="tight", - ) - plt.close() - bootstrapped_iqm.to_csv( - str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv") - ) - combined_results.to_csv( - str( - tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv" - ) + plt.savefig( + str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png" + ), + bbox_inches="tight", + ) + plt.close() + + combined_results.to_csv( + str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv" ) - except Exception as e: - logger.info(f"could not visualize due to error: {e}") + ) def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.RootLogger: @@ -843,11 +848,9 @@ def import_custom_modules( ) settings_per_model = [ - "early_stopping_10_data_100_perc", - "early_stopping_50_data_10_perc", - "early_stopping_50_data_100_perc", + "detection", ] - + # create box plots across multiple models for setting in settings_per_model: combined_results = results_and_parameters.loc[ From 112b591a83d27927efc3ff8fd28a107672f7fcae Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 10 Oct 2025 14:16:39 +0200 Subject: [PATCH 08/16] rename --- {benchmark => terratorch_iterate}/__init__.py | 0 {benchmark => terratorch_iterate}/backbone_benchmark.py | 8 ++++---- {benchmark => terratorch_iterate}/benchmark_ray.py | 6 +++--- {benchmark => terratorch_iterate}/benchmark_types.py | 0 {benchmark => terratorch_iterate}/main.py | 8 ++++---- {benchmark => terratorch_iterate}/model_fitting.py | 2 +- {benchmark => terratorch_iterate}/module.py | 0 {benchmark => terratorch_iterate}/plot_tools.py | 0 {benchmark => terratorch_iterate}/py.typed | 0 .../repeat_best_experiment.py | 4 ++-- .../resources/dataset_specifications/agb.yaml | 0 .../resources/dataset_specifications/eurosat.yaml | 0 .../resources/dataset_specifications/fire_scars.yaml | 0 .../dataset_specifications/multi_temporal_crop.yaml | 0 .../resources/dataset_specifications/sen1floods11.yaml | 0 .../dataset_specifications/sen1floods11_transforms.yaml | 0 {benchmark => terratorch_iterate}/tests/__init__.py | 0 {benchmark => terratorch_iterate}/utils.py | 4 ++-- 18 files changed, 16 insertions(+), 16 deletions(-) rename {benchmark => terratorch_iterate}/__init__.py (100%) rename {benchmark => terratorch_iterate}/backbone_benchmark.py (98%) rename {benchmark => terratorch_iterate}/benchmark_ray.py (97%) rename {benchmark => terratorch_iterate}/benchmark_types.py (100%) rename {benchmark => terratorch_iterate}/main.py (97%) rename {benchmark => terratorch_iterate}/model_fitting.py (99%) rename {benchmark => terratorch_iterate}/module.py (100%) rename {benchmark => terratorch_iterate}/plot_tools.py (100%) rename {benchmark => terratorch_iterate}/py.typed (100%) rename {benchmark => terratorch_iterate}/repeat_best_experiment.py (99%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/agb.yaml (100%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/eurosat.yaml (100%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/fire_scars.yaml (100%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/multi_temporal_crop.yaml (100%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/sen1floods11.yaml (100%) rename {benchmark => terratorch_iterate}/resources/dataset_specifications/sen1floods11_transforms.yaml (100%) rename {benchmark => terratorch_iterate}/tests/__init__.py (100%) rename {benchmark => terratorch_iterate}/utils.py (99%) diff --git a/benchmark/__init__.py b/terratorch_iterate/__init__.py similarity index 100% rename from benchmark/__init__.py rename to terratorch_iterate/__init__.py diff --git a/benchmark/backbone_benchmark.py b/terratorch_iterate/backbone_benchmark.py similarity index 98% rename from benchmark/backbone_benchmark.py rename to terratorch_iterate/backbone_benchmark.py index bb0d6e6..aa0e8a0 100644 --- a/benchmark/backbone_benchmark.py +++ b/terratorch_iterate/backbone_benchmark.py @@ -17,16 +17,16 @@ from optuna.samplers import BaseSampler, RandomSampler from tabulate import tabulate import pickle -from benchmark.benchmark_types import ( +from terratorch_iterate.benchmark_types import ( Defaults, ParameterBounds, Task, combine_with_defaults, optimization_space_type, ) -from benchmark.model_fitting import fit_model, fit_model_with_hparams -from benchmark.repeat_best_experiment import rerun_best_from_backbone -from benchmark.utils import ( +from terratorch_iterate.model_fitting import fit_model, fit_model_with_hparams +from terratorch_iterate.repeat_best_experiment import rerun_best_from_backbone +from terratorch_iterate.utils import ( check_existing_task_parent_runs, check_existing_experiments, unflatten, diff --git a/benchmark/benchmark_ray.py b/terratorch_iterate/benchmark_ray.py similarity index 97% rename from benchmark/benchmark_ray.py rename to terratorch_iterate/benchmark_ray.py index 81eed60..087fa2b 100644 --- a/benchmark/benchmark_ray.py +++ b/terratorch_iterate/benchmark_ray.py @@ -14,15 +14,15 @@ from ray.tune.search.optuna import OptunaSearch from tabulate import tabulate -from benchmark.backbone_benchmark import parse_optimization_space -from benchmark.benchmark_types import ( +from terratorch_iterate.backbone_benchmark import parse_optimization_space +from terratorch_iterate.benchmark_types import ( Defaults, Task, TrainingSpec, combine_with_defaults, optimization_space_type, ) -from benchmark.model_fitting import fit_model, ray_tune_model, valid_task_types +from terratorch_iterate.model_fitting import fit_model, ray_tune_model, valid_task_types def benchmark_backbone_on_task( diff --git a/benchmark/benchmark_types.py b/terratorch_iterate/benchmark_types.py similarity index 100% rename from benchmark/benchmark_types.py rename to terratorch_iterate/benchmark_types.py diff --git a/benchmark/main.py b/terratorch_iterate/main.py similarity index 97% rename from benchmark/main.py rename to terratorch_iterate/main.py index 376de06..d6ad13f 100644 --- a/benchmark/main.py +++ b/terratorch_iterate/main.py @@ -3,10 +3,10 @@ from pathlib import Path from typing import Any, List from jsonargparse import ArgumentParser -from benchmark.backbone_benchmark import benchmark_backbone -from benchmark.benchmark_types import Defaults, Task -from benchmark.repeat_best_experiment import rerun_best_from_backbone -from benchmark.utils import (get_logger, import_custom_modules, +from terratorch_iterate.backbone_benchmark import benchmark_backbone +from terratorch_iterate.benchmark_types import Defaults, Task +from terratorch_iterate.repeat_best_experiment import rerun_best_from_backbone +from terratorch_iterate.utils import (get_logger, import_custom_modules, get_results_and_parameters, extract_parameters) def main(): diff --git a/benchmark/model_fitting.py b/terratorch_iterate/model_fitting.py similarity index 99% rename from benchmark/model_fitting.py rename to terratorch_iterate/model_fitting.py index 61375b9..5fea265 100644 --- a/benchmark/model_fitting.py +++ b/terratorch_iterate/model_fitting.py @@ -55,7 +55,7 @@ from torchgeo.datamodules import BaseDataModule from torchgeo.trainers import BaseTask -from benchmark.benchmark_types import ( +from terratorch_iterate.benchmark_types import ( ParameterBounds, ParameterTypeEnum, TrainingSpec, diff --git a/benchmark/module.py b/terratorch_iterate/module.py similarity index 100% rename from benchmark/module.py rename to terratorch_iterate/module.py diff --git a/benchmark/plot_tools.py b/terratorch_iterate/plot_tools.py similarity index 100% rename from benchmark/plot_tools.py rename to terratorch_iterate/plot_tools.py diff --git a/benchmark/py.typed b/terratorch_iterate/py.typed similarity index 100% rename from benchmark/py.typed rename to terratorch_iterate/py.typed diff --git a/benchmark/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py similarity index 99% rename from benchmark/repeat_best_experiment.py rename to terratorch_iterate/repeat_best_experiment.py index 1f39e5c..5fa921e 100644 --- a/benchmark/repeat_best_experiment.py +++ b/terratorch_iterate/repeat_best_experiment.py @@ -25,13 +25,13 @@ from lightning.pytorch.loggers.mlflow import MLFlowLogger import time import pdb -from benchmark.benchmark_types import ( +from terratorch_iterate.benchmark_types import ( Defaults, Task, TrainingSpec, combine_with_defaults, ) -from benchmark.model_fitting import ( +from terratorch_iterate.model_fitting import ( get_default_callbacks, inject_hparams, valid_task_types, diff --git a/benchmark/resources/dataset_specifications/agb.yaml b/terratorch_iterate/resources/dataset_specifications/agb.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/agb.yaml rename to terratorch_iterate/resources/dataset_specifications/agb.yaml diff --git a/benchmark/resources/dataset_specifications/eurosat.yaml b/terratorch_iterate/resources/dataset_specifications/eurosat.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/eurosat.yaml rename to terratorch_iterate/resources/dataset_specifications/eurosat.yaml diff --git a/benchmark/resources/dataset_specifications/fire_scars.yaml b/terratorch_iterate/resources/dataset_specifications/fire_scars.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/fire_scars.yaml rename to terratorch_iterate/resources/dataset_specifications/fire_scars.yaml diff --git a/benchmark/resources/dataset_specifications/multi_temporal_crop.yaml b/terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/multi_temporal_crop.yaml rename to terratorch_iterate/resources/dataset_specifications/multi_temporal_crop.yaml diff --git a/benchmark/resources/dataset_specifications/sen1floods11.yaml b/terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/sen1floods11.yaml rename to terratorch_iterate/resources/dataset_specifications/sen1floods11.yaml diff --git a/benchmark/resources/dataset_specifications/sen1floods11_transforms.yaml b/terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml similarity index 100% rename from benchmark/resources/dataset_specifications/sen1floods11_transforms.yaml rename to terratorch_iterate/resources/dataset_specifications/sen1floods11_transforms.yaml diff --git a/benchmark/tests/__init__.py b/terratorch_iterate/tests/__init__.py similarity index 100% rename from benchmark/tests/__init__.py rename to terratorch_iterate/tests/__init__.py diff --git a/benchmark/utils.py b/terratorch_iterate/utils.py similarity index 99% rename from benchmark/utils.py rename to terratorch_iterate/utils.py index 77bc9c4..6ca0580 100644 --- a/benchmark/utils.py +++ b/terratorch_iterate/utils.py @@ -10,8 +10,8 @@ from matplotlib import pyplot as plt from ast import literal_eval import optuna -from benchmark.benchmark_types import Task -from benchmark import plot_tools +from terratorch_iterate.benchmark_types import Task +from terratorch_iterate import plot_tools import sys from mlflow.entities.experiment import Experiment import importlib From a1e13abdf687b42ef9d48fcbda917c8cd7d09df4 Mon Sep 17 00:00:00 2001 From: Paolo Fraccaro Date: Fri, 10 Oct 2025 14:17:28 +0200 Subject: [PATCH 09/16] fix test --- tests/test_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 62d2091..81c46b2 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,8 +1,8 @@ import itertools import logging -from benchmark.benchmark_types import Defaults, Task, TaskTypeEnum +from terratorch_iterate.benchmark_types import Defaults, Task, TaskTypeEnum import pytest -from benchmark.backbone_benchmark import benchmark_backbone +from terratorch_iterate.backbone_benchmark import benchmark_backbone from terratorch.datamodules import MChesapeakeLandcoverNonGeoDataModule from albumentations import HorizontalFlip, VerticalFlip, Resize from albumentations.pytorch.transforms import ToTensorV2 From 4ead581500ca9c1e715119de8556d5c298ced032 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 13 Oct 2025 10:38:14 -0300 Subject: [PATCH 10/16] change minimum python version; move module to unit test Signed-off-by: Leonardo P. Tizzei --- pyproject.toml | 2 +- tests/unit/test_model_fitting.py | 38 ++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_model_fitting.py diff --git a/pyproject.toml b/pyproject.toml index e36edb0..16d7f11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ include = ["benchmark*"] name = "terratorch-iterate" version = "0.2.0" -requires-python = ">= 3.10" +requires-python = ">= 3.11" description = "A terratorch's plugin for benchmarking and hyperparameter optimization" authors = [ { name = "Carlos Gomes"}, diff --git a/tests/unit/test_model_fitting.py b/tests/unit/test_model_fitting.py new file mode 100644 index 0000000..7a4dcb2 --- /dev/null +++ b/tests/unit/test_model_fitting.py @@ -0,0 +1,38 @@ +from pathlib import Path + +from jsonargparse import ArgumentParser, Namespace +from terratorch_iterate.iterate_types import Task +import uuid +import pytest + + +@pytest.mark.skip() +def test_launch_training(): + # experiment_name='dofa_large_patch16_224_upernetdecoder_true_modified_continue_False_test_models_True' metric='val/loss' storage_uri='/dccstor/geofm-finetuning/terratorch-iterate-test/39d14a9ed79e4ee39739fa92a4cdd758/hpo' direction='max' + random_hex = uuid.uuid4().hex + + storage_uri = Path(f"/tmp/{random_hex}") + if not storage_uri.exists(): + storage_uri.mkdir() + parser = ArgumentParser() + config_path = ( + Path(__file__).parent.parent.parent + / "configs/tests/dofa_large_patch16_224_upernetdecoder_true_modified.yaml" + ) + assert config_path.exists() + config = parser.parse_path(config_path) + config_init: Namespace = parser.instantiate_classes(config) + tasks = config_init.tasks + assert isinstance(tasks, list), f"Error! {tasks=} is not a list" + for t in tasks: + assert isinstance(t, Task), f"Error! {t=} is not a Task" + # data_module = MNzCattleNonGeoDataModule() + # trainer = Trainer(**training_spec_copy.trainer_args) + # launch_training( + # trainer=trainer, + # datamodule=datamodule, + # experiment_name=experiment_name, + # metric=metric, + # direction=direction, + # storage_uri=storage_uri, + # ) From ea78d5d595cbb9a5d05fd14d4976aa33831d8d77 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 20 Oct 2025 11:07:47 -0300 Subject: [PATCH 11/16] add mlflow; change version Signed-off-by: Leonardo P. Tizzei --- .pre-commit-config.yaml | 3 ++- .secrets.baseline | 4 ++-- pyproject.toml | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08e218f..0b8e668 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,9 +10,10 @@ repos: # You are encouraged to use static refs such as tags, instead of branch name # # Running "pre-commit autoupdate" automatically updates rev to latest tag - rev: 0.13.1+ibm.61.dss + rev: 0.13.1+ibm.62.dss hooks: - id: detect-secrets # pragma: whitelist secret + additional_dependencies: [boxsdk<4] # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options. # You may also run `pre-commit run detect-secrets` to preview the scan result. # when "--baseline" without "--use-all-plugins", pre-commit scan with just plugins in baseline file diff --git a/.secrets.baseline b/.secrets.baseline index e3ac1a7..959a570 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2025-10-01T20:02:29Z", + "generated_at": "2025-10-20T14:06:44Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -138,7 +138,7 @@ } ] }, - "version": "0.13.1+ibm.61.dss", + "version": "0.13.1+ibm.62.dss", "word_list": { "file": null, "hash": null diff --git a/pyproject.toml b/pyproject.toml index 16d7f11..1f1933b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,12 +8,12 @@ requires = ["setuptools >= 77.0.3"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] -include = ["benchmark*"] +include = ["terratorch_iterate*"] [project] name = "terratorch-iterate" -version = "0.2.0" +version = "0.2.2rc1" requires-python = ">= 3.11" description = "A terratorch's plugin for benchmarking and hyperparameter optimization" authors = [ @@ -61,6 +61,7 @@ dependencies = [ "more-itertools", "importlib-metadata", "numpy", +"mlflow", "optuna", "types-tabulate", "ray", From 54a3ff0932ce9ab277952c9aebefe052b30e6393 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Mon, 27 Oct 2025 17:14:14 -0300 Subject: [PATCH 12/16] run tests using iterate command instead of terratorch iterate Signed-off-by: Leonardo P. Tizzei --- run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tests.py b/run_tests.py index 60fa0b1..bd814df 100644 --- a/run_tests.py +++ b/run_tests.py @@ -59,7 +59,7 @@ def submit_job( if tc_id is not None: jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" pytest -vv tests/integration/test_main.py::test_main[{tc_id}]' elif config is not None: - jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" terratorch iterate --hpo --config {config}' + jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" iterate --hpo --config {config}' else: raise ValueError("Error! Either tc_id or config must be not None") cmd = jbsub.split() From d7c56528f3a4b4b874cfd02734e43f8df2657196 Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Tue, 28 Oct 2025 09:51:04 -0300 Subject: [PATCH 13/16] replace flake8 by ruff on github workflows; fix linter errors Signed-off-by: Leonardo P. Tizzei --- .github/workflows/pylint.yml | 23 ----- .github/workflows/python-package.yml | 8 +- .pre-commit-config.yaml | 12 ++- .secrets.baseline | 18 ++-- plotting/plot_results_mlflow.ipynb | 5 +- plotting/plot_results_repeated_runs.ipynb | 9 +- pyproject.toml | 4 +- terratorch_iterate/benchmark_types.py | 1 - terratorch_iterate/iterate_types.py | 1 - terratorch_iterate/repeat_best_experiment.py | 15 +-- terratorch_iterate/utils.py | 98 ++++++++++---------- 11 files changed, 85 insertions(+), 109 deletions(-) delete mode 100644 .github/workflows/pylint.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index 3a2b5d1..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Pylint - -on: [push] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - - name: Analysing the code with pylint - run: | - pylint $(git ls-files '*.py') diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 72e1ce6..1067638 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,13 +27,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest + python -m pip install ruff pytest python -m pip install -e . python -m pip install -e ".[dev]" python -m pip install -e ".[test]" - - name: Lint with flake8 + - name: Lint with ruff run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=F821,F401 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + ruff check \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b8e668..3df104d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,9 +21,13 @@ repos: # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets args: [--baseline, .secrets.baseline, --use-all-plugins] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.3 + # Ruff version. + rev: v0.14.2 hooks: + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --fix ] + # Run the formatter. - id: ruff-format - types_or: - - python - - jupyter + types_or: [ python, pyi ] diff --git a/.secrets.baseline b/.secrets.baseline index 959a570..d18435e 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2025-10-20T14:06:44Z", + "generated_at": "2025-10-28T12:40:55Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -81,21 +81,21 @@ { "hashed_secret": "5810b71c07271f259208c5790992170ac1e13b37", "is_verified": false, - "line_number": 437, + "line_number": 436, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "1c1dc227208cec78bbdb8d9247164879f908a9ad", "is_verified": false, - "line_number": 482, + "line_number": 481, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "e57967bc8f018a30bb192717673876f0ebdbe5d9", "is_verified": false, - "line_number": 558, + "line_number": 557, "type": "Base64 High Entropy String", "verified_result": null } @@ -104,35 +104,35 @@ { "hashed_secret": "e52b18568a4fa073b958134ea5ec0f9407b6ebc3", "is_verified": false, - "line_number": 352, + "line_number": 345, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "43cf2641021e5833120affd5a2bcdf35089eaf75", "is_verified": false, - "line_number": 417, + "line_number": 410, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "78f9a422a3afb6ff5aff30094699c2b299dfd614", "is_verified": false, - "line_number": 949, + "line_number": 942, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "2525429c7a93512ed0c4b799b867a83a6b19f7ff", "is_verified": false, - "line_number": 1014, + "line_number": 1007, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "8915fab07d3bf85d3755089a7fc82e911405d40a", "is_verified": false, - "line_number": 1080, + "line_number": 1073, "type": "Base64 High Entropy String", "verified_result": null } diff --git a/plotting/plot_results_mlflow.ipynb b/plotting/plot_results_mlflow.ipynb index 5d3a752..2f1865e 100644 --- a/plotting/plot_results_mlflow.ipynb +++ b/plotting/plot_results_mlflow.ipynb @@ -2,14 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "import json" + "import matplotlib.pyplot as plt" ] }, { diff --git a/plotting/plot_results_repeated_runs.ipynb b/plotting/plot_results_repeated_runs.ipynb index bcb8f85..a612fa6 100644 --- a/plotting/plot_results_repeated_runs.ipynb +++ b/plotting/plot_results_repeated_runs.ipynb @@ -19,25 +19,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"GEO_BENCH_DIR\"] = \"/Users/cpi/terratorch\"\n", - "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import pandas as pd\n", - "from pathlib import Path\n", "import seaborn as sns\n", "\n", - "import geobench as gb\n", - "\n", "# from geobench_exp.experiment import parse_results\n", - "from matplotlib.ticker import FormatStrFormatter\n", - "import json\n", - "from scipy.stats import trim_mean\n", "import plot_tools" ] }, diff --git a/pyproject.toml b/pyproject.toml index 1f1933b..0d4e4d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,6 @@ dependencies = [ "opencv-python-headless", "configspace", "optuna-integration", -"seaborn", "psutil", "tabulate>=0.9.0", ] @@ -87,7 +86,8 @@ dev = [ "mkdocstrings[python]", "mike", # for building docs with versions "tox", - "pre-commit" + "pre-commit", + "seaborn" ] test = [ diff --git a/terratorch_iterate/benchmark_types.py b/terratorch_iterate/benchmark_types.py index aaf3ea6..1ff0201 100644 --- a/terratorch_iterate/benchmark_types.py +++ b/terratorch_iterate/benchmark_types.py @@ -15,7 +15,6 @@ ObjectDetectionTask, ) from torchgeo.datamodules import BaseDataModule -from geobench_v2.datamodules import GeoBenchDataModule valid_task_types = type[ SemanticSegmentationTask diff --git a/terratorch_iterate/iterate_types.py b/terratorch_iterate/iterate_types.py index aaf3ea6..1ff0201 100644 --- a/terratorch_iterate/iterate_types.py +++ b/terratorch_iterate/iterate_types.py @@ -15,7 +15,6 @@ ObjectDetectionTask, ) from torchgeo.datamodules import BaseDataModule -from geobench_v2.datamodules import GeoBenchDataModule valid_task_types = type[ SemanticSegmentationTask diff --git a/terratorch_iterate/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py index 70e91dc..6391369 100644 --- a/terratorch_iterate/repeat_best_experiment.py +++ b/terratorch_iterate/repeat_best_experiment.py @@ -257,7 +257,7 @@ def rerun_best_from_backbone( raise Exception( f"output_path must be absolute. Consider using $(pwd)/{output_path}." ) - if (tmp_dir is None) & (use_ray == True): + if (tmp_dir is None) & use_ray: raise Exception("tmp_dir must be specified for runs with ray.") if use_ray: @@ -266,7 +266,7 @@ def rerun_best_from_backbone( if backbone_import: importlib.import_module(backbone_import) mlflow.set_tracking_uri(storage_uri) - + mlflow.set_experiment(experiment_name) runs: list[mlflow.entities.Run] = mlflow.search_runs( @@ -296,16 +296,19 @@ def rerun_best_from_backbone( repeated_experiment_name = f"{experiment_name}_repeated_exp" mlflow.set_tracking_uri(repeated_storage_uri) mlflow.set_experiment(repeated_experiment_name) - experiment_id = mlflow.get_experiment_by_name(repeated_experiment_name).experiment_id + experiment_id = mlflow.get_experiment_by_name( + repeated_experiment_name + ).experiment_id tmp_runs = get_nested_runs(experiment_id, experiment_name, repeated_storage_uri) if len(tmp_runs) > 0: - if len(tmp_runs) > 1: tmp_runs = [x for x in runs if x["run_name"] == experiment_name] + if len(tmp_runs) > 1: + tmp_runs = [x for x in runs if x["run_name"] == experiment_name] run_id = tmp_runs[0]["run_id"] else: run_id = None - - #backbone_name = defaults.terratorch_task["model_args"]["backbone"] + + # backbone_name = defaults.terratorch_task["model_args"]["backbone"] with mlflow.start_run(run_name=experiment_name, run_id=run_id) as run: for task in tasks: logger.info(f"\n\ntask: {task.name}") diff --git a/terratorch_iterate/utils.py b/terratorch_iterate/utils.py index bc6cf96..5fcce35 100644 --- a/terratorch_iterate/utils.py +++ b/terratorch_iterate/utils.py @@ -15,12 +15,9 @@ import sys from mlflow.entities.experiment import Experiment import importlib -import logging from mlflow.tracking import MlflowClient from mlflow.entities import ViewType from collections import defaultdict -import pdb - N_TRIALS_DEFAULT = 16 REPEATED_SEEDS_DEFAULT = 10 @@ -220,8 +217,13 @@ def extract_repeated_experiment_results( seed = int(run.info.run_name.split("_")[-1]) if task in task_info: metric_name = task_info[task] - metric_name = 'test_test/' + metric_name.split("/")[-1] if '/' in metric_name else 'test_test_' + metric_name.replace(metric_name.split('_')[0] + "_", '') - else: + metric_name = ( + "test_test/" + metric_name.split("/")[-1] + if "/" in metric_name + else "test_test_" + + metric_name.replace(metric_name.split("_")[0] + "_", "") + ) + else: continue if metric_name not in run.data.metrics: @@ -283,7 +285,7 @@ def extract_repeated_experiment_results( f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only" ) incomplete_experiments.append(experiment_name) - if len(combine_exp_results) > 0: + if len(combine_exp_results) > 0: combine_exp_results = pd.concat(combine_exp_results, axis=0) else: combine_exp_results = pd.DataFrame() @@ -444,14 +446,13 @@ def get_results_and_parameters( results_and_parameters.to_csv( f"{str(results_dir)}/results_and_parameters.csv", index=False ) - - if visualise: - model_order = visualize_combined_results( + if visualise: + visualize_combined_results( combined_results=results_and_parameters, storage_uri=storage_uri, logger=logger, - plot_file_base_name=f"summary_plot", + plot_file_base_name="summary_plot", ) return results_and_parameters @@ -647,14 +648,16 @@ def check_existing_experiments( # check if one of the runs is complete for run in experiment_parent_run_data: - completed_task_run_names, all_tasks_in_experiment_finished, _ = ( - check_existing_task_parent_runs( - logger=logger, - exp_parent_run_id=run.info.run_id, - storage_uri=storage_uri, - experiment_name=experiment_name, - n_trials=n_trials, - ) + ( + completed_task_run_names, + all_tasks_in_experiment_finished, + _, + ) = check_existing_task_parent_runs( + logger=logger, + exp_parent_run_id=run.info.run_id, + storage_uri=storage_uri, + experiment_name=experiment_name, + n_trials=n_trials, ) logger.info(f"tasks that should be completed: {task_names}") logger.info(f"completed_task_run_names: {completed_task_run_names}") @@ -764,29 +767,25 @@ def visualize_combined_results( benchmark_name=plots_folder, ) - tmp = ( - plot_tools.normalize_bootstrap_and_plot( - combined_results, - # plot_file_base_name=plot_file_base_name, - metric="test metric", - benchmark_name=plots_folder, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) + plot_tools.normalize_bootstrap_and_plot( + combined_results, + # plot_file_base_name=plot_file_base_name, + metric="test metric", + benchmark_name=plots_folder, + model_order=model_order, + model_colors=model_colors, + fig_size=fig_size, + n_legend_rows=n_legend_rows, ) plt.savefig( - str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), + str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png"), bbox_inches="tight", ) plt.close() combined_results.to_csv( - str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv" - ) + str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv") ) @@ -827,7 +826,7 @@ def import_custom_modules( sys.path.insert(0, str(workdir)) try: - module = importlib.import_module(module_dir) + importlib.import_module(module_dir) logger.info(f"Found {custom_modules_path}") except ImportError: raise ImportError( @@ -860,7 +859,7 @@ def import_custom_modules( settings_per_model = [ "detection", ] - + # create box plots across multiple models for setting in settings_per_model: combined_results = results_and_parameters.loc[ @@ -873,18 +872,16 @@ def import_custom_modules( plot_file_base_name=f"multiple_models_{setting}", ) - ### code written with the help of Perplexity platform -def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): +def get_nested_runs(experiment_id, filter_string=None, mlflow_uri="mlflow"): client = MlflowClient(mlflow_uri) - + # Get all runs for the experiment all_runs = client.search_runs( - experiment_ids=[experiment_id], - run_view_type=ViewType.ACTIVE_ONLY + experiment_ids=[experiment_id], run_view_type=ViewType.ACTIVE_ONLY ) - + # Create a dictionary to store the run hierarchy run_hierarchy = defaultdict(list) parent_runs = [] @@ -892,7 +889,7 @@ def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): # First pass: Identify parent-child relationships for run in all_runs: parent_run_id = run.data.tags.get("mlflow.parentRunId") - + if parent_run_id: run_hierarchy[parent_run_id].append(run) else: @@ -900,7 +897,6 @@ def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): # Function to create a nested dictionary for a run and its children def create_nested_dict(run): - run_dict = { "run": run, "run_id": run.info.run_id, @@ -908,13 +904,21 @@ def create_nested_dict(run): "status": run.info.status, "start_time": run.info.start_time, "end_time": run.info.end_time, - "children": [create_nested_dict(child) for child in run_hierarchy[run.info.run_id]] + "children": [ + create_nested_dict(child) for child in run_hierarchy[run.info.run_id] + ], } return run_dict - # Create the final nested structure + + # Create the final nested structure if filter_string: - nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) > -1] + nested_runs = [ + create_nested_dict(parent_run) + for parent_run in parent_runs + if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) + > -1 + ] else: nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs] - + return nested_runs From 60102cb73a8511a1cc72f291ce0d6a51614f05bc Mon Sep 17 00:00:00 2001 From: "Leonardo P. Tizzei" Date: Tue, 28 Oct 2025 09:51:04 -0300 Subject: [PATCH 14/16] replace flake8 by ruff on github workflows; fix linter errors Signed-off-by: Leonardo P. Tizzei Signed-off-by: Leonardo P Tizzei Third-Party DCO Remediation Commit for Paolo Fraccaro On behalf of Paolo Fraccaro , I, Leonardo P. Tizzei , hereby add my Signed-off-by to this commit: 112b591a83d27927efc3ff8fd28a107672f7fcae On behalf of Paolo Fraccaro , I, Leonardo P. Tizzei , hereby add my Signed-off-by to this commit: a1e13abdf687b42ef9d48fcbda917c8cd7d09df4 Signed-off-by: Leonardo P. Tizzei --- .github/workflows/pylint.yml | 23 ----- .github/workflows/python-package.yml | 8 +- .pre-commit-config.yaml | 12 ++- .secrets.baseline | 18 ++-- plotting/plot_results_mlflow.ipynb | 5 +- plotting/plot_results_repeated_runs.ipynb | 9 +- pyproject.toml | 4 +- terratorch_iterate/benchmark_types.py | 1 - terratorch_iterate/iterate_types.py | 1 - terratorch_iterate/repeat_best_experiment.py | 15 +-- terratorch_iterate/utils.py | 98 ++++++++++---------- 11 files changed, 85 insertions(+), 109 deletions(-) delete mode 100644 .github/workflows/pylint.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index 3a2b5d1..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Pylint - -on: [push] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - - name: Analysing the code with pylint - run: | - pylint $(git ls-files '*.py') diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 72e1ce6..1067638 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,13 +27,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest + python -m pip install ruff pytest python -m pip install -e . python -m pip install -e ".[dev]" python -m pip install -e ".[test]" - - name: Lint with flake8 + - name: Lint with ruff run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=F821,F401 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + ruff check \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0b8e668..3df104d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,9 +21,13 @@ repos: # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets args: [--baseline, .secrets.baseline, --use-all-plugins] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.3 + # Ruff version. + rev: v0.14.2 hooks: + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --fix ] + # Run the formatter. - id: ruff-format - types_or: - - python - - jupyter + types_or: [ python, pyi ] diff --git a/.secrets.baseline b/.secrets.baseline index 959a570..d18435e 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2025-10-20T14:06:44Z", + "generated_at": "2025-10-28T12:40:55Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -81,21 +81,21 @@ { "hashed_secret": "5810b71c07271f259208c5790992170ac1e13b37", "is_verified": false, - "line_number": 437, + "line_number": 436, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "1c1dc227208cec78bbdb8d9247164879f908a9ad", "is_verified": false, - "line_number": 482, + "line_number": 481, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "e57967bc8f018a30bb192717673876f0ebdbe5d9", "is_verified": false, - "line_number": 558, + "line_number": 557, "type": "Base64 High Entropy String", "verified_result": null } @@ -104,35 +104,35 @@ { "hashed_secret": "e52b18568a4fa073b958134ea5ec0f9407b6ebc3", "is_verified": false, - "line_number": 352, + "line_number": 345, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "43cf2641021e5833120affd5a2bcdf35089eaf75", "is_verified": false, - "line_number": 417, + "line_number": 410, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "78f9a422a3afb6ff5aff30094699c2b299dfd614", "is_verified": false, - "line_number": 949, + "line_number": 942, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "2525429c7a93512ed0c4b799b867a83a6b19f7ff", "is_verified": false, - "line_number": 1014, + "line_number": 1007, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "8915fab07d3bf85d3755089a7fc82e911405d40a", "is_verified": false, - "line_number": 1080, + "line_number": 1073, "type": "Base64 High Entropy String", "verified_result": null } diff --git a/plotting/plot_results_mlflow.ipynb b/plotting/plot_results_mlflow.ipynb index 5d3a752..2f1865e 100644 --- a/plotting/plot_results_mlflow.ipynb +++ b/plotting/plot_results_mlflow.ipynb @@ -2,14 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "import json" + "import matplotlib.pyplot as plt" ] }, { diff --git a/plotting/plot_results_repeated_runs.ipynb b/plotting/plot_results_repeated_runs.ipynb index bcb8f85..a612fa6 100644 --- a/plotting/plot_results_repeated_runs.ipynb +++ b/plotting/plot_results_repeated_runs.ipynb @@ -19,25 +19,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"GEO_BENCH_DIR\"] = \"/Users/cpi/terratorch\"\n", - "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import pandas as pd\n", - "from pathlib import Path\n", "import seaborn as sns\n", "\n", - "import geobench as gb\n", - "\n", "# from geobench_exp.experiment import parse_results\n", - "from matplotlib.ticker import FormatStrFormatter\n", - "import json\n", - "from scipy.stats import trim_mean\n", "import plot_tools" ] }, diff --git a/pyproject.toml b/pyproject.toml index 1f1933b..0d4e4d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,6 @@ dependencies = [ "opencv-python-headless", "configspace", "optuna-integration", -"seaborn", "psutil", "tabulate>=0.9.0", ] @@ -87,7 +86,8 @@ dev = [ "mkdocstrings[python]", "mike", # for building docs with versions "tox", - "pre-commit" + "pre-commit", + "seaborn" ] test = [ diff --git a/terratorch_iterate/benchmark_types.py b/terratorch_iterate/benchmark_types.py index aaf3ea6..1ff0201 100644 --- a/terratorch_iterate/benchmark_types.py +++ b/terratorch_iterate/benchmark_types.py @@ -15,7 +15,6 @@ ObjectDetectionTask, ) from torchgeo.datamodules import BaseDataModule -from geobench_v2.datamodules import GeoBenchDataModule valid_task_types = type[ SemanticSegmentationTask diff --git a/terratorch_iterate/iterate_types.py b/terratorch_iterate/iterate_types.py index aaf3ea6..1ff0201 100644 --- a/terratorch_iterate/iterate_types.py +++ b/terratorch_iterate/iterate_types.py @@ -15,7 +15,6 @@ ObjectDetectionTask, ) from torchgeo.datamodules import BaseDataModule -from geobench_v2.datamodules import GeoBenchDataModule valid_task_types = type[ SemanticSegmentationTask diff --git a/terratorch_iterate/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py index 70e91dc..6391369 100644 --- a/terratorch_iterate/repeat_best_experiment.py +++ b/terratorch_iterate/repeat_best_experiment.py @@ -257,7 +257,7 @@ def rerun_best_from_backbone( raise Exception( f"output_path must be absolute. Consider using $(pwd)/{output_path}." ) - if (tmp_dir is None) & (use_ray == True): + if (tmp_dir is None) & use_ray: raise Exception("tmp_dir must be specified for runs with ray.") if use_ray: @@ -266,7 +266,7 @@ def rerun_best_from_backbone( if backbone_import: importlib.import_module(backbone_import) mlflow.set_tracking_uri(storage_uri) - + mlflow.set_experiment(experiment_name) runs: list[mlflow.entities.Run] = mlflow.search_runs( @@ -296,16 +296,19 @@ def rerun_best_from_backbone( repeated_experiment_name = f"{experiment_name}_repeated_exp" mlflow.set_tracking_uri(repeated_storage_uri) mlflow.set_experiment(repeated_experiment_name) - experiment_id = mlflow.get_experiment_by_name(repeated_experiment_name).experiment_id + experiment_id = mlflow.get_experiment_by_name( + repeated_experiment_name + ).experiment_id tmp_runs = get_nested_runs(experiment_id, experiment_name, repeated_storage_uri) if len(tmp_runs) > 0: - if len(tmp_runs) > 1: tmp_runs = [x for x in runs if x["run_name"] == experiment_name] + if len(tmp_runs) > 1: + tmp_runs = [x for x in runs if x["run_name"] == experiment_name] run_id = tmp_runs[0]["run_id"] else: run_id = None - - #backbone_name = defaults.terratorch_task["model_args"]["backbone"] + + # backbone_name = defaults.terratorch_task["model_args"]["backbone"] with mlflow.start_run(run_name=experiment_name, run_id=run_id) as run: for task in tasks: logger.info(f"\n\ntask: {task.name}") diff --git a/terratorch_iterate/utils.py b/terratorch_iterate/utils.py index bc6cf96..5fcce35 100644 --- a/terratorch_iterate/utils.py +++ b/terratorch_iterate/utils.py @@ -15,12 +15,9 @@ import sys from mlflow.entities.experiment import Experiment import importlib -import logging from mlflow.tracking import MlflowClient from mlflow.entities import ViewType from collections import defaultdict -import pdb - N_TRIALS_DEFAULT = 16 REPEATED_SEEDS_DEFAULT = 10 @@ -220,8 +217,13 @@ def extract_repeated_experiment_results( seed = int(run.info.run_name.split("_")[-1]) if task in task_info: metric_name = task_info[task] - metric_name = 'test_test/' + metric_name.split("/")[-1] if '/' in metric_name else 'test_test_' + metric_name.replace(metric_name.split('_')[0] + "_", '') - else: + metric_name = ( + "test_test/" + metric_name.split("/")[-1] + if "/" in metric_name + else "test_test_" + + metric_name.replace(metric_name.split("_")[0] + "_", "") + ) + else: continue if metric_name not in run.data.metrics: @@ -283,7 +285,7 @@ def extract_repeated_experiment_results( f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only" ) incomplete_experiments.append(experiment_name) - if len(combine_exp_results) > 0: + if len(combine_exp_results) > 0: combine_exp_results = pd.concat(combine_exp_results, axis=0) else: combine_exp_results = pd.DataFrame() @@ -444,14 +446,13 @@ def get_results_and_parameters( results_and_parameters.to_csv( f"{str(results_dir)}/results_and_parameters.csv", index=False ) - - if visualise: - model_order = visualize_combined_results( + if visualise: + visualize_combined_results( combined_results=results_and_parameters, storage_uri=storage_uri, logger=logger, - plot_file_base_name=f"summary_plot", + plot_file_base_name="summary_plot", ) return results_and_parameters @@ -647,14 +648,16 @@ def check_existing_experiments( # check if one of the runs is complete for run in experiment_parent_run_data: - completed_task_run_names, all_tasks_in_experiment_finished, _ = ( - check_existing_task_parent_runs( - logger=logger, - exp_parent_run_id=run.info.run_id, - storage_uri=storage_uri, - experiment_name=experiment_name, - n_trials=n_trials, - ) + ( + completed_task_run_names, + all_tasks_in_experiment_finished, + _, + ) = check_existing_task_parent_runs( + logger=logger, + exp_parent_run_id=run.info.run_id, + storage_uri=storage_uri, + experiment_name=experiment_name, + n_trials=n_trials, ) logger.info(f"tasks that should be completed: {task_names}") logger.info(f"completed_task_run_names: {completed_task_run_names}") @@ -764,29 +767,25 @@ def visualize_combined_results( benchmark_name=plots_folder, ) - tmp = ( - plot_tools.normalize_bootstrap_and_plot( - combined_results, - # plot_file_base_name=plot_file_base_name, - metric="test metric", - benchmark_name=plots_folder, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) + plot_tools.normalize_bootstrap_and_plot( + combined_results, + # plot_file_base_name=plot_file_base_name, + metric="test metric", + benchmark_name=plots_folder, + model_order=model_order, + model_colors=model_colors, + fig_size=fig_size, + n_legend_rows=n_legend_rows, ) plt.savefig( - str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), + str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png"), bbox_inches="tight", ) plt.close() combined_results.to_csv( - str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv" - ) + str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv") ) @@ -827,7 +826,7 @@ def import_custom_modules( sys.path.insert(0, str(workdir)) try: - module = importlib.import_module(module_dir) + importlib.import_module(module_dir) logger.info(f"Found {custom_modules_path}") except ImportError: raise ImportError( @@ -860,7 +859,7 @@ def import_custom_modules( settings_per_model = [ "detection", ] - + # create box plots across multiple models for setting in settings_per_model: combined_results = results_and_parameters.loc[ @@ -873,18 +872,16 @@ def import_custom_modules( plot_file_base_name=f"multiple_models_{setting}", ) - ### code written with the help of Perplexity platform -def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): +def get_nested_runs(experiment_id, filter_string=None, mlflow_uri="mlflow"): client = MlflowClient(mlflow_uri) - + # Get all runs for the experiment all_runs = client.search_runs( - experiment_ids=[experiment_id], - run_view_type=ViewType.ACTIVE_ONLY + experiment_ids=[experiment_id], run_view_type=ViewType.ACTIVE_ONLY ) - + # Create a dictionary to store the run hierarchy run_hierarchy = defaultdict(list) parent_runs = [] @@ -892,7 +889,7 @@ def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): # First pass: Identify parent-child relationships for run in all_runs: parent_run_id = run.data.tags.get("mlflow.parentRunId") - + if parent_run_id: run_hierarchy[parent_run_id].append(run) else: @@ -900,7 +897,6 @@ def get_nested_runs(experiment_id, filter_string = None, mlflow_uri= "mlflow"): # Function to create a nested dictionary for a run and its children def create_nested_dict(run): - run_dict = { "run": run, "run_id": run.info.run_id, @@ -908,13 +904,21 @@ def create_nested_dict(run): "status": run.info.status, "start_time": run.info.start_time, "end_time": run.info.end_time, - "children": [create_nested_dict(child) for child in run_hierarchy[run.info.run_id]] + "children": [ + create_nested_dict(child) for child in run_hierarchy[run.info.run_id] + ], } return run_dict - # Create the final nested structure + + # Create the final nested structure if filter_string: - nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) > -1] + nested_runs = [ + create_nested_dict(parent_run) + for parent_run in parent_runs + if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) + > -1 + ] else: nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs] - + return nested_runs From 14391a9d3813ff4d67558528e5d8f6928dc60ec6 Mon Sep 17 00:00:00 2001 From: Leonardo P Tizzei Date: Tue, 28 Oct 2025 11:17:32 -0300 Subject: [PATCH 15/16] Third-Party DCO Remediation Commit for Paolo Fraccaro On behalf of Paolo Fraccaro , I, Leonardo P. Tizzei , hereby add my Signed-off-by to this commit: 112b591a83d27927efc3ff8fd28a107672f7fcae On behalf of Paolo Fraccaro , I, Leonardo P. Tizzei , hereby add my Signed-off-by to this commit: a1e13abdf687b42ef9d48fcbda917c8cd7d09df4 Signed-off-by: Leonardo P. Tizzei Signed-off-by: Leonardo P Tizzei --- README.md | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6af3542..58bf59d 100644 --- a/README.md +++ b/README.md @@ -75,28 +75,33 @@ If users want to optimize hyperparameters: terratorch iterate --hpo --config ``` +Another way to run terratorch-iterate is to omit `terratorch` by running: +```shell +iterate --hpo --config +``` + For instance: ```shell -terratorch iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml +iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml ``` If users want to rerun best experiment, please use the same config file. Additionally, the `parent_run_id`, which is the mlflow run id from optimization, should be added as shown below: ```shell -terratorch iterate --repeat --config --parent_run_id +iterate --repeat --config --parent_run_id ``` For instance: ```shell -terratorch iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd +iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd ``` If users want to optimize hyperparameters then the rerun best experiment in a single command, please use both settings as shown below: ```shell -terratorch iterate --hpo --repeat --config +iterate --hpo --repeat --config ``` For instance: ```shell -terratorch iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml +iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml ``` To check the experiment results, use `mlflow ui --host $(hostname -f) --port --backend-store-uri ` @@ -121,11 +126,11 @@ See `configs/summarize_results_template.yaml` in the git repo for an example. To summarize results and hyperparameters, please run the following: ```shell -terratorch iterate --summarize --config +iterate --summarize --config ``` For instance: ```shell -terratorch iterate --summarize --config configs/summarize_results.yaml +iterate --summarize --config configs/summarize_results.yaml ``` The results and hyperparameters are extracted into a csv file. For example, if `storage_uri` is `/opt/benchmark_experiments/hpo`, then sumarized results will be saved in last file as shown below: From 5afd4ccdfb4a973d75cfa57f66718d12eb54e52e Mon Sep 17 00:00:00 2001 From: Leonardo P Tizzei Date: Tue, 28 Oct 2025 12:54:05 -0300 Subject: [PATCH 16/16] sign commit messaged Signed-off-by: Leonardo P Tizzei --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3df104d..e7e44d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,3 +31,7 @@ repos: # Run the formatter. - id: ruff-format types_or: [ python, pyi ] + - repo: https://github.com/mattlqx/pre-commit-sign + rev: v1.2.0 + hooks: + - id: sign-commit