diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml deleted file mode 100644 index 3a2b5d1..0000000 --- a/.github/workflows/pylint.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: Pylint - -on: [push] - -jobs: - build: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ["3.10", "3.11", "3.12"] - steps: - - uses: actions/checkout@v5 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - - name: Analysing the code with pylint - run: | - pylint $(git ls-files '*.py') diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 72e1ce6..1067638 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,13 +27,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest + python -m pip install ruff pytest python -m pip install -e . python -m pip install -e ".[dev]" python -m pip install -e ".[test]" - - name: Lint with flake8 + - name: Lint with ruff run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=F821,F401 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + ruff check \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 08e218f..e7e44d1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,9 +10,10 @@ repos: # You are encouraged to use static refs such as tags, instead of branch name # # Running "pre-commit autoupdate" automatically updates rev to latest tag - rev: 0.13.1+ibm.61.dss + rev: 0.13.1+ibm.62.dss hooks: - id: detect-secrets # pragma: whitelist secret + additional_dependencies: [boxsdk<4] # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options. # You may also run `pre-commit run detect-secrets` to preview the scan result. # when "--baseline" without "--use-all-plugins", pre-commit scan with just plugins in baseline file @@ -20,9 +21,17 @@ repos: # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets args: [--baseline, .secrets.baseline, --use-all-plugins] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.3 + # Ruff version. + rev: v0.14.2 hooks: + # Run the linter. + - id: ruff-check + types_or: [ python, pyi ] + args: [ --fix ] + # Run the formatter. 
- id: ruff-format - types_or: - - python - - jupyter + types_or: [ python, pyi ] + - repo: https://github.com/mattlqx/pre-commit-sign + rev: v1.2.0 + hooks: + - id: sign-commit diff --git a/.secrets.baseline b/.secrets.baseline index e3ac1a7..d18435e 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2025-10-01T20:02:29Z", + "generated_at": "2025-10-28T12:40:55Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -81,21 +81,21 @@ { "hashed_secret": "5810b71c07271f259208c5790992170ac1e13b37", "is_verified": false, - "line_number": 437, + "line_number": 436, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "1c1dc227208cec78bbdb8d9247164879f908a9ad", "is_verified": false, - "line_number": 482, + "line_number": 481, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "e57967bc8f018a30bb192717673876f0ebdbe5d9", "is_verified": false, - "line_number": 558, + "line_number": 557, "type": "Base64 High Entropy String", "verified_result": null } @@ -104,41 +104,41 @@ { "hashed_secret": "e52b18568a4fa073b958134ea5ec0f9407b6ebc3", "is_verified": false, - "line_number": 352, + "line_number": 345, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "43cf2641021e5833120affd5a2bcdf35089eaf75", "is_verified": false, - "line_number": 417, + "line_number": 410, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "78f9a422a3afb6ff5aff30094699c2b299dfd614", "is_verified": false, - "line_number": 949, + "line_number": 942, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "2525429c7a93512ed0c4b799b867a83a6b19f7ff", "is_verified": false, - "line_number": 1014, + "line_number": 1007, "type": "Base64 High Entropy String", "verified_result": null }, { "hashed_secret": "8915fab07d3bf85d3755089a7fc82e911405d40a", "is_verified": false, - 
"line_number": 1080, + "line_number": 1073, "type": "Base64 High Entropy String", "verified_result": null } ] }, - "version": "0.13.1+ibm.61.dss", + "version": "0.13.1+ibm.62.dss", "word_list": { "file": null, "hash": null diff --git a/README.md b/README.md index 6af3542..58bf59d 100644 --- a/README.md +++ b/README.md @@ -75,28 +75,33 @@ If users want to optimize hyperparameters: terratorch iterate --hpo --config ``` +Another way to run terratorch-iterate is to omit `terratorch` by running: +```shell +iterate --hpo --config +``` + For instance: ```shell -terratorch iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml +iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml ``` If users want to rerun best experiment, please use the same config file. Additionally, the `parent_run_id`, which is the mlflow run id from optimization, should be added as shown below: ```shell -terratorch iterate --repeat --config --parent_run_id +iterate --repeat --config --parent_run_id ``` For instance: ```shell -terratorch iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd +iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd ``` If users want to optimize hyperparameters then the rerun best experiment in a single command, please use both settings as shown below: ```shell -terratorch iterate --hpo --repeat --config +iterate --hpo --repeat --config ``` For instance: ```shell -terratorch iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml +iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml ``` To check the experiment results, use `mlflow ui --host $(hostname -f) --port --backend-store-uri ` @@ -121,11 +126,11 @@ See `configs/summarize_results_template.yaml` in 
the git repo for an example. To summarize results and hyperparameters, please run the following: ```shell -terratorch iterate --summarize --config +iterate --summarize --config ``` For instance: ```shell -terratorch iterate --summarize --config configs/summarize_results.yaml +iterate --summarize --config configs/summarize_results.yaml ``` The results and hyperparameters are extracted into a csv file. For example, if `storage_uri` is `/opt/benchmark_experiments/hpo`, then sumarized results will be saved in last file as shown below: diff --git a/plotting/plot_results_mlflow.ipynb b/plotting/plot_results_mlflow.ipynb index 5d3a752..2f1865e 100644 --- a/plotting/plot_results_mlflow.ipynb +++ b/plotting/plot_results_mlflow.ipynb @@ -2,14 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "import json" + "import matplotlib.pyplot as plt" ] }, { diff --git a/plotting/plot_results_repeated_runs.ipynb b/plotting/plot_results_repeated_runs.ipynb index bcb8f85..a612fa6 100644 --- a/plotting/plot_results_repeated_runs.ipynb +++ b/plotting/plot_results_repeated_runs.ipynb @@ -19,25 +19,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"GEO_BENCH_DIR\"] = \"/Users/cpi/terratorch\"\n", - "import numpy as np\n", "from matplotlib import pyplot as plt\n", "import pandas as pd\n", - "from pathlib import Path\n", "import seaborn as sns\n", "\n", - "import geobench as gb\n", - "\n", "# from geobench_exp.experiment import parse_results\n", - "from matplotlib.ticker import FormatStrFormatter\n", - "import json\n", - "from scipy.stats import trim_mean\n", "import plot_tools" ] }, diff --git a/pyproject.toml b/pyproject.toml index e36edb0..0d4e4d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,13 
+8,13 @@ requires = ["setuptools >= 77.0.3"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] -include = ["benchmark*"] +include = ["terratorch_iterate*"] [project] name = "terratorch-iterate" -version = "0.2.0" -requires-python = ">= 3.10" +version = "0.2.2rc1" +requires-python = ">= 3.11" description = "A terratorch's plugin for benchmarking and hyperparameter optimization" authors = [ { name = "Carlos Gomes"}, @@ -61,6 +61,7 @@ dependencies = [ "more-itertools", "importlib-metadata", "numpy", +"mlflow", "optuna", "types-tabulate", "ray", @@ -69,7 +70,6 @@ dependencies = [ "opencv-python-headless", "configspace", "optuna-integration", -"seaborn", "psutil", "tabulate>=0.9.0", ] @@ -86,7 +86,8 @@ dev = [ "mkdocstrings[python]", "mike", # for building docs with versions "tox", - "pre-commit" + "pre-commit", + "seaborn" ] test = [ diff --git a/run_tests.py b/run_tests.py index 60fa0b1..bd814df 100644 --- a/run_tests.py +++ b/run_tests.py @@ -59,7 +59,7 @@ def submit_job( if tc_id is not None: jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" pytest -vv tests/integration/test_main.py::test_main[{tc_id}]' elif config is not None: - jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" terratorch iterate --hpo --config {config}' + jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" iterate --hpo --config {config}' else: raise ValueError("Error! 
Either tc_id or config must be not None") cmd = jbsub.split() diff --git a/terratorch_iterate/backbone_benchmark.py b/terratorch_iterate/backbone_benchmark.py index 412d245..02969d6 100644 --- a/terratorch_iterate/backbone_benchmark.py +++ b/terratorch_iterate/backbone_benchmark.py @@ -349,6 +349,7 @@ def benchmark_backbone( mlflow.set_tracking_uri(storage_uri) logger.info(f"Setting experiment name: {experiment_name}") mlflow.set_experiment(experiment_name) + experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id optimization_space = parse_optimization_space(optimization_space) diff --git a/terratorch_iterate/benchmark_types.py b/terratorch_iterate/benchmark_types.py new file mode 100644 index 0000000..1ff0201 --- /dev/null +++ b/terratorch_iterate/benchmark_types.py @@ -0,0 +1,181 @@ +""" +This module defines all the types expected at input. Used for type checking by jsonargparse. +""" + +from ast import Dict +import copy +import enum +from dataclasses import dataclass, field, replace +from typing import Any, Optional, Union +from terratorch.tasks import ( + ClassificationTask, + MultiLabelClassificationTask, + PixelwiseRegressionTask, + SemanticSegmentationTask, + ObjectDetectionTask, +) +from torchgeo.datamodules import BaseDataModule + +valid_task_types = type[ + SemanticSegmentationTask + | ClassificationTask + | PixelwiseRegressionTask + | ObjectDetectionTask +] + + +@dataclass +class TaskTypeEnum(enum.Enum): + """ + Enum for the type of task to be performed. segmentation, regression or classification. 
+ """ + + segmentation = "segmentation" + regression = "regression" + classification = "classification" + multilabel_classification = "multilabel_classification" + object_detection = "object_detection" + + def get_class_from_enum( + self, + ) -> valid_task_types: + match self.value: + case TaskTypeEnum.segmentation.value: + return SemanticSegmentationTask + case TaskTypeEnum.regression.value: + return PixelwiseRegressionTask + case TaskTypeEnum.classification.value: + return ClassificationTask + case TaskTypeEnum.multilabel_classification.value: + return MultiLabelClassificationTask + case TaskTypeEnum.object_detection.value: + return ObjectDetectionTask + case _: + raise TypeError("Task type does not exist") + + +class ParameterTypeEnum(enum.Enum): + """ + Enum for the type of parameter allowed in ParameterBounds. integer or real. + """ + + integer = "int" + real = "real" + + +@dataclass +class ParameterBounds: + """ + Dataclass defining a numerical range to search over. + + Args: + min (float | int): Minimum. + max (float | int): Maximum. + type (ParameterTypeEnum): Whether the range is in the space of integers or real numbers. + log (bool): Whether to search over the log space (useful for parameters that vary wildly in scale, e.g. learning rate) + """ + + min: float | int + max: float | int + type: ParameterTypeEnum + log: bool = False + + def __post_init__(self): + if not isinstance(self.type, ParameterTypeEnum): + self.type = ParameterTypeEnum(self.type) + + +optimization_space_type = dict[ + str, Union[list, ParameterBounds, "optimization_space_type"] +] + + +@dataclass +class Defaults: + """ + Default parameters set for each of the tasks. + + These parameters will be combined with task specific ones to form the final parameters for the Terratorch training. + + Args: + trainer_args (dict): Arguments passed to Lightning Trainer. + terratorch_task (dict): Arguments for the Terratorch Task. 
+ """ + + trainer_args: dict[str, Any] = field(default_factory=dict) + terratorch_task: dict[str, Any] = field(default_factory=dict) + + +@dataclass +class Task: + """ + Parameters passed to define each of the tasks. + + These parameters are combined with any specified defaults to generate the final task parameters. + + Args: + name (str): Name for this task + type (TaskTypeEnum): Type of task. + terratorch_task (dict): Arguments for the Terratorch Task. + datamodule (BaseDataModule | GeoBenchDataModule): Datamodule to be used. + direction (str): One of min or max. Direction to optimize the metric in. + metric (str): Metric to be optimized. Defaults to "val/loss". + early_prune (bool): Whether to prune unpromising runs early. Defaults to False. + early_stop_patience (int, None): Whether to use Lightning early stopping of runs. Defaults to None, which does not do early stopping. + optimization_except (str[str]): HyperParameters from the optimization space to be ignored for this task. + max_run_duration (str, None): maximum allowed run duration in the form DD:HH:MM:SS; will stop a run after this + amount of time. Defaults to None, which doesn't stop runs by time. 
+ """ + + name: str + type: TaskTypeEnum = field(repr=False) + datamodule: BaseDataModule = field(repr=False) + direction: str + terratorch_task: Optional[dict[str, Any]] = None + metric: str = "val/loss" + early_prune: bool = False + early_stop_patience: int | None = None + optimization_except: set[str] = field(default_factory=set) + max_run_duration: str | None = None + + +@dataclass +class TrainingSpec: + task: Task + trainer_args: dict[str, Any] = field(default_factory=dict) + + +def recursive_merge(first_dict: dict[str, Any], second_dict: dict[str, Any]): + # consider using deepmerge instead of this + for key, val in second_dict.items(): + if key not in first_dict: + first_dict[key] = val + else: + # if it is a dictionary, recurse deeper + if isinstance(val, dict): + recursive_merge(first_dict[key], val) + # if it is not further nested, just replace the value + else: + first_dict[key] = val + + +def combine_with_defaults(task: Task, defaults: Defaults) -> TrainingSpec: + """ + Combine task-specific parameters with default parameters. + + Args: + task (Task): Task object containing task-specific parameters. + defaults (Defaults): Defaults object containing default parameters. + + Returns: + TrainingSpec: TrainingSpec object containing combined parameters. 
+ """ + terratorch_task: Optional[dict[str, Any]] = copy.deepcopy(defaults.terratorch_task) + if terratorch_task is None: + terratorch_task = {} + if task.terratorch_task is None: + task.terratorch_task = {} + # merge task specific args with default args + recursive_merge(terratorch_task, task.terratorch_task) + task_with_defaults = replace(task, terratorch_task=terratorch_task) + return TrainingSpec(task_with_defaults, defaults.trainer_args) diff --git a/terratorch_iterate/iterate_types.py b/terratorch_iterate/iterate_types.py index b9e6082..1ff0201 100644 --- a/terratorch_iterate/iterate_types.py +++ b/terratorch_iterate/iterate_types.py @@ -117,7 +117,7 @@ class Task: name (str): Name for this task type (TaskTypeEnum): Type of task. terratorch_task (dict): Arguments for the Terratorch Task. - datamodule (BaseDataModule): Datamodule to be used. + datamodule (BaseDataModule | GeoBenchDataModule): Datamodule to be used. direction (str): One of min or max. Direction to optimize the metric in. metric (str): Metric to be optimized. Defaults to "val/loss". early_prune (bool): Whether to prune unpromising runs early. Defaults to False.
diff --git a/terratorch_iterate/plot_tools.py b/terratorch_iterate/plot_tools.py index 5ce6c82..184eb0a 100644 --- a/terratorch_iterate/plot_tools.py +++ b/terratorch_iterate/plot_tools.py @@ -10,7 +10,6 @@ import json from scipy.stats import trim_mean - sns.set_style("dark", {"grid.color": "0.98", "axes.facecolor": "(0.95, 0.95, 0.97)"}) GEO_BENCH_DIR = "geobench" @@ -28,7 +27,7 @@ def iqm(scores): def bootstrap_iqm( df, - group_keys=("model", "dataset", "partition name"), + group_keys=("model", "dataset", "partition_name"), metric="test_metric", repeat=100, ): @@ -43,12 +42,13 @@ def bootstrap_iqm( def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100): """Stratified bootstrap (by dataset) of all seeds to compute iqm score distribution for each model.""" - group = df.groupby(["model", "dataset", "partition name"]) + + group = df.groupby(["model", "dataset", "partition_name"]) df_list = [] for i in range(repeat): new_df = group.sample(frac=1, replace=True) - series = new_df.groupby(["model", "partition name"])[metric].apply(iqm) + series = new_df.groupby(["model", "partition_name"])[metric].apply(iqm) df_list.append(series.to_frame().reset_index()) new_df = pd.concat(df_list) @@ -57,7 +57,7 @@ def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100): def average_seeds( - df, group_keys=("model", "dataset", "partition name"), metric="test metric" + df, group_keys=("model", "dataset", "partition_name"), metric="test metric" ): """Average seeds for all model and all datasets.""" df_avg = df.groupby(list(group_keys))[metric].mean() @@ -70,8 +70,8 @@ def average_seeds( def extract_1x_data(df_all): """Extract only resutls trained on 100% of the data""" return df_all[ - (df_all["partition name"] == "1.00x train") - | (df_all["partition name"] == "default") + (df_all["partition_name"] == "1.00x train") + | (df_all["partition_name"] == "default") ].copy() @@ -150,13 +150,14 @@ def normalize_data_frame(self, df, metric): def save(self, benchmark_name): 
"""Save normalizer to json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "w") as f: + + with open(f"{benchmark_name}/normalizer.json", "w") as f: json.dump(self.range_dict, f, indent=2) def load_normalizer(benchmark_name): """Load normalizer from json file.""" - with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "r") as f: + with open(f"{benchmark_name}/normalizer.json", "r") as f: range_dict = json.load(f) return Normalizer(range_dict) diff --git a/terratorch_iterate/py.typed b/terratorch_iterate/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/terratorch_iterate/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py index 3a168dd..6391369 100644 --- a/terratorch_iterate/repeat_best_experiment.py +++ b/terratorch_iterate/repeat_best_experiment.py @@ -36,6 +36,8 @@ valid_task_types, ) +from .utils import get_nested_runs + @ray.remote(num_cpus=8, num_gpus=1) def remote_fit( @@ -255,7 +257,7 @@ def rerun_best_from_backbone( raise Exception( f"output_path must be absolute. Consider using $(pwd)/{output_path}." 
) - if (tmp_dir is None) & (use_ray == True): + if tmp_dir is None and use_ray: raise Exception("tmp_dir must be specified for runs with ray.") if use_ray: @@ -264,6 +266,7 @@ def rerun_best_from_backbone( if backbone_import: importlib.import_module(backbone_import) mlflow.set_tracking_uri(storage_uri) + mlflow.set_experiment(experiment_name) runs: list[mlflow.entities.Run] = mlflow.search_runs( @@ -293,9 +296,21 @@ def rerun_best_from_backbone( repeated_experiment_name = f"{experiment_name}_repeated_exp" mlflow.set_tracking_uri(repeated_storage_uri) mlflow.set_experiment(repeated_experiment_name) + experiment_id = mlflow.get_experiment_by_name( + repeated_experiment_name + ).experiment_id + + tmp_runs = get_nested_runs(experiment_id, experiment_name, repeated_storage_uri) + if len(tmp_runs) > 0: + if len(tmp_runs) > 1: + # get_nested_runs matches run names by substring; keep only the exact name + tmp_runs = [x for x in tmp_runs if x["run_name"] == experiment_name] + run_id = tmp_runs[0]["run_id"] if tmp_runs else None + else: + run_id = None # backbone_name = defaults.terratorch_task["model_args"]["backbone"] - with mlflow.start_run(run_name=experiment_name, run_id=None) as run: + with mlflow.start_run(run_name=experiment_name, run_id=run_id) as run: for task in tasks: logger.info(f"\n\ntask: {task.name}") matching_runs = [ diff --git a/terratorch_iterate/tests/__init__.py b/terratorch_iterate/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/terratorch_iterate/utils.py b/terratorch_iterate/utils.py index 1cf0e38..5fcce35 100644 --- a/terratorch_iterate/utils.py +++ b/terratorch_iterate/utils.py @@ -15,6 +15,9 @@ import sys from mlflow.entities.experiment import Experiment import importlib +from mlflow.tracking import MlflowClient +from mlflow.entities import ViewType +from collections import defaultdict N_TRIALS_DEFAULT = 16 REPEATED_SEEDS_DEFAULT = 10 @@ -214,7 +217,12 @@ def extract_repeated_experiment_results( seed = int(run.info.run_name.split("_")[-1]) if task in task_info: metric_name = task_info[task] - metric_name = "test_test/" +
metric_name.split("/")[-1] + metric_name = ( + "test_test/" + metric_name.split("/")[-1] + if "/" in metric_name + else "test_test_" + + metric_name.replace(metric_name.split("_")[0] + "_", "") + ) else: continue @@ -277,7 +285,10 @@ def extract_repeated_experiment_results( f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only" ) incomplete_experiments.append(experiment_name) - combine_exp_results = pd.concat(combine_exp_results, axis=0) + if len(combine_exp_results) > 0: + combine_exp_results = pd.concat(combine_exp_results, axis=0) + else: + combine_exp_results = pd.DataFrame() print(f"\n\n\ncombine_exp_results: {combine_exp_results}") return (combine_exp_results, incomplete_experiments) @@ -382,6 +393,7 @@ def get_results_and_parameters( task_metrics: list, task_names: list, num_repetitions: int = REPEATED_SEEDS_DEFAULT, + visualise: bool = True, ) -> pd.DataFrame: """ extracts results and parameters for experiments from mlflow logs @@ -394,6 +406,7 @@ def get_results_and_parameters( task_metrics: metrics used to evaluate each task task_names: list of tasks num_repetitions: number of repeated seeds per task + visualise: whether to visualise the summarised results or not Returns: pd.DataFrame with results and parameters """ @@ -433,6 +446,15 @@ def get_results_and_parameters( results_and_parameters.to_csv( f"{str(results_dir)}/results_and_parameters.csv", index=False ) + + if visualise: + visualize_combined_results( + combined_results=results_and_parameters, + storage_uri=storage_uri, + logger=logger, + plot_file_base_name="summary_plot", + ) + return results_and_parameters @@ -626,14 +648,16 @@ def check_existing_experiments( # check if one of the runs is complete for run in experiment_parent_run_data: - completed_task_run_names, all_tasks_in_experiment_finished, _ = ( - check_existing_task_parent_runs( - logger=logger, - exp_parent_run_id=run.info.run_id, - storage_uri=storage_uri, - experiment_name=experiment_name, - 
n_trials=n_trials, - ) + ( + completed_task_run_names, + all_tasks_in_experiment_finished, + _, + ) = check_existing_task_parent_runs( + logger=logger, + exp_parent_run_id=run.info.run_id, + storage_uri=storage_uri, + experiment_name=experiment_name, + n_trials=n_trials, ) logger.info(f"tasks that should be completed: {task_names}") logger.info(f"completed_task_run_names: {completed_task_run_names}") @@ -708,7 +732,6 @@ def visualize_combined_results( if not os.path.exists(plots_folder): os.makedirs(plots_folder) - combined_results = [] model_order = [] experiments = list(set(combined_results["experiment_name"])) combined_results = combined_results.rename(columns={"experiment_name": "model"}) @@ -720,63 +743,50 @@ def visualize_combined_results( zip(model_order, sns.color_palette("tab20", n_colors=len(model_order))) ) - try: - # plot raw values - plot_tools.plot_per_dataset( - combined_results, - model_order=model_order, - plot_file_base_name=plot_file_base_name, - model_colors=model_colors, - metric="test metric", - sharey=False, - inner="points", - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - plt.savefig( - str(plots_folder / f"violin_{plot_file_base_name}_raw.png"), - bbox_inches="tight", - ) - plt.close() + plot_tools.plot_per_dataset( + combined_results, + model_order=model_order, + aggregated_name=plot_file_base_name, + model_colors=model_colors, + metric="test metric", + sharey=False, + inner="points", + fig_size=fig_size, + n_legend_rows=n_legend_rows, + ) + plt.savefig( + str(f"{plots_folder}/violin_{plot_file_base_name}_raw.png"), + bbox_inches="tight", + ) + plt.close() - # plot normalized, bootstrapped values values - plot_tools.make_normalizer( - combined_results, - metrics=("test metric",), - benchmark_name=plot_file_base_name, - ) - bootstrapped_iqm, normalized_combined_results = ( - plot_tools.normalize_bootstrap_and_plot( - combined_results, - plot_file_base_name=plot_file_base_name, - metric="test metric", - 
benchmark_name=plot_file_base_name, - model_order=model_order, - model_colors=model_colors, - fig_size=fig_size, - n_legend_rows=n_legend_rows, - ) - ) - # dataset_name_map=dataset_name_map) - - plt.savefig( - str( - plots_folder - / f"violin_{plot_file_base_name}_normalized_bootstrapped.png" - ), - bbox_inches="tight", - ) - plt.close() - bootstrapped_iqm.to_csv( - str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv") - ) - combined_results.to_csv( - str( - tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv" - ) - ) - except Exception as e: - logger.info(f"could not visualize due to error: {e}") + # plot normalized, bootstrapped values values + plot_tools.make_normalizer( + combined_results, + metrics=("test metric",), + benchmark_name=plots_folder, + ) + + plot_tools.normalize_bootstrap_and_plot( + combined_results, + # plot_file_base_name=plot_file_base_name, + metric="test metric", + benchmark_name=plots_folder, + model_order=model_order, + model_colors=model_colors, + fig_size=fig_size, + n_legend_rows=n_legend_rows, + ) + + plt.savefig( + str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png"), + bbox_inches="tight", + ) + plt.close() + + combined_results.to_csv( + str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv") + ) def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.RootLogger: @@ -816,7 +826,7 @@ def import_custom_modules( sys.path.insert(0, str(workdir)) try: - module = importlib.import_module(module_dir) + importlib.import_module(module_dir) logger.info(f"Found {custom_modules_path}") except ImportError: raise ImportError( @@ -847,9 +857,7 @@ def import_custom_modules( ) settings_per_model = [ - "early_stopping_10_data_100_perc", - "early_stopping_50_data_10_perc", - "early_stopping_50_data_100_perc", + "detection", ] # create box plots across multiple models @@ -863,3 +871,54 @@ def import_custom_modules( logger=logger, 
plot_file_base_name=f"multiple_models_{setting}", ) + + +### code written with the help of Perplexity platform +def get_nested_runs(experiment_id, filter_string=None, mlflow_uri="mlflow"): + client = MlflowClient(mlflow_uri) + + # Get all runs for the experiment + all_runs = client.search_runs( + experiment_ids=[experiment_id], run_view_type=ViewType.ACTIVE_ONLY + ) + + # Create a dictionary to store the run hierarchy + run_hierarchy = defaultdict(list) + parent_runs = [] + + # First pass: Identify parent-child relationships + for run in all_runs: + parent_run_id = run.data.tags.get("mlflow.parentRunId") + + if parent_run_id: + run_hierarchy[parent_run_id].append(run) + else: + parent_runs.append(run) + + # Function to create a nested dictionary for a run and its children + def create_nested_dict(run): + run_dict = { + "run": run, + "run_id": run.info.run_id, + "run_name": run.data.tags.get("mlflow.runName", "Unnamed"), + "status": run.info.status, + "start_time": run.info.start_time, + "end_time": run.info.end_time, + "children": [ + create_nested_dict(child) for child in run_hierarchy[run.info.run_id] + ], + } + return run_dict + + # Create the final nested structure + if filter_string: + nested_runs = [ + create_nested_dict(parent_run) + for parent_run in parent_runs + if parent_run.data.tags.get("mlflow.runName", "Unnamed").find(filter_string) + > -1 + ] + else: + nested_runs = [create_nested_dict(parent_run) for parent_run in parent_runs] + + return nested_runs diff --git a/tests/unit/test_model_fitting.py b/tests/unit/test_model_fitting.py new file mode 100644 index 0000000..7a4dcb2 --- /dev/null +++ b/tests/unit/test_model_fitting.py @@ -0,0 +1,38 @@ +from pathlib import Path + +from jsonargparse import ArgumentParser, Namespace +from terratorch_iterate.iterate_types import Task +import uuid +import pytest + + +@pytest.mark.skip() +def test_launch_training(): + # 
experiment_name='dofa_large_patch16_224_upernetdecoder_true_modified_continue_False_test_models_True' metric='val/loss' storage_uri='/dccstor/geofm-finetuning/terratorch-iterate-test/39d14a9ed79e4ee39739fa92a4cdd758/hpo' direction='max' + random_hex = uuid.uuid4().hex + + storage_uri = Path(f"/tmp/{random_hex}") + if not storage_uri.exists(): + storage_uri.mkdir() + parser = ArgumentParser() + config_path = ( + Path(__file__).parent.parent.parent + / "configs/tests/dofa_large_patch16_224_upernetdecoder_true_modified.yaml" + ) + assert config_path.exists() + config = parser.parse_path(config_path) + config_init: Namespace = parser.instantiate_classes(config) + tasks = config_init.tasks + assert isinstance(tasks, list), f"Error! {tasks=} is not a list" + for t in tasks: + assert isinstance(t, Task), f"Error! {t=} is not a Task" + # data_module = MNzCattleNonGeoDataModule() + # trainer = Trainer(**training_spec_copy.trainer_args) + # launch_training( + # trainer=trainer, + # datamodule=datamodule, + # experiment_name=experiment_name, + # metric=metric, + # direction=direction, + # storage_uri=storage_uri, + # )