diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
deleted file mode 100644
index 3a2b5d1..0000000
--- a/.github/workflows/pylint.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Pylint
-
-on: [push]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ["3.10", "3.11", "3.12"]
-    steps:
-    - uses: actions/checkout@v5
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v6
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pylint
-    - name: Analysing the code with pylint
-      run: |
-        pylint $(git ls-files '*.py')
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 72e1ce6..1067638 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -27,13 +27,11 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 pytest
+        python -m pip install ruff pytest
         python -m pip install -e .
         python -m pip install -e ".[dev]"
         python -m pip install -e ".[test]"
-    - name: Lint with flake8
+    - name: Lint with ruff
       run: |
         # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=F821,F401 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        ruff check
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 08e218f..e7e44d1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,9 +10,10 @@ repos:
     # You are encouraged to use static refs such as tags, instead of branch name
     #
     # Running "pre-commit autoupdate" automatically updates rev to latest tag
-    rev: 0.13.1+ibm.61.dss
+    rev: 0.13.1+ibm.62.dss
     hooks:
       - id: detect-secrets # pragma: whitelist secret
+        additional_dependencies: [boxsdk<4]
         # Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
         # You may also run `pre-commit run detect-secrets` to preview the scan result.
         # when "--baseline" without "--use-all-plugins", pre-commit scan with just plugins in baseline file
@@ -20,9 +21,17 @@ repos:
         # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets
         args: [--baseline, .secrets.baseline, --use-all-plugins]
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.12.3
+    # Ruff version.
+    rev: v0.14.2
     hooks:
+      # Run the linter.
+      - id: ruff-check
+        types_or: [ python, pyi ]
+        args: [ --fix ]
+      # Run the formatter.
       - id: ruff-format
-        types_or:
-          - python
-          - jupyter
+        types_or: [ python, pyi ]
+  - repo: https://github.com/mattlqx/pre-commit-sign
+    rev: v1.2.0
+    hooks:
+      - id: sign-commit
diff --git a/.secrets.baseline b/.secrets.baseline
index e3ac1a7..d18435e 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2025-10-01T20:02:29Z",
+  "generated_at": "2025-10-28T12:40:55Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -81,21 +81,21 @@
       {
         "hashed_secret": "5810b71c07271f259208c5790992170ac1e13b37",
         "is_verified": false,
-        "line_number": 437,
+        "line_number": 436,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "1c1dc227208cec78bbdb8d9247164879f908a9ad",
         "is_verified": false,
-        "line_number": 482,
+        "line_number": 481,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "e57967bc8f018a30bb192717673876f0ebdbe5d9",
         "is_verified": false,
-        "line_number": 558,
+        "line_number": 557,
         "type": "Base64 High Entropy String",
         "verified_result": null
       }
@@ -104,41 +104,41 @@
       {
         "hashed_secret": "e52b18568a4fa073b958134ea5ec0f9407b6ebc3",
         "is_verified": false,
-        "line_number": 352,
+        "line_number": 345,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "43cf2641021e5833120affd5a2bcdf35089eaf75",
         "is_verified": false,
-        "line_number": 417,
+        "line_number": 410,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "78f9a422a3afb6ff5aff30094699c2b299dfd614",
         "is_verified": false,
-        "line_number": 949,
+        "line_number": 942,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "2525429c7a93512ed0c4b799b867a83a6b19f7ff",
         "is_verified": false,
-        "line_number": 1014,
+        "line_number": 1007,
         "type": "Base64 High Entropy String",
         "verified_result": null
       },
       {
         "hashed_secret": "8915fab07d3bf85d3755089a7fc82e911405d40a",
         "is_verified": false,
-        "line_number": 1080,
+        "line_number": 1073,
         "type": "Base64 High Entropy String",
         "verified_result": null
       }
     ]
   },
-  "version": "0.13.1+ibm.61.dss",
+  "version": "0.13.1+ibm.62.dss",
   "word_list": {
     "file": null,
     "hash": null
diff --git a/README.md b/README.md
index 6af3542..58bf59d 100644
--- a/README.md
+++ b/README.md
@@ -75,28 +75,33 @@ If users want to optimize hyperparameters:
 terratorch iterate --hpo --config <config-file>
 ```
 
+Alternatively, run the `iterate` command directly, omitting the `terratorch` prefix:
+```shell
+iterate --hpo --config <config-file>
+```
+
 For instance:
 ```shell
-terratorch iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml
+iterate --hpo --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml
 ```
 
 
 If users want to rerun best experiment, please use the same config file. Additionally, the `parent_run_id`, which is the mlflow run id from optimization, should be added as shown below:
 ```shell
-terratorch iterate --repeat --config <config-file> --parent_run_id <mlflow run_id from hpo>
+iterate --repeat --config <config-file> --parent_run_id <mlflow run_id from hpo>
 ```
 For instance:
 ```shell
-terratorch iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd
+iterate --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml --parent_run_id 61bdee4a35a94f988ad30c46c87d4fbd
 ```
 
 If users want to optimize hyperparameters then the rerun best experiment in a single command, please use both settings as shown below:
 ```shell
-terratorch iterate --hpo --repeat --config <config-file>
+iterate --hpo --repeat --config <config-file>
 ```
 For instance:
 ```shell
-terratorch iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml
+iterate --hpo --repeat --config configs/dofa_large_patch16_224_upernetdecoder_true_modified.yaml
 ```
 
 To check the experiment results, use `mlflow ui --host $(hostname -f) --port <port> --backend-store-uri <storage_uri>` 
@@ -121,11 +126,11 @@ See `configs/summarize_results_template.yaml` in the git repo for an example.
 
 To summarize results and hyperparameters, please run the following: 
 ```shell
-terratorch iterate --summarize --config <summarize-config-file>
+iterate --summarize --config <summarize-config-file>
 ```
 For instance:
 ```shell
-terratorch iterate --summarize --config configs/summarize_results.yaml
+iterate --summarize --config configs/summarize_results.yaml
 ```
 
 The results and hyperparameters are extracted into a csv file. For example, if `storage_uri` is `/opt/benchmark_experiments/hpo`, then sumarized results will be saved in last file as shown below:
diff --git a/plotting/plot_results_mlflow.ipynb b/plotting/plot_results_mlflow.ipynb
index 5d3a752..2f1865e 100644
--- a/plotting/plot_results_mlflow.ipynb
+++ b/plotting/plot_results_mlflow.ipynb
@@ -2,14 +2,13 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import seaborn as sns\n",
-    "import matplotlib.pyplot as plt\n",
-    "import json"
+    "import matplotlib.pyplot as plt"
    ]
   },
   {
diff --git a/plotting/plot_results_repeated_runs.ipynb b/plotting/plot_results_repeated_runs.ipynb
index bcb8f85..a612fa6 100644
--- a/plotting/plot_results_repeated_runs.ipynb
+++ b/plotting/plot_results_repeated_runs.ipynb
@@ -19,25 +19,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "import os\n",
     "\n",
     "os.environ[\"GEO_BENCH_DIR\"] = \"/Users/cpi/terratorch\"\n",
-    "import numpy as np\n",
     "from matplotlib import pyplot as plt\n",
     "import pandas as pd\n",
-    "from pathlib import Path\n",
     "import seaborn as sns\n",
     "\n",
-    "import geobench as gb\n",
-    "\n",
     "# from geobench_exp.experiment import parse_results\n",
-    "from matplotlib.ticker import FormatStrFormatter\n",
-    "import json\n",
-    "from scipy.stats import trim_mean\n",
     "import plot_tools"
    ]
   },
diff --git a/pyproject.toml b/pyproject.toml
index e36edb0..0d4e4d6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,13 +8,13 @@ requires = ["setuptools >= 77.0.3"]
 build-backend = "setuptools.build_meta"
 
 [tool.setuptools.packages.find]
-include = ["benchmark*"]
+include = ["terratorch_iterate*"]
 
 [project]
 
 name = "terratorch-iterate"
-version = "0.2.0"
-requires-python = ">= 3.10"
+version = "0.2.2rc1"
+requires-python = ">= 3.11"
 description = "A terratorch's plugin for benchmarking and hyperparameter optimization"
 authors = [
       { name = "Carlos Gomes"},
@@ -61,6 +61,7 @@ dependencies = [
 "more-itertools", 
 "importlib-metadata",
 "numpy",
+"mlflow",
 "optuna",
 "types-tabulate",
 "ray",
@@ -69,7 +70,6 @@ dependencies = [
 "opencv-python-headless",
 "configspace",
 "optuna-integration",
-"seaborn",
 "psutil",
 "tabulate>=0.9.0",
 ]
@@ -86,7 +86,8 @@ dev = [
   "mkdocstrings[python]",
   "mike", # for building docs with versions
   "tox",
-  "pre-commit"
+  "pre-commit",
+  "seaborn"
 ]
 
 test = [
diff --git a/run_tests.py b/run_tests.py
index 60fa0b1..bd814df 100644
--- a/run_tests.py
+++ b/run_tests.py
@@ -59,7 +59,7 @@ def submit_job(
     if tc_id is not None:
         jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" pytest -vv tests/integration/test_main.py::test_main[{tc_id}]'
     elif config is not None:
-        jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" terratorch iterate --hpo --config {config}'
+        jbsub = f'bsub -e {err_file} -o {out_file} -M 40G -gpu "num=1/task:mode=exclusive_process:gmodel=NVIDIAA100_SXM4_80GB" iterate --hpo --config {config}'
     else:
         raise ValueError("Error! Either tc_id or config must be not None")
     cmd = jbsub.split()
diff --git a/terratorch_iterate/backbone_benchmark.py b/terratorch_iterate/backbone_benchmark.py
index 412d245..02969d6 100644
--- a/terratorch_iterate/backbone_benchmark.py
+++ b/terratorch_iterate/backbone_benchmark.py
@@ -349,6 +349,7 @@ def benchmark_backbone(
     mlflow.set_tracking_uri(storage_uri)
     logger.info(f"Setting experiment name: {experiment_name}")
     mlflow.set_experiment(experiment_name)
+    experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id
 
     optimization_space = parse_optimization_space(optimization_space)
 
diff --git a/terratorch_iterate/benchmark_types.py b/terratorch_iterate/benchmark_types.py
new file mode 100644
index 0000000..1ff0201
--- /dev/null
+++ b/terratorch_iterate/benchmark_types.py
@@ -0,0 +1,181 @@
+"""
+This module defines all the types expected at input. Used for type checking by jsonargparse.
+"""
+
+import copy
+import enum
+from dataclasses import dataclass, field, replace
+# `Dict` must be the typing generic; `ast.Dict` is a syntax-tree node class, not a mapping type.
+from typing import Any, Dict, Optional, Union
+from terratorch.tasks import (
+    ClassificationTask,
+    MultiLabelClassificationTask,
+    PixelwiseRegressionTask,
+    SemanticSegmentationTask,
+    ObjectDetectionTask,
+)
+from torchgeo.datamodules import BaseDataModule
+
+valid_task_types = type[
+    SemanticSegmentationTask
+    | ClassificationTask
+    | PixelwiseRegressionTask
+    | ObjectDetectionTask
+]
+
+
+@dataclass  # NOTE(review): with no fields, the generated __eq__ compares empty tuples, so members may all compare equal — confirm the decorator is needed
+class TaskTypeEnum(enum.Enum):
+    """
+    Enum for the type of task to be performed: segmentation, regression, classification, multilabel classification or object detection.
+    """
+
+    segmentation = "segmentation"
+    regression = "regression"
+    classification = "classification"
+    multilabel_classification = "multilabel_classification"
+    object_detection = "object_detection"
+
+    def get_class_from_enum(
+        self,
+    ) -> valid_task_types:
+        match self.value:  # dispatch on the underlying string value, not on the member object
+            case TaskTypeEnum.segmentation.value:
+                return SemanticSegmentationTask
+            case TaskTypeEnum.regression.value:
+                return PixelwiseRegressionTask
+            case TaskTypeEnum.classification.value:
+                return ClassificationTask
+            case TaskTypeEnum.multilabel_classification.value:
+                return MultiLabelClassificationTask
+            case TaskTypeEnum.object_detection.value:
+                return ObjectDetectionTask
+            case _:
+                raise TypeError("Task type does not exist")
+
+
+class ParameterTypeEnum(enum.Enum):
+    """
+    Enum for the type of parameter allowed in ParameterBounds. integer or real.
+    """
+
+    integer = "int"
+    real = "real"
+
+
+@dataclass
+class ParameterBounds:
+    """
+    Dataclass defining a numerical range to search over.
+
+    Args:
+        min (float | int): Minimum.
+        max (float | int): Maximum.
+        type (ParameterTypeEnum): Whether the range is in the space of integers or real numbers.
+        log (bool): Whether to search over the log space (useful for parameters that vary wildly in scale, e.g. learning rate)
+    """
+
+    min: float | int
+    max: float | int
+    type: ParameterTypeEnum
+    log: bool = False
+
+    def __post_init__(self):
+        if not isinstance(self.type, ParameterTypeEnum):
+            self.type = ParameterTypeEnum(self.type)
+
+
+optimization_space_type = dict[
+    str, Union[list, ParameterBounds, "optimization_space_type"]
+]
+
+
+@dataclass
+class Defaults:
+    """
+    Default parameters set for each of the tasks.
+
+    These parameters will be combined with task specific ones to form the final parameters for the Terratorch training.
+
+    Args:
+        trainer_args (dict): Arguments passed to Lightning Trainer.
+        terratorch_task (dict): Arguments for the Terratorch Task.
+    """
+
+    trainer_args: dict[str, Any] = field(default_factory=dict)
+    terratorch_task: dict[str, Any] = field(default_factory=dict)
+
+
+@dataclass
+class Task:
+    """
+    Parameters passed to define each of the tasks.
+
+    These parameters are combined with any specified defaults to generate the final task parameters.
+
+    Args:
+        name (str): Name for this task
+        type (TaskTypeEnum): Type of task.
+        terratorch_task (dict): Arguments for the Terratorch Task.
+        datamodule (BaseDataModule | GeoBenchDataModule): Datamodule to be used.
+        direction (str): One of min or max. Direction to optimize the metric in.
+        metric (str): Metric to be optimized. Defaults to "val/loss".
+        early_prune (bool): Whether to prune unpromising runs early. Defaults to False.
+        early_stop_patience (int, None): Whether to use Lightning early stopping of runs. Defaults to None, which does not do early stopping.
+        optimization_except (set[str]): HyperParameters from the optimization space to be ignored for this task.
+        max_run_duration (str, None): maximum allowed run duration in the form DD:HH:MM:SS; will stop a run after this
+            amount of time. Defaults to None, which doesn't stop runs by time.
+    """
+
+    name: str
+    type: TaskTypeEnum = field(repr=False)
+    datamodule: BaseDataModule = field(repr=False)
+    direction: str
+    terratorch_task: Optional[dict[str, Any]] = None
+    metric: str = "val/loss"
+    early_prune: bool = False
+    early_stop_patience: int | None = None
+    optimization_except: set[str] = field(default_factory=set)
+    max_run_duration: str | None = None
+
+
+@dataclass
+class TrainingSpec:
+    task: Task
+    trainer_args: dict[str, Any] = field(default_factory=dict)
+
+
+def recursive_merge(first_dict: dict[str, Any], second_dict: dict[str, Any]):
+    """Merge second_dict into first_dict in place; on conflict the value from second_dict wins."""
+    # consider using deepmerge instead of this
+    for key, val in second_dict.items():
+        existing = first_dict.get(key)
+        # recurse only when BOTH sides are nested dicts; recursing into a scalar or
+        # None on the first_dict side would fail on the membership test
+        if isinstance(existing, dict) and isinstance(val, dict):
+            recursive_merge(existing, val)
+        else:
+            # new key, non-dict value, or type mismatch: take the value from second_dict
+            first_dict[key] = val
+
+
+def combine_with_defaults(task: Task, defaults: Defaults) -> TrainingSpec:
+    """
+    Combine task-specific parameters with default parameters.
+
+    Args:
+        task (Task): Task object containing task-specific parameters.
+        defaults (Defaults): Defaults object containing default parameters.
+
+    Returns:
+        TrainingSpec: TrainingSpec object containing combined parameters.
+    """
+    terratorch_task: Optional[Dict[str, Any]] = copy.deepcopy(defaults.terratorch_task)
+    if terratorch_task is None:
+        terratorch_task = {}
+    if task.terratorch_task is None:
+        task.terratorch_task = {}
+    # merge task specific args with default args
+    recursive_merge(terratorch_task, task.terratorch_task)
+    task_with_defaults = replace(task, terratorch_task=terratorch_task)
+    return TrainingSpec(task_with_defaults, defaults.trainer_args)
diff --git a/terratorch_iterate/iterate_types.py b/terratorch_iterate/iterate_types.py
index b9e6082..1ff0201 100644
--- a/terratorch_iterate/iterate_types.py
+++ b/terratorch_iterate/iterate_types.py
@@ -117,7 +117,7 @@ class Task:
         name (str): Name for this task
         type (TaskTypeEnum): Type of task.
         terratorch_task (dict): Arguments for the Terratorch Task.
-        datamodule (BaseDataModule): Datamodule to be used.
+        datamodule (BaseDataModule  | GeoBenchDataModule): Datamodule to be used.
         direction (str): One of min or max. Direction to optimize the metric in.
         metric (str): Metric to be optimized. Defaults to "val/loss".
         early_prune (bool): Whether to prune unpromising runs early. Defaults to False.
diff --git a/terratorch_iterate/plot_tools.py b/terratorch_iterate/plot_tools.py
index 5ce6c82..184eb0a 100644
--- a/terratorch_iterate/plot_tools.py
+++ b/terratorch_iterate/plot_tools.py
@@ -10,7 +10,6 @@
 import json
 from scipy.stats import trim_mean
 
-
 sns.set_style("dark", {"grid.color": "0.98", "axes.facecolor": "(0.95, 0.95, 0.97)"})
 GEO_BENCH_DIR = "geobench"
 
@@ -28,7 +27,7 @@ def iqm(scores):
 
 def bootstrap_iqm(
     df,
-    group_keys=("model", "dataset", "partition name"),
+    group_keys=("model", "dataset", "partition_name"),
     metric="test_metric",
     repeat=100,
 ):
@@ -43,12 +42,13 @@ def bootstrap_iqm(
 
 def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100):
     """Stratified bootstrap (by dataset) of all seeds to compute iqm score distribution for each model."""
-    group = df.groupby(["model", "dataset", "partition name"])
+
+    group = df.groupby(["model", "dataset", "partition_name"])
 
     df_list = []
     for i in range(repeat):
         new_df = group.sample(frac=1, replace=True)
-        series = new_df.groupby(["model", "partition name"])[metric].apply(iqm)
+        series = new_df.groupby(["model", "partition_name"])[metric].apply(iqm)
         df_list.append(series.to_frame().reset_index())
 
     new_df = pd.concat(df_list)
@@ -57,7 +57,7 @@ def bootstrap_iqm_aggregate(df, metric="test_metric", repeat=100):
 
 
 def average_seeds(
-    df, group_keys=("model", "dataset", "partition name"), metric="test metric"
+    df, group_keys=("model", "dataset", "partition_name"), metric="test metric"
 ):
     """Average seeds for all model and all datasets."""
     df_avg = df.groupby(list(group_keys))[metric].mean()
@@ -70,8 +70,8 @@ def average_seeds(
 def extract_1x_data(df_all):
     """Extract only resutls trained on 100% of the data"""
     return df_all[
-        (df_all["partition name"] == "1.00x train")
-        | (df_all["partition name"] == "default")
+        (df_all["partition_name"] == "1.00x train")
+        | (df_all["partition_name"] == "default")
     ].copy()
 
 
@@ -150,13 +150,14 @@ def normalize_data_frame(self, df, metric):
 
     def save(self, benchmark_name):
         """Save normalizer to json file."""
-        with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "w") as f:
+
+        with open(f"{benchmark_name}/normalizer.json", "w") as f:
             json.dump(self.range_dict, f, indent=2)
 
 
 def load_normalizer(benchmark_name):
     """Load normalizer from json file."""
-    with open(GEO_BENCH_DIR / benchmark_name / "normalizer.json", "r") as f:
+    with open(f"{benchmark_name}/normalizer.json", "r") as f:
         range_dict = json.load(f)
     return Normalizer(range_dict)
 
diff --git a/terratorch_iterate/py.typed b/terratorch_iterate/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/terratorch_iterate/repeat_best_experiment.py b/terratorch_iterate/repeat_best_experiment.py
index 3a168dd..6391369 100644
--- a/terratorch_iterate/repeat_best_experiment.py
+++ b/terratorch_iterate/repeat_best_experiment.py
@@ -36,6 +36,8 @@
     valid_task_types,
 )
 
+from .utils import get_nested_runs
+
 
 @ray.remote(num_cpus=8, num_gpus=1)
 def remote_fit(
@@ -255,7 +257,7 @@ def rerun_best_from_backbone(
         raise Exception(
             f"output_path must be absolute. Consider using $(pwd)/{output_path}."
         )
-    if (tmp_dir is None) & (use_ray == True):
+    if (tmp_dir is None) & use_ray:
         raise Exception("tmp_dir must be specified for runs with ray.")
 
     if use_ray:
@@ -264,6 +266,7 @@ def rerun_best_from_backbone(
     if backbone_import:
         importlib.import_module(backbone_import)
     mlflow.set_tracking_uri(storage_uri)
+
     mlflow.set_experiment(experiment_name)
 
     runs: list[mlflow.entities.Run] = mlflow.search_runs(
@@ -293,9 +296,20 @@ def rerun_best_from_backbone(
     repeated_experiment_name = f"{experiment_name}_repeated_exp"
     mlflow.set_tracking_uri(repeated_storage_uri)
     mlflow.set_experiment(repeated_experiment_name)
+    experiment_id = mlflow.get_experiment_by_name(
+        repeated_experiment_name
+    ).experiment_id
+
+    # Look up parent runs already logged for the repeated experiment so one can be reused.
+    tmp_runs = get_nested_runs(experiment_id, experiment_name, repeated_storage_uri)
+    if len(tmp_runs) > 1:
+        # several candidates: keep only those named after this experiment
+        tmp_runs = [x for x in tmp_runs if x["run_name"] == experiment_name]
+    # reuse the first match; with run_id=None mlflow.start_run starts a new run instead
+    run_id = tmp_runs[0]["run_id"] if tmp_runs else None
 
     # backbone_name = defaults.terratorch_task["model_args"]["backbone"]
-    with mlflow.start_run(run_name=experiment_name, run_id=None) as run:
+    with mlflow.start_run(run_name=experiment_name, run_id=run_id) as run:
         for task in tasks:
             logger.info(f"\n\ntask: {task.name}")
             matching_runs = [
diff --git a/terratorch_iterate/tests/__init__.py b/terratorch_iterate/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/terratorch_iterate/utils.py b/terratorch_iterate/utils.py
index 1cf0e38..5fcce35 100644
--- a/terratorch_iterate/utils.py
+++ b/terratorch_iterate/utils.py
@@ -15,6 +15,9 @@
 import sys
 from mlflow.entities.experiment import Experiment
 import importlib
+from mlflow.tracking import MlflowClient
+from mlflow.entities import ViewType
+from collections import defaultdict
 
 N_TRIALS_DEFAULT = 16
 REPEATED_SEEDS_DEFAULT = 10
@@ -214,7 +217,12 @@ def extract_repeated_experiment_results(
                 seed = int(run.info.run_name.split("_")[-1])
                 if task in task_info:
                     metric_name = task_info[task]
-                    metric_name = "test_test/" + metric_name.split("/")[-1]
+                    metric_name = (
+                        "test_test/" + metric_name.split("/")[-1]
+                        if "/" in metric_name
+                        else "test_test_"
+                        + metric_name.replace(metric_name.split("_")[0] + "_", "")
+                    )
                 else:
                     continue
 
@@ -277,7 +285,10 @@ def extract_repeated_experiment_results(
                 f"EXPERIMENT INCOMPLETE: {experiment_name} has {len(combine_task_results)} complete tasks only"
             )
             incomplete_experiments.append(experiment_name)
-    combine_exp_results = pd.concat(combine_exp_results, axis=0)
+    if len(combine_exp_results) > 0:
+        combine_exp_results = pd.concat(combine_exp_results, axis=0)
+    else:
+        combine_exp_results = pd.DataFrame()
     print(f"\n\n\ncombine_exp_results: {combine_exp_results}")
     return (combine_exp_results, incomplete_experiments)
 
@@ -382,6 +393,7 @@ def get_results_and_parameters(
     task_metrics: list,
     task_names: list,
     num_repetitions: int = REPEATED_SEEDS_DEFAULT,
+    visualise: bool = True,
 ) -> pd.DataFrame:
     """
     extracts results and parameters for experiments from mlflow logs
@@ -394,6 +406,7 @@ def get_results_and_parameters(
         task_metrics: metrics used to evaluate each task
         task_names: list of tasks
         num_repetitions: number of repeated seeds per task
+        visualise: whether to visualise the summarised results or not
     Returns:
         pd.DataFrame with results and parameters
     """
@@ -433,6 +446,15 @@ def get_results_and_parameters(
     results_and_parameters.to_csv(
         f"{str(results_dir)}/results_and_parameters.csv", index=False
     )
+
+    if visualise:
+        visualize_combined_results(
+            combined_results=results_and_parameters,
+            storage_uri=storage_uri,
+            logger=logger,
+            plot_file_base_name="summary_plot",
+        )
+
     return results_and_parameters
 
 
@@ -626,14 +648,16 @@ def check_existing_experiments(
 
         # check if one of the runs is complete
         for run in experiment_parent_run_data:
-            completed_task_run_names, all_tasks_in_experiment_finished, _ = (
-                check_existing_task_parent_runs(
-                    logger=logger,
-                    exp_parent_run_id=run.info.run_id,
-                    storage_uri=storage_uri,
-                    experiment_name=experiment_name,
-                    n_trials=n_trials,
-                )
+            (
+                completed_task_run_names,
+                all_tasks_in_experiment_finished,
+                _,
+            ) = check_existing_task_parent_runs(
+                logger=logger,
+                exp_parent_run_id=run.info.run_id,
+                storage_uri=storage_uri,
+                experiment_name=experiment_name,
+                n_trials=n_trials,
             )
             logger.info(f"tasks that should be completed: {task_names}")
             logger.info(f"completed_task_run_names: {completed_task_run_names}")
@@ -708,7 +732,6 @@ def visualize_combined_results(
     if not os.path.exists(plots_folder):
         os.makedirs(plots_folder)
 
-    combined_results = []
     model_order = []
     experiments = list(set(combined_results["experiment_name"]))
     combined_results = combined_results.rename(columns={"experiment_name": "model"})
@@ -720,63 +743,50 @@ def visualize_combined_results(
         zip(model_order, sns.color_palette("tab20", n_colors=len(model_order)))
     )
 
-    try:
-        # plot raw values
-        plot_tools.plot_per_dataset(
-            combined_results,
-            model_order=model_order,
-            plot_file_base_name=plot_file_base_name,
-            model_colors=model_colors,
-            metric="test metric",
-            sharey=False,
-            inner="points",
-            fig_size=fig_size,
-            n_legend_rows=n_legend_rows,
-        )
-        plt.savefig(
-            str(plots_folder / f"violin_{plot_file_base_name}_raw.png"),
-            bbox_inches="tight",
-        )
-        plt.close()
+    plot_tools.plot_per_dataset(
+        combined_results,
+        model_order=model_order,
+        aggregated_name=plot_file_base_name,
+        model_colors=model_colors,
+        metric="test metric",
+        sharey=False,
+        inner="points",
+        fig_size=fig_size,
+        n_legend_rows=n_legend_rows,
+    )
+    plt.savefig(
+        str(f"{plots_folder}/violin_{plot_file_base_name}_raw.png"),
+        bbox_inches="tight",
+    )
+    plt.close()
 
-        # plot normalized, bootstrapped values values
-        plot_tools.make_normalizer(
-            combined_results,
-            metrics=("test metric",),
-            benchmark_name=plot_file_base_name,
-        )
-        bootstrapped_iqm, normalized_combined_results = (
-            plot_tools.normalize_bootstrap_and_plot(
-                combined_results,
-                plot_file_base_name=plot_file_base_name,
-                metric="test metric",
-                benchmark_name=plot_file_base_name,
-                model_order=model_order,
-                model_colors=model_colors,
-                fig_size=fig_size,
-                n_legend_rows=n_legend_rows,
-            )
-        )
-        # dataset_name_map=dataset_name_map)
-
-        plt.savefig(
-            str(
-                plots_folder
-                / f"violin_{plot_file_base_name}_normalized_bootstrapped.png"
-            ),
-            bbox_inches="tight",
-        )
-        plt.close()
-        bootstrapped_iqm.to_csv(
-            str(tables_folder / f"{plot_file_base_name}_bootstrapped_iqm.csv")
-        )
-        combined_results.to_csv(
-            str(
-                tables_folder / f"{plot_file_base_name}_normalized_combined_results.csv"
-            )
-        )
-    except Exception as e:
-        logger.info(f"could not visualize due to error: {e}")
+    # plot normalized, bootstrapped values values
+    plot_tools.make_normalizer(
+        combined_results,
+        metrics=("test metric",),
+        benchmark_name=plots_folder,
+    )
+
+    plot_tools.normalize_bootstrap_and_plot(
+        combined_results,
+        # plot_file_base_name=plot_file_base_name,
+        metric="test metric",
+        benchmark_name=plots_folder,
+        model_order=model_order,
+        model_colors=model_colors,
+        fig_size=fig_size,
+        n_legend_rows=n_legend_rows,
+    )
+
+    plt.savefig(
+        str(f"{plots_folder}/violin_{plot_file_base_name}_normalized_bootstrapped.png"),
+        bbox_inches="tight",
+    )
+    plt.close()
+
+    combined_results.to_csv(
+        str(f"{tables_folder}/{plot_file_base_name}_normalized_combined_results.csv")
+    )
 
 
 def get_logger(log_level="INFO", log_folder="./experiment_logs") -> logging.RootLogger:
@@ -816,7 +826,7 @@ def import_custom_modules(
             sys.path.insert(0, str(workdir))
 
             try:
-                module = importlib.import_module(module_dir)
+                importlib.import_module(module_dir)
                 logger.info(f"Found {custom_modules_path}")
             except ImportError:
                 raise ImportError(
@@ -847,9 +857,7 @@ def import_custom_modules(
     )
 
     settings_per_model = [
-        "early_stopping_10_data_100_perc",
-        "early_stopping_50_data_10_perc",
-        "early_stopping_50_data_100_perc",
+        "detection",
     ]
 
     # create box plots across multiple models
@@ -863,3 +871,71 @@ def import_custom_modules(
             logger=logger,
             plot_file_base_name=f"multiple_models_{setting}",
         )
+
+
+### code written with the help of Perplexity platform
+def get_nested_runs(experiment_id, filter_string=None, mlflow_uri="mlflow"):
+    """Return the active runs of an MLflow experiment as parent/child trees.
+
+    Args:
+        experiment_id: ID of the experiment whose runs are fetched.
+        filter_string: optional substring; when truthy, only top-level runs
+            whose ``mlflow.runName`` tag contains it are returned.
+        mlflow_uri: tracking URI handed to ``MlflowClient``.
+
+    Returns:
+        A list with one dict per top-level run (keys: run, run_id, run_name,
+        status, start_time, end_time, children). ``children`` holds dicts of
+        the same shape for the runs whose ``mlflow.parentRunId`` tag points
+        at that run.
+    """
+    client = MlflowClient(mlflow_uri)
+
+    # search_runs is paginated: one call returns a single page, so follow the
+    # page tokens. Otherwise large experiments silently lose runs and, with
+    # them, parts of the hierarchy.
+    all_runs = []
+    page_token = None
+    while True:
+        page = client.search_runs(
+            experiment_ids=[experiment_id],
+            run_view_type=ViewType.ACTIVE_ONLY,
+            page_token=page_token,
+        )
+        all_runs.extend(page)
+        page_token = getattr(page, "token", None)
+        if not page_token:
+            break
+
+    # Group child runs under their parent's id; runs without the parent tag
+    # are the roots of the returned trees.
+    run_hierarchy = defaultdict(list)
+    parent_runs = []
+    for run in all_runs:
+        parent_run_id = run.data.tags.get("mlflow.parentRunId")
+        if parent_run_id:
+            run_hierarchy[parent_run_id].append(run)
+        else:
+            parent_runs.append(run)
+
+    def create_nested_dict(run):
+        """Build the dict for ``run``, recursing into its child runs."""
+        return {
+            "run": run,
+            "run_id": run.info.run_id,
+            "run_name": run.data.tags.get("mlflow.runName", "Unnamed"),
+            "status": run.info.status,
+            "start_time": run.info.start_time,
+            "end_time": run.info.end_time,
+            "children": [
+                create_nested_dict(child) for child in run_hierarchy[run.info.run_id]
+            ],
+        }
+
+    # A missing/empty filter keeps every top-level run.
+    return [
+        create_nested_dict(parent_run)
+        for parent_run in parent_runs
+        if not filter_string
+        or filter_string in parent_run.data.tags.get("mlflow.runName", "Unnamed")
+    ]
diff --git a/tests/unit/test_model_fitting.py b/tests/unit/test_model_fitting.py
new file mode 100644
index 0000000..7a4dcb2
--- /dev/null
+++ b/tests/unit/test_model_fitting.py
@@ -0,0 +1,38 @@
+from pathlib import Path
+
+from jsonargparse import ArgumentParser, Namespace
+from terratorch_iterate.iterate_types import Task
+import uuid
+import pytest
+
+
+@pytest.mark.skip()
+def test_launch_training():
+    """Check that the test config parses and that every task is a ``Task``."""
+    # experiment_name='dofa_large_patch16_224_upernetdecoder_true_modified_continue_False_test_models_True' metric='val/loss' storage_uri='/dccstor/geofm-finetuning/terratorch-iterate-test/39d14a9ed79e4ee39739fa92a4cdd758/hpo' direction='max'
+    random_hex = uuid.uuid4().hex
+
+    storage_uri = Path(f"/tmp/{random_hex}")
+    storage_uri.mkdir(exist_ok=True)  # one call instead of check-then-create
+    parser = ArgumentParser()
+    config_path = (
+        Path(__file__).parent.parent.parent
+        / "configs/tests/dofa_large_patch16_224_upernetdecoder_true_modified.yaml"
+    )
+    assert config_path.exists()
+    config = parser.parse_path(config_path)
+    config_init: Namespace = parser.instantiate_classes(config)
+    tasks = config_init.tasks
+    assert isinstance(tasks, list), f"Error! {tasks=} is not a list"
+    for t in tasks:
+        assert isinstance(t, Task), f"Error! {t=} is not a Task"
+    # data_module = MNzCattleNonGeoDataModule()
+    # trainer = Trainer(**training_spec_copy.trainer_args)
+    # launch_training(
+    #     trainer=trainer,
+    #     datamodule=datamodule,
+    #     experiment_name=experiment_name,
+    #     metric=metric,
+    #     direction=direction,
+    #     storage_uri=storage_uri,
+    # )