From b4de38f45d85332b909c46d88abee442df2d2617 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 25 Mar 2026 17:01:46 +0000 Subject: [PATCH 1/7] work on enabling errors for gpu-disabled devices --- httomo/cli.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index 086dcc6f8..246661dda 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -259,7 +259,7 @@ def run( ) pipeline = generate_pipeline( in_data_file, pipeline, save_all, method_wrapper_comm, format_enum - ) + ) if not does_contain_sweep: execute_high_throughput_run( @@ -283,6 +283,11 @@ def _check_yaml(yaml_config: Path, in_data: Path): """Check a YAML pipeline file for errors.""" return validate_yaml_config(yaml_config, in_data) +def _check_pipeline_cpu_or_gpu(pipeline: Pipeline) -> bool: + for _, method in enumerate(pipeline): + if 'gpu' in method._module_path: + return True + return False def transform_limit_str_to_bytes(limit_str: str): try: @@ -299,10 +304,13 @@ def transform_limit_str_to_bytes(limit_str: str): raise ValueError(f"invalid memory limit string {limit_str}") -def _set_gpu_id(gpu_id: int): +def _set_gpu_id(gpu_id: int, pipeline_needs_gpu: bool): try: import cupy as cp + if not cp.cuda.is_available() and pipeline_needs_gpu: + raise ImportError("This pipeline requires an access to the GPU-enabled machine.") + gpu_count = cp.cuda.runtime.getDeviceCount() if gpu_id != -1: @@ -316,8 +324,11 @@ def _set_gpu_id(gpu_id: int): httomo.globals.gpu_id = gpu_id except ImportError: - pass # silently pass and run if the CPU pipeline is given - + # we handle two cases here: 1. CPU pipeline is given (continue). 2. GPU pipeline is given (raise error). 
+ if pipeline_needs_gpu: + raise ImportError("This pipeline requires an access to the GPU-enabled machine.") + else: + pass def set_global_constants( out_dir: Path, @@ -408,8 +419,10 @@ def execute_high_throughput_run( ) -> None: # we use half the memory for blocks since we typically have inputs/output memory_limit = transform_limit_str_to_bytes(max_memory) // 2 + + pipeline_needs_gpu = _check_pipeline_cpu_or_gpu(pipeline) - _set_gpu_id(gpu_id) + _set_gpu_id(gpu_id, pipeline_needs_gpu) # Run the pipeline using Taskrunner, with temp dir or reslice dir mon = make_monitors(monitor, global_comm) From d6c80e2629d90bf4430c1ea9b3190a373a27fe12 Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 30 Mar 2026 10:56:29 +0100 Subject: [PATCH 2/7] moves import of httomo backends in sweep runner --- httomo/sweep_runner/param_sweep_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/httomo/sweep_runner/param_sweep_runner.py b/httomo/sweep_runner/param_sweep_runner.py index 336fa397b..4d4454ac8 100644 --- a/httomo/sweep_runner/param_sweep_runner.py +++ b/httomo/sweep_runner/param_sweep_runner.py @@ -20,9 +20,6 @@ from httomo.runner.gpu_utils import get_available_gpu_memory, gpumem_cleanup from httomo.preview import PreviewConfig, PreviewDimConfig from httomo.runner.dataset_store_interfaces import DataSetSource -from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import ( - _calc_memory_bytes_for_slices_paganin_filter, -) class ParamSweepRunner: @@ -312,12 +309,16 @@ def _preview_modifier( def _slices_to_fit_memory_Paganin(source: DataSetSource) -> int: """ Estimating the number of vertical slices that can fit on the device for running the Paganin method. + This function assumes that it is running on the GPU-enabled machine For the Paganin method, the filter kernel width can vary. Therefore, we aim to use the tallest possible vertical preview that the current device can accommodate. 
If the kernel width exceeds the height of the vertical preview, some deviations are expected between the sweep-run results and the results obtained from processing the full dataset. """ + from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import ( + _calc_memory_bytes_for_slices_paganin_filter, + ) available_memory = get_available_gpu_memory(10.0) angles_total = source.aux_data.angles_length det_X_length = source.chunk_shape[2] From 428f97cc4f82ef933cfac7b4ab139410c2fe1996 Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 30 Mar 2026 12:46:45 +0100 Subject: [PATCH 3/7] fixes to run cpu pipeline on a cpu only machine --- httomo/cli.py | 34 ++++++++++++++++-------------- httomo/method_wrappers/generic.py | 2 +- httomo/method_wrappers/rotation.py | 5 +++-- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index a1bf78705..74a503bbb 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -294,8 +294,9 @@ def _check_yaml(yaml_config: Path, in_data: Path): def _check_pipeline_cpu_or_gpu(pipeline: Pipeline) -> bool: for _, method in enumerate(pipeline): - if 'gpu' in method._module_path: + if 'gpu' in method.module_path: return True + :param data_slice: a 2D array to save as return False def transform_limit_str_to_bytes(limit_str: str): @@ -317,27 +318,28 @@ def _set_gpu_id(gpu_id: int, pipeline_needs_gpu: bool): try: import cupy as cp - if not cp.cuda.is_available() and pipeline_needs_gpu: - raise ImportError("This pipeline requires an access to the GPU-enabled machine.") - - gpu_count = cp.cuda.runtime.getDeviceCount() - - if gpu_id != -1: - if gpu_id not in range(0, gpu_count): - raise ValueError( - f"GPU Device not available for access. 
Use a GPU ID in the range: 0 to {gpu_count} (exclusive)" - ) + if cp.cuda.is_available(): + gpu_count = cp.cuda.runtime.getDeviceCount() - cp.cuda.Device(gpu_id).use() + if gpu_id != -1: + if gpu_id not in range(0, gpu_count): + raise ValueError( + f"GPU Device not available for access. Use a GPU ID in the range: 0 to {gpu_count} (exclusive)" + ) - httomo.globals.gpu_id = gpu_id + cp.cuda.Device(gpu_id).use() + httomo.globals.gpu_id = gpu_id + else: + if pipeline_needs_gpu: + raise ImportError("This pipeline requires an access to the GPU-enabled machine.") + else: + httomo.globals.gpu_id = None except ImportError: - # we handle two cases here: 1. CPU pipeline is given (continue). 2. GPU pipeline is given (raise error). + # the edge case when cupy is not installed since cupy is the dependency if pipeline_needs_gpu: raise ImportError("This pipeline requires an access to the GPU-enabled machine.") - else: - pass + def set_global_constants( out_dir: Path, diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 766299fc0..8a516248d 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -326,7 +326,7 @@ def execute(self, block: T) -> T: block = self._run_method(block, args) block = self._postprocess_data(block) - if xp.get_array_module(block.data).__name__ == "cupy": + if httomo.globals.gpu_id is not None: self._gpu_time_info.kernel = t.elapsed else: self._gpu_time_info.kernel = 0 diff --git a/httomo/method_wrappers/rotation.py b/httomo/method_wrappers/rotation.py index cf1f4a05a..61da11655 100644 --- a/httomo/method_wrappers/rotation.py +++ b/httomo/method_wrappers/rotation.py @@ -5,7 +5,7 @@ from httomo.utils import catchtime, log_once, xp from httomo_backends.methods_database.query import Pattern - +import httomo.globals import numpy as np from mpi4py import MPI @@ -209,7 +209,8 @@ def _run_method(self, block: T, args: Dict[str, Any]) -> T: self._gpu_time_info.host2device += t.elapsed if not 
self.cupyrun: with catchtime() as t: - sino_slice = xp.asnumpy(sino_slice) + if httomo.globals.gpu_id is not None: + sino_slice = xp.asnumpy(sino_slice) self._gpu_time_info.device2host += t.elapsed args["ind"] = 0 args[self.parameters[0]] = sino_slice[:, xp.newaxis, :] From 99049b958bd7316516556016b9cdc39d74b85103 Mon Sep 17 00:00:00 2001 From: dkazanc Date: Mon, 30 Mar 2026 13:03:03 +0100 Subject: [PATCH 4/7] reverting back to not have a check for cpu or gpu pipeline --- httomo/cli.py | 23 ++++------------------- httomo/method_wrappers/generic.py | 2 +- httomo/method_wrappers/rotation.py | 4 ++-- 3 files changed, 7 insertions(+), 22 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index 74a503bbb..7e1058739 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -292,13 +292,6 @@ def _check_yaml(yaml_config: Path, in_data: Path): """Check a YAML pipeline file for errors.""" return validate_yaml_config(yaml_config, in_data) -def _check_pipeline_cpu_or_gpu(pipeline: Pipeline) -> bool: - for _, method in enumerate(pipeline): - if 'gpu' in method.module_path: - return True - :param data_slice: a 2D array to save as - return False - def transform_limit_str_to_bytes(limit_str: str): try: limit_upper = limit_str.upper() @@ -314,7 +307,7 @@ def transform_limit_str_to_bytes(limit_str: str): raise ValueError(f"invalid memory limit string {limit_str}") -def _set_gpu_id(gpu_id: int, pipeline_needs_gpu: bool): +def _set_gpu_id(gpu_id: int): try: import cupy as cp @@ -330,16 +323,10 @@ def _set_gpu_id(gpu_id: int, pipeline_needs_gpu: bool): cp.cuda.Device(gpu_id).use() httomo.globals.gpu_id = gpu_id - else: - if pipeline_needs_gpu: - raise ImportError("This pipeline requires an access to the GPU-enabled machine.") - else: - httomo.globals.gpu_id = None + except ImportError: # the edge case when cupy is not installed since cupy is the dependency - if pipeline_needs_gpu: - raise ImportError("This pipeline requires an access to the GPU-enabled machine.") - + pass # running cpu 
pipeline def set_global_constants( out_dir: Path, @@ -432,10 +419,8 @@ def execute_high_throughput_run( ) -> None: # we use half the memory for blocks since we typically have inputs/output memory_limit = transform_limit_str_to_bytes(max_memory) // 2 - - pipeline_needs_gpu = _check_pipeline_cpu_or_gpu(pipeline) - _set_gpu_id(gpu_id, pipeline_needs_gpu) + _set_gpu_id(gpu_id) # Run the pipeline using Taskrunner, with temp dir or reslice dir mon = make_monitors(monitor, global_comm) diff --git a/httomo/method_wrappers/generic.py b/httomo/method_wrappers/generic.py index 8a516248d..a79ba2ae7 100644 --- a/httomo/method_wrappers/generic.py +++ b/httomo/method_wrappers/generic.py @@ -326,7 +326,7 @@ def execute(self, block: T) -> T: block = self._run_method(block, args) block = self._postprocess_data(block) - if httomo.globals.gpu_id is not None: + if gpu_enabled: self._gpu_time_info.kernel = t.elapsed else: self._gpu_time_info.kernel = 0 diff --git a/httomo/method_wrappers/rotation.py b/httomo/method_wrappers/rotation.py index 61da11655..4b573844b 100644 --- a/httomo/method_wrappers/rotation.py +++ b/httomo/method_wrappers/rotation.py @@ -2,7 +2,7 @@ from httomo.method_wrappers.generic import GenericMethodWrapper from httomo.runner.method_wrapper import MethodParameterDictType from httomo.runner.methods_repository_interface import MethodRepository -from httomo.utils import catchtime, log_once, xp +from httomo.utils import catchtime, log_once, xp, gpu_enabled from httomo_backends.methods_database.query import Pattern import httomo.globals @@ -209,7 +209,7 @@ def _run_method(self, block: T, args: Dict[str, Any]) -> T: self._gpu_time_info.host2device += t.elapsed if not self.cupyrun: with catchtime() as t: - if httomo.globals.gpu_id is not None: + if gpu_enabled: sino_slice = xp.asnumpy(sino_slice) self._gpu_time_info.device2host += t.elapsed args["ind"] = 0 From 226f4133ab1ff763bee86784ee76cabe481b5ab2 Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 30 Mar 2026 13:14:42 
+0100 Subject: [PATCH 5/7] remove unused import for globals --- httomo/method_wrappers/rotation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/httomo/method_wrappers/rotation.py b/httomo/method_wrappers/rotation.py index 4b573844b..172b27af3 100644 --- a/httomo/method_wrappers/rotation.py +++ b/httomo/method_wrappers/rotation.py @@ -5,8 +5,6 @@ from httomo.utils import catchtime, log_once, xp, gpu_enabled from httomo_backends.methods_database.query import Pattern -import httomo.globals - import numpy as np from mpi4py import MPI from mpi4py.MPI import Comm From f30d0056de49ded7f3e4389ef159423de88b9941 Mon Sep 17 00:00:00 2001 From: algol Date: Mon, 30 Mar 2026 13:14:58 +0100 Subject: [PATCH 6/7] formatting fixes in cli and sweep runner --- httomo/cli.py | 6 ++++-- httomo/sweep_runner/param_sweep_runner.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index 7e1058739..d619d2815 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -268,7 +268,7 @@ def run( ) pipeline = generate_pipeline( in_data_file, pipeline, save_all, method_wrapper_comm, format_enum - ) + ) if not does_contain_sweep: execute_high_throughput_run( @@ -292,6 +292,7 @@ def _check_yaml(yaml_config: Path, in_data: Path): """Check a YAML pipeline file for errors.""" return validate_yaml_config(yaml_config, in_data) + def transform_limit_str_to_bytes(limit_str: str): try: limit_upper = limit_str.upper() @@ -326,7 +327,8 @@ def _set_gpu_id(gpu_id: int): except ImportError: # the edge case when cupy is not installed since cupy is the dependency - pass # running cpu pipeline + pass # running cpu pipeline + def set_global_constants( out_dir: Path, diff --git a/httomo/sweep_runner/param_sweep_runner.py b/httomo/sweep_runner/param_sweep_runner.py index 4d4454ac8..3d1d61f93 100644 --- a/httomo/sweep_runner/param_sweep_runner.py +++ b/httomo/sweep_runner/param_sweep_runner.py @@ -318,7 +318,8 @@ def _slices_to_fit_memory_Paganin(source: DataSetSource) -> int: """ 
from httomo_backends.methods_database.packages.backends.httomolibgpu.supporting_funcs.prep.phase import ( _calc_memory_bytes_for_slices_paganin_filter, - ) + ) + available_memory = get_available_gpu_memory(10.0) angles_total = source.aux_data.angles_length det_X_length = source.chunk_shape[2] From 3fffa8733b70fd807a6c04180c58d793246d155d Mon Sep 17 00:00:00 2001 From: dkazanc Date: Wed, 1 Apr 2026 14:08:09 +0100 Subject: [PATCH 7/7] adding logging --- httomo/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/httomo/cli.py b/httomo/cli.py index d619d2815..cefee2830 100644 --- a/httomo/cli.py +++ b/httomo/cli.py @@ -16,7 +16,7 @@ from httomo.runner.pipeline import Pipeline from httomo.sweep_runner.param_sweep_runner import ParamSweepRunner from httomo.transform_layer import TransformLayer -from httomo.utils import log_exception, mpi_abort_excepthook +from httomo.utils import log_exception, log_once, mpi_abort_excepthook from httomo.yaml_checker import validate_yaml_config from httomo.runner.task_runner import TaskRunner from httomo.ui_layer import UiLayer, PipelineFormat @@ -322,13 +322,12 @@ def _set_gpu_id(gpu_id: int): ) cp.cuda.Device(gpu_id).use() + else: + log_once("CuPy is installed but the GPU device is inaccessible. Only CPU pipelines will work.") httomo.globals.gpu_id = gpu_id - - except ImportError: - # the edge case when cupy is not installed since cupy is the dependency - pass # running cpu pipeline - + except ImportError as e: + log_exception(f"CuPy is not installed: {e}. Only CPU pipelines will work.") def set_global_constants( out_dir: Path,