diff --git a/README.rst b/README.rst index 932e9c56..359d23ab 100644 --- a/README.rst +++ b/README.rst @@ -9,7 +9,7 @@ Some of the methods also have been optimised to ensure higher computational effi The purpose of HTTomolibGPU =========================== -Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the +Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the `HTTomo `_ package as its backend for data processing. HTTomo is a user interface (UI) written in Python for fast big tomographic data processing using MPI protocols or as well serially. @@ -34,7 +34,7 @@ Conda environment $ conda create --name httomolibgpu # create a fresh conda environment $ conda activate httomolibgpu # activate the environment - $ conda install conda-forge::cupy==12.3.0 + $ conda install conda-forge::cupy==14.0.* $ pip install httomolibgpu Setup the development environment: @@ -43,6 +43,6 @@ Setup the development environment: .. code-block:: console $ git clone git@github.com:DiamondLightSource/httomolibgpu.git # clone the repo - $ conda env create --name httomolibgpu -c conda-forge cupy==12.3.0 # install dependencies + $ conda env create --name httomolibgpu -c conda-forge cupy==14.0.* # install dependencies $ conda activate httomolibgpu # activate the environment $ pip install -e ./httomolibgpu[dev] # editable/development mode diff --git a/httomolibgpu/cuda_kernels/__init__.py b/httomolibgpu/cuda_kernels/__init__.py index cfb219a3..56a97eed 100644 --- a/httomolibgpu/cuda_kernels/__init__.py +++ b/httomolibgpu/cuda_kernels/__init__.py @@ -25,5 +25,5 @@ def load_cuda_module( code += f.read() return cp.RawModule( - options=("-std=c++11", *options), code=code, name_expressions=name_expressions + options=("-std=c++17", *options), code=code, name_expressions=name_expressions ) diff --git a/httomolibgpu/prep/normalize.py b/httomolibgpu/prep/normalize.py index 8a1a3a3c..e805a241 100644 --- a/httomolibgpu/prep/normalize.py +++ b/httomolibgpu/prep/normalize.py @@ -123,7 +123,7 @@ def dark_flat_field_correction( "float32 out", kernel, kernel_name, - options=("-std=c++11",), + options=("-std=c++17",), loop_prep="constexpr float eps = 1.0e-07;", no_return=True, ) diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py index bbd29c71..dae69377 100644 --- a/httomolibgpu/prep/phase.py +++ b/httomolibgpu/prep/phase.py @@ -207,7 +207,7 @@ def paganin_filter( mem_stack.free(ifft_plan.work_area.mem.size) else: with ifft_plan: - ifft_filtered_data = ifft2(fft_data, axes=(-2, -1), overwrite_x=True).real + ifft_filtered_data = ifft2(fft_data, axes=(-2, -1), overwrite_x=True) del fft_data del ifft_plan del ifft_input @@ -230,6 +230,7 @@ def paganin_filter( return mem_stack.highwater # crop the padded filtered data: + # .real should not be used for now, see: https://github.com/cupy/cupy/issues/9750 data = ifft_filtered_data[slc_indices].astype(cp.float32) del ifft_filtered_data diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py index d58a4d7a..aeaae130 100644 --- a/httomolibgpu/recon/algorithm.py +++ b/httomolibgpu/recon/algorithm.py @@ -405,12 +405,12 @@ def SIRT3d_tomobar( center, detector_pad, recon_size, + 1, gpu_id, - datafidelity="LS", ) _data_ = { - "projection_norm_data": data, + "projection_data": data, "data_axes_labels_order": input_data_axis_labels, } # data dictionary _algorithm_ = { @@ -484,11 +484,17 @@ def CGLS3d_tomobar( ################################### RecToolsCP = _instantiate_iterative_recon_class( - data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS" + data, + angles, + center, + detector_pad, + recon_size, + 1, + gpu_id, ) _data_ = { - "projection_norm_data": data, + "projection_data": data, "data_axes_labels_order": input_data_axis_labels, } # data dictionary _algorithm_ = { @@ -511,6 +517,7 @@ def FISTA3d_tomobar( recon_mask_radius: Optional[float] = 0.95, iterations: int = 20, subsets_number: int = 6, + data_fidelity: str = "LS", regularisation_type: Literal["ROF_TV", "PD_TV"] = "PD_TV", regularisation_parameter: float = 0.000001, regularisation_iterations: int = 50, @@ -544,6 +551,8 @@ def FISTA3d_tomobar( The number of FISTA algorithm iterations. subsets_number: int The number of the ordered subsets to accelerate convergence. Keep the value bellow 10 to avoid divergence. + data_fidelity: str + Data fidelity given as 'LS' (Least Squares), 'PWLS' (Penalised Weightes LS). regularisation_type: str A method to use for regularisation. Currently PD_TV and ROF_TV are available. regularisation_parameter: float @@ -586,19 +595,25 @@ def FISTA3d_tomobar( ################################### RecToolsCP = _instantiate_iterative_recon_class( - data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS" + data, + angles, + center, + detector_pad, + recon_size, + subsets_number, + gpu_id, ) _data_ = { - "projection_norm_data": data, - "OS_number": subsets_number, + "projection_data": data, + "data_fidelity": data_fidelity, "data_axes_labels_order": input_data_axis_labels, } lc = RecToolsCP.powermethod(_data_) # calculate Lipschitz constant (run once) _algorithm_ = { "iterations": iterations, - "lipschitz_const": lc.get(), + "lipschitz_const": lc, "nonnegativity": nonnegativity, "recon_mask_radius": recon_mask_radius, } @@ -625,7 +640,8 @@ def ADMM3d_tomobar( recon_mask_radius: Optional[float] = 0.95, iterations: int = 3, subsets_number: int = 24, - initialisation: Literal["FBP", "CGLS", None] = "FBP", + data_fidelity: str = "LS", + initialisation: Literal["FBP", "CGLS", "SIRT", None] = "FBP", ADMM_rho_const: float = 1.0, ADMM_relax_par: float = 1.7, regularisation_type: str = "PD_TV", @@ -662,9 +678,11 @@ def ADMM3d_tomobar( more than 10 without. Assuming that the subsets_number is reasonably large (>12). subsets_number: int The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24. + data_fidelity: str + Data fidelity given as 'LS' (Least Squares), 'PWLS' (Penalised Weightes LS). initialisation: str, optional - Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data - is noisy and undersampled, 'FBP' when data is of better quality (default) or None. + Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' or 'SIRT' when data + is noisy and/or undersampled. Choose 'FBP' when the data is of better quality (default) or None. ADMM_rho_const: float Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations. Recommended range is between 0.9 and 2.0. @@ -712,7 +730,7 @@ def ADMM3d_tomobar( initialisation, [str, type(None)], "initialisation", - ["FBP", "CGLS"], + ["FBP", "CGLS", "SIRT"], methods_name, ) __check_variable_type(ADMM_rho_const, [float], "ADMM_rho_const", [], methods_name) @@ -759,152 +777,18 @@ def ADMM3d_tomobar( ), requirements="C", ) - else: - initialisation_vol = None - - RecToolsCP = _instantiate_iterative_recon_class( - data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS" - ) - - _data_ = { - "projection_norm_data": data, - "OS_number": subsets_number, - "data_axes_labels_order": input_data_axis_labels, - } - - _algorithm_ = { - "initialise": initialisation_vol, - "iterations": iterations, - "nonnegativity": nonnegativity, - "recon_mask_radius": recon_mask_radius, - "ADMM_rho_const": ADMM_rho_const, - "ADMM_relax_par": ADMM_relax_par, - } - - _regularisation_ = { - "method": regularisation_type, # Selected regularisation method - "regul_param": regularisation_parameter, # Regularisation parameter - "iterations": regularisation_iterations, # The number of regularisation iterations - "half_precision": regularisation_half_precision, # enabling half-precision calculation - } - - reconstruction = RecToolsCP.ADMM(_data_, _algorithm_, _regularisation_) - cp._default_memory_pool.free_all_blocks() - return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C") - - -## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% ## -def _instantiate_direct_recon_class( - data: cp.ndarray, - angles: np.ndarray, - center: Optional[float] = None, - detector_pad: Union[bool, int] = False, - recon_size: Optional[int] = None, - recon_mask_radius: float = 0.95, - iterations: int = 3, - subsets_number: int = 24, - initialisation: Optional[str] = "FBP", - ADMM_rho_const: float = 1.0, - ADMM_relax_par: float = 1.7, - regularisation_type: str = "PD_TV", - regularisation_parameter: float = 0.0025, - regularisation_iterations: int = 40, - regularisation_half_precision: bool = True, - nonnegativity: bool = False, - gpu_id: int = 0, -) -> cp.ndarray: - """ - An Alternating Direction Method of Multipliers method with various types of regularisation or - denoising operations :cite:`kazantsev2019ccpi` (currently accepts ROF_TV and PD_TV regularisations only). - For more information see :ref:`_method_ADMM3d_tomobar`. - - Parameters - ---------- - data : cp.ndarray - Projection data as a CuPy array. - angles : np.ndarray - An array of angles given in radians. - center : float, optional - The center of rotation (CoR). - detector_pad : bool, int - Detector width padding with edge values to remove circle/arc type artifacts in the reconstruction. Set to True to perform - an automated padding or specify a certain value as an integer. - recon_size : int, optional - The [recon_size, recon_size] shape of the reconstructed slice in pixels. - By default (None), the reconstructed size will be the dimension of the horizontal detector. - recon_mask_radius: float - The radius of the circular mask that applies to the reconstructed slice in order to crop - out some undesirable artifacts. The values outside the given diameter will be set to zero. - To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required. - iterations : int - The number of ADMM algorithm iterations. The recommended range is between 3 to 5 with initialisation and - more than 10 without. Assuming that the subsets_number is reasonably large (>12). - subsets_number: int - The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24. - initialisation: str, optional - Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data - is noisy and undersampled, 'FBP' when data is of better quality (default) or None. - ADMM_rho_const: float - Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations. - Recommended range is between 0.9 and 2.0. - ADMM_relax_par: Relaxation parameter which can lead to acceleration of the algorithm, keep it in the range between 1.5 and 1.8 to avoid divergence. regularisation_type: str - A method to use for regularisation. Currently PD_TV and ROF_TV are available. - regularisation_parameter: float - The main regularisation parameter to control the amount of smoothing/noise removal. Larger values lead to stronger smoothing. - regularisation_iterations: int - The number of iterations for regularisers (aka INNER iterations). - regularisation_half_precision: bool - Perform faster regularisation computation in half-precision with a very minimal sacrifice in quality. - nonnegativity : bool - Impose nonnegativity constraint (set to True) on the reconstructed image. Default False. - gpu_id : int - A GPU device index to perform operation on. - - Returns - ------- - cp.ndarray - The ADMM reconstructed volume as a CuPy array. - """ - if initialisation not in ["FBP", "CGLS", None]: - raise ValueError( - "The acceptable values for initialisation are 'FBP','CGLS' and None" - ) - - if initialisation is not None: - if detector_pad == True: - detector_pad = __estimate_detectorHoriz_padding(data.shape[2]) - - if detector_pad > 0: - # if detector_pad is not zero we need to reconstruct the image on the recon+2*detector_pad size - recon_size = data.shape[2] + 2 * detector_pad - - if initialisation == "FBP": + elif initialisation == "SIRT": initialisation_vol = cp.require( cp.swapaxes( - FBP3d_tomobar( + SIRT3d_tomobar( data, angles=angles, center=center, detector_pad=detector_pad, recon_size=recon_size, recon_mask_radius=recon_mask_radius, - ), - 0, - 1, - ), - requirements="C", - ) - elif initialisation == "CGLS": - initialisation_vol = cp.require( - cp.swapaxes( - CGLS3d_tomobar( - data, - angles=angles, - center=center, - detector_pad=detector_pad, - recon_size=recon_size, - recon_mask_radius=recon_mask_radius, - iterations=15, + iterations=150, + nonnegativity=True, ), 0, 1, @@ -915,12 +799,18 @@ def _instantiate_direct_recon_class( initialisation_vol = None RecToolsCP = _instantiate_iterative_recon_class( - data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS" + data, + angles, + center, + detector_pad, + recon_size, + subsets_number, + gpu_id, ) _data_ = { - "projection_norm_data": data, - "OS_number": subsets_number, + "projection_data": data, + "data_fidelity": data_fidelity, "data_axes_labels_order": input_data_axis_labels, } @@ -1044,8 +934,8 @@ def _instantiate_iterative_recon_class( center: Optional[float] = None, detector_pad: Union[bool, int] = False, recon_size: Optional[int] = None, + OS_number: int = 1, gpu_id: int = 0, - datafidelity: str = "LS", ) -> Type: """instantiate ToMoBAR's iterative recon class @@ -1055,8 +945,8 @@ def _instantiate_iterative_recon_class( center (Optional[float], optional): center of recon. Defaults to None. detector_pad : (Union[bool, int]) : Detector width padding. Defaults to False. recon_size (Optional[int], optional): recon_size. Defaults to None. - datafidelity (str, optional): Data fidelity - gpu_id (int, optional): gpu ID. Defaults to 0. + OS_number (int): The number of ordered subsets, Defaults to 1. + gpu_id (int): gpu ID. Defaults to 0. Returns: Type[RecToolsIRCuPy]: an instance of the iterative class @@ -1072,14 +962,14 @@ def _instantiate_iterative_recon_class( RecToolsCP = RecToolsIRCuPy( DetectorsDimH=data.shape[2], # Horizontal detector dimension DetectorsDimH_pad=detector_pad, # padding for horizontal detector - DetectorsDimV=data.shape[1], # Vertical detector dimension (3D case) + DetectorsDimV=data.shape[1], # Vertical detector dimension CenterRotOffset=data.shape[2] / 2 - center - 0.5, # Center of Rotation scalar or a vector AnglesVec=-angles, # A vector of projection angles in radians ObjSize=recon_size, # Reconstructed object dimensions (scalar) - datafidelity=datafidelity, - device_projector=gpu_id, + device_projector=gpu_id, # device number + OS_number=OS_number, # the number of ordered subsets ) return RecToolsCP diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py index ec2554e4..6cab12c0 100644 --- a/httomolibgpu/recon/rotation.py +++ b/httomolibgpu/recon/rotation.py @@ -240,9 +240,9 @@ def _search_fine(sino, srad, step, init_cen, ratio, drop): mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop) cen_fliplr = (ncol - 1.0) / 2.0 - srad = np.clip(np.abs(srad), 1, ncol // 10 - 1) - step = np.clip(np.abs(step), 0.1, 1.1) - init_cen = np.clip(init_cen, srad, ncol - srad - 1) + srad = np.clip(np.abs(srad), 1, ncol // 10 - 1, dtype=np.float32) + step = np.clip(np.abs(step), 0.1, 1.1, dtype=np.float32) + init_cen = np.clip(init_cen, srad, ncol - srad - 1, dtype=np.float32) list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=cp.float32) list_shift = 2.0 * (list_cor - cen_fliplr) list_metric = cp.empty(list_shift.shape, dtype="float32") diff --git a/pyproject.toml b/pyproject.toml index 9e448e2a..78d34f4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,15 +31,15 @@ authors = [ classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: BSD License", - "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.12", "Environment :: GPU :: NVIDIA CUDA" ] -requires-python = ">=3.10" +requires-python = ">=3.12" dynamic = ["version"] dependencies = [ - "cupy==12.3.0", + "cupy==14.0.*", "nvtx", - "numpy", + "numpy==2.4.*", "scipy", "pillow", "scikit-image",