diff --git a/README.rst b/README.rst
index 932e9c56..359d23ab 100644
--- a/README.rst
+++ b/README.rst
@@ -9,7 +9,7 @@ Some of the methods also have been optimised to ensure higher computational effi
 The purpose of HTTomolibGPU
 ===========================
 
-Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the 
+Although **HTTomolibGPU** can be used as a stand-alone library, it has been specifically developed to work together with the
 `HTTomo <https://diamondlightsource.github.io/httomo/>`_ package as
 its backend for data processing. HTTomo is a user interface (UI) written in Python for fast big tomographic data processing using
 MPI protocols or as well serially.
@@ -34,7 +34,7 @@ Conda environment
 
    $ conda create --name httomolibgpu # create a fresh conda environment
    $ conda activate httomolibgpu # activate the environment
-   $ conda install conda-forge::cupy==12.3.0
+   $ conda install conda-forge::cupy==14.0.*
    $ pip install httomolibgpu
 
 Setup the development environment:
@@ -43,6 +43,6 @@ Setup the development environment:
 .. code-block:: console
 
    $ git clone git@github.com:DiamondLightSource/httomolibgpu.git # clone the repo
-   $ conda env create --name httomolibgpu -c conda-forge cupy==12.3.0 # install dependencies
+   $ conda env create --name httomolibgpu -c conda-forge cupy==14.0.* # install dependencies
    $ conda activate httomolibgpu # activate the environment
    $ pip install -e ./httomolibgpu[dev] # editable/development mode
diff --git a/httomolibgpu/cuda_kernels/__init__.py b/httomolibgpu/cuda_kernels/__init__.py
index cfb219a3..56a97eed 100644
--- a/httomolibgpu/cuda_kernels/__init__.py
+++ b/httomolibgpu/cuda_kernels/__init__.py
@@ -25,5 +25,5 @@ def load_cuda_module(
         code += f.read()
 
     return cp.RawModule(
-        options=("-std=c++11", *options), code=code, name_expressions=name_expressions
+        options=("-std=c++17", *options), code=code, name_expressions=name_expressions
     )
diff --git a/httomolibgpu/prep/normalize.py b/httomolibgpu/prep/normalize.py
index 8a1a3a3c..e805a241 100644
--- a/httomolibgpu/prep/normalize.py
+++ b/httomolibgpu/prep/normalize.py
@@ -123,7 +123,7 @@ def dark_flat_field_correction(
         "float32 out",
         kernel,
         kernel_name,
-        options=("-std=c++11",),
+        options=("-std=c++17",),
         loop_prep="constexpr float eps = 1.0e-07;",
         no_return=True,
     )
diff --git a/httomolibgpu/prep/phase.py b/httomolibgpu/prep/phase.py
index bbd29c71..dae69377 100644
--- a/httomolibgpu/prep/phase.py
+++ b/httomolibgpu/prep/phase.py
@@ -207,7 +207,7 @@ def paganin_filter(
         mem_stack.free(ifft_plan.work_area.mem.size)
     else:
         with ifft_plan:
-            ifft_filtered_data = ifft2(fft_data, axes=(-2, -1), overwrite_x=True).real
+            ifft_filtered_data = ifft2(fft_data, axes=(-2, -1), overwrite_x=True)
         del fft_data
     del ifft_plan
     del ifft_input
@@ -230,6 +230,7 @@ def paganin_filter(
         return mem_stack.highwater
 
     # crop the padded filtered data:
+    # .real should not be used for now, see: https://github.com/cupy/cupy/issues/9750
     data = ifft_filtered_data[slc_indices].astype(cp.float32)
     del ifft_filtered_data
 
diff --git a/httomolibgpu/recon/algorithm.py b/httomolibgpu/recon/algorithm.py
index d58a4d7a..aeaae130 100644
--- a/httomolibgpu/recon/algorithm.py
+++ b/httomolibgpu/recon/algorithm.py
@@ -405,12 +405,12 @@ def SIRT3d_tomobar(
         center,
         detector_pad,
         recon_size,
+        1,
         gpu_id,
-        datafidelity="LS",
     )
 
     _data_ = {
-        "projection_norm_data": data,
+        "projection_data": data,
         "data_axes_labels_order": input_data_axis_labels,
     }  # data dictionary
     _algorithm_ = {
@@ -484,11 +484,17 @@ def CGLS3d_tomobar(
     ###################################
 
     RecToolsCP = _instantiate_iterative_recon_class(
-        data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS"
+        data,
+        angles,
+        center,
+        detector_pad,
+        recon_size,
+        1,
+        gpu_id,
     )
 
     _data_ = {
-        "projection_norm_data": data,
+        "projection_data": data,
         "data_axes_labels_order": input_data_axis_labels,
     }  # data dictionary
     _algorithm_ = {
@@ -511,6 +517,7 @@ def FISTA3d_tomobar(
     recon_mask_radius: Optional[float] = 0.95,
     iterations: int = 20,
     subsets_number: int = 6,
+    data_fidelity: str = "LS",
     regularisation_type: Literal["ROF_TV", "PD_TV"] = "PD_TV",
     regularisation_parameter: float = 0.000001,
     regularisation_iterations: int = 50,
@@ -544,6 +551,8 @@ def FISTA3d_tomobar(
         The number of FISTA algorithm iterations.
     subsets_number: int
         The number of the ordered subsets to accelerate convergence. Keep the value bellow 10 to avoid divergence.
+    data_fidelity: str
+        Data fidelity given as 'LS' (Least Squares), 'PWLS' (Penalised Weightes LS).
     regularisation_type: str
         A method to use for regularisation. Currently PD_TV and ROF_TV are available.
     regularisation_parameter: float
@@ -586,19 +595,25 @@ def FISTA3d_tomobar(
     ###################################
 
     RecToolsCP = _instantiate_iterative_recon_class(
-        data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS"
+        data,
+        angles,
+        center,
+        detector_pad,
+        recon_size,
+        subsets_number,
+        gpu_id,
     )
 
     _data_ = {
-        "projection_norm_data": data,
-        "OS_number": subsets_number,
+        "projection_data": data,
+        "data_fidelity": data_fidelity,
         "data_axes_labels_order": input_data_axis_labels,
     }
     lc = RecToolsCP.powermethod(_data_)  # calculate Lipschitz constant (run once)
 
     _algorithm_ = {
         "iterations": iterations,
-        "lipschitz_const": lc.get(),
+        "lipschitz_const": lc,
         "nonnegativity": nonnegativity,
         "recon_mask_radius": recon_mask_radius,
     }
@@ -625,7 +640,8 @@ def ADMM3d_tomobar(
     recon_mask_radius: Optional[float] = 0.95,
     iterations: int = 3,
     subsets_number: int = 24,
-    initialisation: Literal["FBP", "CGLS", None] = "FBP",
+    data_fidelity: str = "LS",
+    initialisation: Literal["FBP", "CGLS", "SIRT", None] = "FBP",
     ADMM_rho_const: float = 1.0,
     ADMM_relax_par: float = 1.7,
     regularisation_type: str = "PD_TV",
@@ -662,9 +678,11 @@ def ADMM3d_tomobar(
         more than 10 without. Assuming that the subsets_number is reasonably large (>12).
     subsets_number: int
         The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24.
+    data_fidelity: str
+        Data fidelity given as 'LS' (Least Squares), 'PWLS' (Penalised Weightes LS).
     initialisation: str, optional
-        Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data
-        is noisy and undersampled, 'FBP' when data is of better quality (default) or None.
+        Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' or 'SIRT' when data
+        is noisy and/or undersampled. Choose 'FBP' when the data is of better quality (default) or None.
     ADMM_rho_const: float
         Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations.
         Recommended range is between 0.9 and 2.0.
@@ -712,7 +730,7 @@ def ADMM3d_tomobar(
         initialisation,
         [str, type(None)],
         "initialisation",
-        ["FBP", "CGLS"],
+        ["FBP", "CGLS", "SIRT"],
         methods_name,
     )
     __check_variable_type(ADMM_rho_const, [float], "ADMM_rho_const", [], methods_name)
@@ -759,152 +777,18 @@ def ADMM3d_tomobar(
                 ),
                 requirements="C",
             )
-    else:
-        initialisation_vol = None
-
-    RecToolsCP = _instantiate_iterative_recon_class(
-        data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS"
-    )
-
-    _data_ = {
-        "projection_norm_data": data,
-        "OS_number": subsets_number,
-        "data_axes_labels_order": input_data_axis_labels,
-    }
-
-    _algorithm_ = {
-        "initialise": initialisation_vol,
-        "iterations": iterations,
-        "nonnegativity": nonnegativity,
-        "recon_mask_radius": recon_mask_radius,
-        "ADMM_rho_const": ADMM_rho_const,
-        "ADMM_relax_par": ADMM_relax_par,
-    }
-
-    _regularisation_ = {
-        "method": regularisation_type,  # Selected regularisation method
-        "regul_param": regularisation_parameter,  # Regularisation parameter
-        "iterations": regularisation_iterations,  # The number of regularisation iterations
-        "half_precision": regularisation_half_precision,  # enabling half-precision calculation
-    }
-
-    reconstruction = RecToolsCP.ADMM(_data_, _algorithm_, _regularisation_)
-    cp._default_memory_pool.free_all_blocks()
-    return cp.require(cp.swapaxes(reconstruction, 0, 1), requirements="C")
-
-
-## %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ##
-def _instantiate_direct_recon_class(
-    data: cp.ndarray,
-    angles: np.ndarray,
-    center: Optional[float] = None,
-    detector_pad: Union[bool, int] = False,
-    recon_size: Optional[int] = None,
-    recon_mask_radius: float = 0.95,
-    iterations: int = 3,
-    subsets_number: int = 24,
-    initialisation: Optional[str] = "FBP",
-    ADMM_rho_const: float = 1.0,
-    ADMM_relax_par: float = 1.7,
-    regularisation_type: str = "PD_TV",
-    regularisation_parameter: float = 0.0025,
-    regularisation_iterations: int = 40,
-    regularisation_half_precision: bool = True,
-    nonnegativity: bool = False,
-    gpu_id: int = 0,
-) -> cp.ndarray:
-    """
-    An Alternating Direction Method of Multipliers method with various types of regularisation or
-    denoising operations :cite:`kazantsev2019ccpi` (currently accepts ROF_TV and PD_TV regularisations only).
-    For more information see :ref:`_method_ADMM3d_tomobar`.
-
-    Parameters
-    ----------
-    data : cp.ndarray
-        Projection data as a CuPy array.
-    angles : np.ndarray
-        An array of angles given in radians.
-    center : float, optional
-        The center of rotation (CoR).
-    detector_pad : bool, int
-        Detector width padding with edge values to remove circle/arc type artifacts in the reconstruction. Set to True to perform
-        an automated padding or specify a certain value as an integer.
-    recon_size : int, optional
-        The [recon_size, recon_size] shape of the reconstructed slice in pixels.
-        By default (None), the reconstructed size will be the dimension of the horizontal detector.
-    recon_mask_radius: float
-        The radius of the circular mask that applies to the reconstructed slice in order to crop
-        out some undesirable artifacts. The values outside the given diameter will be set to zero.
-        To implement the cropping one can use the range [0.7-1.0] or set to 2.0 when no cropping required.
-    iterations : int
-        The number of ADMM algorithm iterations. The recommended range is between 3 to 5 with initialisation and
-        more than 10 without. Assuming that the subsets_number is reasonably large (>12).
-    subsets_number: int
-        The number of the ordered subsets to accelerate convergence. The recommended range is between 12 to 24.
-    initialisation: str, optional
-        Initialise ADMM with the reconstructed image to reduce the number of iterations and accelerate. Choose between 'CGLS' when data
-        is noisy and undersampled, 'FBP' when data is of better quality (default) or None.
-    ADMM_rho_const: float
-        Convergence related parameter for ADMM, higher values lead to slower convergence, but too small values can destabilise the iterations.
-        Recommended range is between 0.9 and 2.0.
-    ADMM_relax_par: Relaxation parameter which can lead to acceleration of the algorithm, keep it in the range between 1.5 and 1.8 to avoid divergence.     regularisation_type: str
-        A method to use for regularisation. Currently PD_TV and ROF_TV are available.
-    regularisation_parameter: float
-        The main regularisation parameter to control the amount of smoothing/noise removal. Larger values lead to stronger smoothing.
-    regularisation_iterations: int
-        The number of iterations for regularisers (aka INNER iterations).
-    regularisation_half_precision: bool
-        Perform faster regularisation computation in half-precision with a very minimal sacrifice in quality.
-    nonnegativity : bool
-        Impose nonnegativity constraint (set to True) on the reconstructed image. Default False.
-    gpu_id : int
-        A GPU device index to perform operation on.
-
-    Returns
-    -------
-    cp.ndarray
-        The ADMM reconstructed volume as a CuPy array.
-    """
-    if initialisation not in ["FBP", "CGLS", None]:
-        raise ValueError(
-            "The acceptable values for initialisation are 'FBP','CGLS' and None"
-        )
-
-    if initialisation is not None:
-        if detector_pad == True:
-            detector_pad = __estimate_detectorHoriz_padding(data.shape[2])
-
-        if detector_pad > 0:
-            # if detector_pad is not zero we need to reconstruct the image on the recon+2*detector_pad size
-            recon_size = data.shape[2] + 2 * detector_pad
-
-        if initialisation == "FBP":
+        elif initialisation == "SIRT":
             initialisation_vol = cp.require(
                 cp.swapaxes(
-                    FBP3d_tomobar(
+                    SIRT3d_tomobar(
                         data,
                         angles=angles,
                         center=center,
                         detector_pad=detector_pad,
                         recon_size=recon_size,
                         recon_mask_radius=recon_mask_radius,
-                    ),
-                    0,
-                    1,
-                ),
-                requirements="C",
-            )
-        elif initialisation == "CGLS":
-            initialisation_vol = cp.require(
-                cp.swapaxes(
-                    CGLS3d_tomobar(
-                        data,
-                        angles=angles,
-                        center=center,
-                        detector_pad=detector_pad,
-                        recon_size=recon_size,
-                        recon_mask_radius=recon_mask_radius,
-                        iterations=15,
+                        iterations=150,
+                        nonnegativity=True,
                     ),
                     0,
                     1,
@@ -915,12 +799,18 @@ def _instantiate_direct_recon_class(
         initialisation_vol = None
 
     RecToolsCP = _instantiate_iterative_recon_class(
-        data, angles, center, detector_pad, recon_size, gpu_id, datafidelity="LS"
+        data,
+        angles,
+        center,
+        detector_pad,
+        recon_size,
+        subsets_number,
+        gpu_id,
     )
 
     _data_ = {
-        "projection_norm_data": data,
-        "OS_number": subsets_number,
+        "projection_data": data,
+        "data_fidelity": data_fidelity,
         "data_axes_labels_order": input_data_axis_labels,
     }
 
@@ -1044,8 +934,8 @@ def _instantiate_iterative_recon_class(
     center: Optional[float] = None,
     detector_pad: Union[bool, int] = False,
     recon_size: Optional[int] = None,
+    OS_number: int = 1,
     gpu_id: int = 0,
-    datafidelity: str = "LS",
 ) -> Type:
     """instantiate ToMoBAR's iterative recon class
 
@@ -1055,8 +945,8 @@ def _instantiate_iterative_recon_class(
         center (Optional[float], optional): center of recon. Defaults to None.
         detector_pad : (Union[bool, int]) : Detector width padding. Defaults to False.
         recon_size (Optional[int], optional): recon_size. Defaults to None.
-        datafidelity (str, optional): Data fidelity
-        gpu_id (int, optional): gpu ID. Defaults to 0.
+        OS_number (int): The number of ordered subsets, Defaults to 1.
+        gpu_id (int): gpu ID. Defaults to 0.
 
     Returns:
         Type[RecToolsIRCuPy]: an instance of the iterative class
@@ -1072,14 +962,14 @@ def _instantiate_iterative_recon_class(
     RecToolsCP = RecToolsIRCuPy(
         DetectorsDimH=data.shape[2],  # Horizontal detector dimension
         DetectorsDimH_pad=detector_pad,  # padding for horizontal detector
-        DetectorsDimV=data.shape[1],  # Vertical detector dimension (3D case)
+        DetectorsDimV=data.shape[1],  # Vertical detector dimension
         CenterRotOffset=data.shape[2] / 2
         - center
         - 0.5,  # Center of Rotation scalar or a vector
         AnglesVec=-angles,  # A vector of projection angles in radians
         ObjSize=recon_size,  # Reconstructed object dimensions (scalar)
-        datafidelity=datafidelity,
-        device_projector=gpu_id,
+        device_projector=gpu_id,  # device number
+        OS_number=OS_number,  # the number of ordered subsets
     )
     return RecToolsCP
 
diff --git a/httomolibgpu/recon/rotation.py b/httomolibgpu/recon/rotation.py
index ec2554e4..6cab12c0 100644
--- a/httomolibgpu/recon/rotation.py
+++ b/httomolibgpu/recon/rotation.py
@@ -240,9 +240,9 @@ def _search_fine(sino, srad, step, init_cen, ratio, drop):
     mask = _create_mask(2 * nrow, ncol, 0.5 * ratio * ncol, drop)
 
     cen_fliplr = (ncol - 1.0) / 2.0
-    srad = np.clip(np.abs(srad), 1, ncol // 10 - 1)
-    step = np.clip(np.abs(step), 0.1, 1.1)
-    init_cen = np.clip(init_cen, srad, ncol - srad - 1)
+    srad = np.clip(np.abs(srad), 1, ncol // 10 - 1, dtype=np.float32)
+    step = np.clip(np.abs(step), 0.1, 1.1, dtype=np.float32)
+    init_cen = np.clip(init_cen, srad, ncol - srad - 1, dtype=np.float32)
     list_cor = init_cen + cp.arange(-srad, srad + step, step, dtype=cp.float32)
     list_shift = 2.0 * (list_cor - cen_fliplr)
     list_metric = cp.empty(list_shift.shape, dtype="float32")
diff --git a/pyproject.toml b/pyproject.toml
index 9e448e2a..78d34f4b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,15 +31,15 @@ authors = [
 classifiers = [
     "Development Status :: 4 - Beta",
     "License :: OSI Approved :: BSD License",
-    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.12",
     "Environment :: GPU :: NVIDIA CUDA"
 ]
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dynamic = ["version"]
 dependencies = [
-    "cupy==12.3.0",
+    "cupy==14.0.*",
     "nvtx",
-    "numpy",
+    "numpy==2.4.*",
     "scipy",
     "pillow",
     "scikit-image",