From 907e098f461eef29f3e2878af34aff3a987f2d19 Mon Sep 17 00:00:00 2001 From: "Hans J. Johnson" Date: Mon, 8 Jun 2026 12:03:35 -0500 Subject: [PATCH 1/4] COMP: Simplify CUDA backend wiring and backend-default selection Build the CUDA backend as host code. itkVkCommon.cxx contains no device kernels (VkFFT JIT-compiles them at runtime via nvrtc), so the nvcc language apparatus was inert and is removed: enable_language(CUDA), the LANGUAGE CUDA override that targeted a nonexistent itkVkCommon.cpp, the $<COMPILE_LANGUAGE:CUDA> gencode flags, and CUDA separable compilation. Removing enable_language(CUDA) also avoids enabling the CUDA language for the whole parent project when the module is built in the ITK tree. Make itk-module-init.cmake the single source of the VKFFT_BACKEND default by guarding the duplicate set() in CMakeLists.txt, unset the temporary Apple framework probe cache entries, and align the cache help string with the full backend list. Validated on an RTX 6000 Ada (CUDA 13.2): explicit VKFFT_BACKEND=1, auto-detect (selects CUDA), and VKFFT_BACKEND=3 (OpenCL) each build and pass 36/36 VkFFTBackend tests. --- CMakeLists.txt | 6 +++++- itk-module-init.cmake | 6 ++++-- src/CMakeLists.txt | 18 +++--------------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 344aef4..eeb2e39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,11 @@ include(FetchContent) #### Set up VkFFT flags #### -set(VKFFT_BACKEND 3 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal") +# itk-module-init.cmake normally establishes VKFFT_BACKEND (with auto-detection); +# this is the fallback default when that file was not included first. 
+if(NOT DEFINED VKFFT_BACKEND) + set(VKFFT_BACKEND 3 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal") +endif() add_compile_definitions(VKFFT_BACKEND=${VKFFT_BACKEND}) if(VKFFT_BACKEND EQUAL 1) diff --git a/itk-module-init.cmake b/itk-module-init.cmake index ac65fbe..e72acd9 100644 --- a/itk-module-init.cmake +++ b/itk-module-init.cmake @@ -12,6 +12,9 @@ if(NOT DEFINED VKFFT_BACKEND) if(_vkfft_metal_probe AND _vkfft_foundation_probe AND _vkfft_quartzcore_probe) set(_vkfft_have_metal TRUE) endif() + unset(_vkfft_metal_probe CACHE) + unset(_vkfft_foundation_probe CACHE) + unset(_vkfft_quartzcore_probe CACHE) endif() if(CMAKE_CUDA_COMPILER) set(_vkfft_backend_default 1) @@ -23,9 +26,8 @@ if(NOT DEFINED VKFFT_BACKEND) else() set(_vkfft_backend_default ${VKFFT_BACKEND}) endif() -set(VKFFT_BACKEND ${_vkfft_backend_default} CACHE STRING "1 - CUDA, 3 - OpenCL, 4 - Level Zero, 5 - Metal") +set(VKFFT_BACKEND ${_vkfft_backend_default} CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal") if(${VKFFT_BACKEND} EQUAL 1) - enable_language(CUDA) find_package(CUDAToolkit REQUIRED) set(CUDA_LIBRARIES CUDA::cudart) find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDAToolkit_LIBRARY_DIR}" "/usr/lib64" "/usr/local/cuda/lib64") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 08ea03a..b87b1fb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,21 +7,9 @@ set(VkFFTBackend_SRCS itk_module_add_library(VkFFTBackend ${VkFFTBackend_SRCS}) if(${VKFFT_BACKEND} EQUAL 1) - target_link_libraries(VkFFTBackend PUBLIC ${CUDA_LIBRARIES} CUDA::cuda_driver ${CUDA_NVRTC_LIB} VkFFT half) - if(MSVC) - else() - set_source_files_properties(itkVkCommon.cpp PROPERTIES LANGUAGE CUDA) - endif() - target_compile_options(VkFFTBackend PUBLIC "$<$<COMPILE_LANGUAGE:CUDA>:SHELL - -DVKFFT_BACKEND=${VKFFT_BACKEND} - -gencode arch=compute_35,code=compute_35 - -gencode arch=compute_60,code=compute_60 - -gencode arch=compute_70,code=compute_70 - -gencode 
arch=compute_75,code=compute_75 - -gencode arch=compute_80,code=compute_80 - -gencode arch=compute_86,code=compute_86>") - set_target_properties(VkFFTBackend PROPERTIES CUDA_SEPARABLE_COMPILATION ON) - set_target_properties(VkFFTBackend PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON) + # itkVkCommon.cxx is host code; VkFFT JIT-compiles CUDA kernels at runtime + # via nvrtc, so no nvcc device compilation is required here. + target_link_libraries(VkFFTBackend PUBLIC ${CUDA_LIBRARIES} CUDA::cuda_driver ${CUDA_NVRTC_LIB} VkFFT half) elseif(${VKFFT_BACKEND} EQUAL 3) target_link_libraries(VkFFTBackend PUBLIC ${OpenCL_LIBRARY}) endif() From c060f4dc637c512f60593553339e9041a8526057 Mon Sep 17 00:00:00 2001 From: "Hans J. Johnson" Date: Mon, 8 Jun 2026 12:29:32 -0500 Subject: [PATCH 2/4] STYLE: Format CMake and wrapping files with gersemi Add a remote-module .gersemi.config matching ITK conventions (2-space indent, 80-column line length, favour-expansion list style) and apply gersemi 0.19.3 to every CMakeLists.txt, *.cmake, and *.wrap file. Formatting only; no build-behavior change. Validated VKFFT_BACKEND=1 builds and passes 36/36 VkFFTBackend tests on CUDA hardware. 
--- .gersemi.config | 11 + CMakeLists.txt | 204 +++++++++++++----- itk-module-init.cmake | 65 ++++-- src/CMakeLists.txt | 15 +- test/CMakeLists.txt | 92 ++++---- ...itkVkComplexToComplex1DFFTImageFilter.wrap | 12 +- .../itkVkComplexToComplexFFTImageFilter.wrap | 12 +- .../itkVkDiscreteGaussianImageFilter.wrap | 2 +- wrapping/itkVkForward1DFFTImageFilter.wrap | 12 +- wrapping/itkVkForwardFFTImageFilter.wrap | 12 +- ...fHermitianToRealInverseFFTImageFilter.wrap | 12 +- wrapping/itkVkInverse1DFFTImageFilter.wrap | 12 +- wrapping/itkVkInverseFFTImageFilter.wrap | 12 +- ...tkVkMultiResolutionPyramidImageFilter.wrap | 2 +- ...lToHalfHermitianForwardFFTImageFilter.wrap | 12 +- 15 files changed, 334 insertions(+), 153 deletions(-) create mode 100644 .gersemi.config diff --git a/.gersemi.config b/.gersemi.config new file mode 100644 index 0000000..b8aa29b --- /dev/null +++ b/.gersemi.config @@ -0,0 +1,11 @@ +## Gersemi configuration for ITKVkFFTBackend (no CMake/stubs) +# yaml-language-server: $schema=https://raw.githubusercontent.com/BlankSpruce/gersemi/0.19.3/gersemi/configuration.schema.json + +disable_formatting: false +extensions: [] +# use ITK preferred 2 space indent +indent: 2 +line_length: 80 +list_expansion: favour-expansion +unsafe: false +warn_about_unknown_commands: false diff --git a/CMakeLists.txt b/CMakeLists.txt index eeb2e39..c7e7b56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,12 @@ include(FetchContent) # itk-module-init.cmake normally establishes VKFFT_BACKEND (with auto-detection); # this is the fallback default when that file was not included first. 
if(NOT DEFINED VKFFT_BACKEND) - set(VKFFT_BACKEND 3 CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal") + set( + VKFFT_BACKEND + 3 + CACHE STRING + "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal" + ) endif() add_compile_definitions(VKFFT_BACKEND=${VKFFT_BACKEND}) @@ -26,20 +31,31 @@ if(VKFFT_BACKEND EQUAL 1) find_package(CUDAToolkit REQUIRED) list(APPEND VkFFTBackend_SYSTEM_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS}) elseif(VKFFT_BACKEND EQUAL 3) - set(CL_TARGET_OPENCL_VERSION 300 CACHE STRING "OpenCL API version to target (e.g. 120, 300)") + set( + CL_TARGET_OPENCL_VERSION + 300 + CACHE STRING + "OpenCL API version to target (e.g. 120, 300)" + ) add_compile_definitions(CL_TARGET_OPENCL_VERSION=${CL_TARGET_OPENCL_VERSION}) ## When this module is loaded by an app, load OpenCL too. - set(VkFFTBackend_EXPORT_CODE_INSTALL " + set( + VkFFTBackend_EXPORT_CODE_INSTALL + " set(OpenCL_DIR \"${OpenCL_DIR}\") find_package(OpenCL REQUIRED) - ") - set(VkFFTBackend_EXPORT_CODE_BUILD " + " + ) + set( + VkFFTBackend_EXPORT_CODE_BUILD + " if(NOT ITK_BINARY_DIR) set(OpenCL_DIR \"${OpenCL_DIR}\") find_package(OpenCL REQUIRED) endif() - ") + " + ) list(APPEND VkFFTBackend_SYSTEM_INCLUDE_DIRS ${OpenCL_INCLUDE_DIRS}) get_filename_component(OpenCL_LIB_DIR ${OpenCL_LIBRARY} DIRECTORY) set(VkFFTBackend_SYSTEM_LIBRARY_DIRS ${OpenCL_LIB_DIR}) @@ -47,31 +63,57 @@ elseif(VKFFT_BACKEND EQUAL 4) # oneAPI Level Zero loader (libze_loader) + headers (<level_zero/ze_api.h>). # Installed by Intel's "level-zero" package on Linux/Windows; also shipped # with the oneAPI Base Toolkit. 
- find_path(LevelZero_INCLUDE_DIR - NAMES level_zero/ze_api.h - HINTS ENV LEVEL_ZERO_ROOT ENV CMPLR_ROOT - PATH_SUFFIXES include) - find_library(LevelZero_LIBRARY - NAMES ze_loader - HINTS ENV LEVEL_ZERO_ROOT ENV CMPLR_ROOT - PATH_SUFFIXES lib lib64 lib/x64) + find_path( + LevelZero_INCLUDE_DIR + NAMES + level_zero/ze_api.h + HINTS + ENV LEVEL_ZERO_ROOT + ENV CMPLR_ROOT + PATH_SUFFIXES + include + ) + find_library( + LevelZero_LIBRARY + NAMES + ze_loader + HINTS + ENV LEVEL_ZERO_ROOT + ENV CMPLR_ROOT + PATH_SUFFIXES + lib + lib64 + lib/x64 + ) if(NOT LevelZero_INCLUDE_DIR OR NOT LevelZero_LIBRARY) - message(FATAL_ERROR "VKFFT_BACKEND=4 (Level Zero) requires the oneAPI Level Zero loader (ze_loader) and headers (level_zero/ze_api.h). Install the 'level-zero' package or set LEVEL_ZERO_ROOT.") + message( + FATAL_ERROR + "VKFFT_BACKEND=4 (Level Zero) requires the oneAPI Level Zero loader (ze_loader) and headers (level_zero/ze_api.h). Install the 'level-zero' package or set LEVEL_ZERO_ROOT." + ) endif() # VkFFT includes <ze_api.h> bare; this module uses <level_zero/ze_api.h> — both dirs needed. - list(APPEND VkFFTBackend_SYSTEM_INCLUDE_DIRS + list( + APPEND + VkFFTBackend_SYSTEM_INCLUDE_DIRS ${LevelZero_INCLUDE_DIR} - ${LevelZero_INCLUDE_DIR}/level_zero) + ${LevelZero_INCLUDE_DIR}/level_zero + ) list(APPEND VkFFTBackend_SYSTEM_LIBRARIES ${LevelZero_LIBRARY}) - set(VkFFTBackend_EXPORT_CODE_INSTALL " + set( + VkFFTBackend_EXPORT_CODE_INSTALL + " find_path(LevelZero_INCLUDE_DIR NAMES level_zero/ze_api.h) find_library(LevelZero_LIBRARY NAMES ze_loader) - ") + " + ) set(VkFFTBackend_EXPORT_CODE_BUILD "${VkFFTBackend_EXPORT_CODE_INSTALL}") elseif(VKFFT_BACKEND EQUAL 5) if(NOT APPLE) - message(FATAL_ERROR "VKFFT_BACKEND=5 (Metal) requires Apple platforms (macOS/iOS).") + message( + FATAL_ERROR + "VKFFT_BACKEND=5 (Metal) requires Apple platforms (macOS/iOS)." + ) endif() # metal-cpp requires C++17. 
@@ -94,20 +136,39 @@ elseif(VKFFT_BACKEND EQUAL 5) find_library(METAL_FRAMEWORK Metal REQUIRED) find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED) find_library(QUARTZCORE_FRAMEWORK QuartzCore REQUIRED) - list(APPEND VkFFTBackend_SYSTEM_LIBRARIES - ${METAL_FRAMEWORK} ${FOUNDATION_FRAMEWORK} ${QUARTZCORE_FRAMEWORK}) + list( + APPEND + VkFFTBackend_SYSTEM_LIBRARIES + ${METAL_FRAMEWORK} + ${FOUNDATION_FRAMEWORK} + ${QUARTZCORE_FRAMEWORK} + ) - set(VkFFTBackend_EXPORT_CODE_INSTALL " + set( + VkFFTBackend_EXPORT_CODE_INSTALL + " find_library(METAL_FRAMEWORK Metal REQUIRED) find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED) find_library(QUARTZCORE_FRAMEWORK QuartzCore REQUIRED) - ") + " + ) set(VkFFTBackend_EXPORT_CODE_BUILD "${VkFFTBackend_EXPORT_CODE_INSTALL}") else() - message(WARNING "ITKVkFFTBackend currently supports only CUDA, OpenCL, Level Zero, or Metal backends.") + message( + WARNING + "ITKVkFFTBackend currently supports only CUDA, OpenCL, Level Zero, or Metal backends." + ) endif() -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") +if( + CMAKE_CXX_COMPILER_ID + STREQUAL + "Clang" + OR + CMAKE_CXX_COMPILER_ID + STREQUAL + "GNU" +) # Remove this list of disabled warnings when VkFFT has been updated message("Adding compile options: -Wno-format-overflow") add_compile_options(-Wno-format-overflow) @@ -117,7 +178,11 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") # C4146: unary minus operator applied to unsigned type, result still unsigned # C4244: 'argument': conversion from 'double' to 'uint64_t', possible loss of data # C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead. 
- add_compile_options(/wd4146 /wd4244 /wd4996) + add_compile_options( + /wd4146 + /wd4244 + /wd4996 + ) endif() #### Populate VkFFT dependency ### @@ -129,22 +194,33 @@ endif() set(VkFFT_GIT_TAG "v1.3.4") set(VkFFT_GIT_REPOSITORY "https://github.com/DTolm/VkFFT") -set(VkFFT_HEADER_URL "https://raw.githubusercontent.com/DTolm/VkFFT/${VkFFT_GIT_TAG}/vkFFT/vkFFT.h") +set( + VkFFT_HEADER_URL + "https://raw.githubusercontent.com/DTolm/VkFFT/${VkFFT_GIT_TAG}/vkFFT/vkFFT.h" +) # v1.3.x split vkFFT.h into a multi-file tree, so the single-header URL # fetch no longer suffices; force the full-repo path when not overridden. -option(BUILD_VKFFT "Fetch the full VkFFT repo (required for v1.3+ multi-file layout)" ON) +option( + BUILD_VKFFT + "Fetch the full VkFFT repo (required for v1.3+ multi-file layout)" + ON +) if(BUILD_VKFFT) # Fetch the full VkFFT repo with the header-only library and build targets FetchContent_Declare( vkfft_lib GIT_REPOSITORY ${VkFFT_GIT_REPOSITORY} GIT_TAG ${VkFFT_GIT_TAG} - ) + ) FetchContent_GetProperties(vkfft_lib) if(NOT vkfft_lib_POPULATED) FetchContent_Populate(vkfft_lib) - add_subdirectory(${vkfft_lib_SOURCE_DIR} ${vkfft_lib_BINARY_DIR} EXCLUDE_FROM_ALL) + add_subdirectory( + ${vkfft_lib_SOURCE_DIR} + ${vkfft_lib_BINARY_DIR} + EXCLUDE_FROM_ALL + ) endif() set(vkfft_INCLUDE_DIR "${vkfft_lib_SOURCE_DIR}/vkFFT") @@ -153,7 +229,8 @@ else() set(vkfft_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_deps/include") FetchContent_Declare( vkfft_header_only - URL ${VkFFT_HEADER_URL} + URL + ${VkFFT_HEADER_URL} DOWNLOAD_DIR "${vkfft_INCLUDE_DIR}" DOWNLOAD_NO_EXTRACT TRUE ) @@ -174,7 +251,12 @@ endif() if(VKFFT_BACKEND EQUAL 4) target_link_libraries(VkFFTBackend PUBLIC ${LevelZero_LIBRARY}) - target_include_directories(VkFFTBackend SYSTEM PUBLIC ${LevelZero_INCLUDE_DIR}) + target_include_directories( + VkFFTBackend + SYSTEM + PUBLIC + ${LevelZero_INCLUDE_DIR} + ) # Probe at configure time whether a Level Zero driver implementation is # actually present on this host 
(the loader can be installed without any @@ -184,7 +266,10 @@ if(VKFFT_BACKEND EQUAL 4) # stays clean. if(BUILD_TESTING) set(_probe_src "${CMAKE_CURRENT_BINARY_DIR}/level_zero_probe.cxx") - file(WRITE "${_probe_src}" [=[ + file( + WRITE + "${_probe_src}" + [=[ #include <level_zero/ze_api.h> int main(){ if (zeInit(0) != ZE_RESULT_SUCCESS) return 1; @@ -192,26 +277,49 @@ int main(){ if (zeDriverGet(&n, nullptr) != ZE_RESULT_SUCCESS) return 1; return n > 0 ? 0 : 1; } -]=]) - try_run(VkFFTBackend_LEVEL_ZERO_RUNTIME_RUN - VkFFTBackend_LEVEL_ZERO_RUNTIME_COMPILE - "${CMAKE_CURRENT_BINARY_DIR}/level_zero_probe" - SOURCES "${_probe_src}" - CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${LevelZero_INCLUDE_DIR};${LevelZero_INCLUDE_DIR}/level_zero" - LINK_LIBRARIES ${LevelZero_LIBRARY}) - if(NOT VkFFTBackend_LEVEL_ZERO_RUNTIME_COMPILE OR NOT VkFFTBackend_LEVEL_ZERO_RUNTIME_RUN EQUAL 0) - message(STATUS "VkFFTBackend: no working Level Zero driver detected (zeInit/zeDriverGet probe failed); FFT tests will be DISABLED.") +]=] + ) + try_run( + VkFFTBackend_LEVEL_ZERO_RUNTIME_RUN + VkFFTBackend_LEVEL_ZERO_RUNTIME_COMPILE + "${CMAKE_CURRENT_BINARY_DIR}/level_zero_probe" + SOURCES + "${_probe_src}" + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES=${LevelZero_INCLUDE_DIR};${LevelZero_INCLUDE_DIR}/level_zero" + LINK_LIBRARIES + ${LevelZero_LIBRARY} + ) + if( + NOT + VkFFTBackend_LEVEL_ZERO_RUNTIME_COMPILE + OR + NOT + VkFFTBackend_LEVEL_ZERO_RUNTIME_RUN + EQUAL + 0 + ) + message( + STATUS + "VkFFTBackend: no working Level Zero driver detected (zeInit/zeDriverGet probe failed); FFT tests will be DISABLED." + ) set(VkFFTBackend_LEVEL_ZERO_RUNTIME_AVAILABLE FALSE CACHE INTERNAL "") else() - message(STATUS "VkFFTBackend: Level Zero runtime probe found at least one driver.") + message( + STATUS + "VkFFTBackend: Level Zero runtime probe found at least one driver." 
+ ) set(VkFFTBackend_LEVEL_ZERO_RUNTIME_AVAILABLE TRUE CACHE INTERNAL "") endif() endif() endif() - if(VKFFT_BACKEND EQUAL 5) - target_link_libraries(VkFFTBackend PUBLIC - ${METAL_FRAMEWORK} ${FOUNDATION_FRAMEWORK} ${QUARTZCORE_FRAMEWORK}) + target_link_libraries( + VkFFTBackend + PUBLIC + ${METAL_FRAMEWORK} + ${FOUNDATION_FRAMEWORK} + ${QUARTZCORE_FRAMEWORK} + ) endif() - diff --git a/itk-module-init.cmake b/itk-module-init.cmake index e72acd9..e7d00a1 100644 --- a/itk-module-init.cmake +++ b/itk-module-init.cmake @@ -9,7 +9,13 @@ if(NOT DEFINED VKFFT_BACKEND) find_library(_vkfft_metal_probe Metal) find_library(_vkfft_foundation_probe Foundation) find_library(_vkfft_quartzcore_probe QuartzCore) - if(_vkfft_metal_probe AND _vkfft_foundation_probe AND _vkfft_quartzcore_probe) + if( + _vkfft_metal_probe + AND + _vkfft_foundation_probe + AND + _vkfft_quartzcore_probe + ) set(_vkfft_have_metal TRUE) endif() unset(_vkfft_metal_probe CACHE) @@ -26,28 +32,61 @@ if(NOT DEFINED VKFFT_BACKEND) else() set(_vkfft_backend_default ${VKFFT_BACKEND}) endif() -set(VKFFT_BACKEND ${_vkfft_backend_default} CACHE STRING "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal") +set( + VKFFT_BACKEND + ${_vkfft_backend_default} + CACHE STRING + "0 - Vulkan, 1 - CUDA, 2 - HIP, 3 - OpenCL, 4 - Level Zero, 5 - Metal" +) if(${VKFFT_BACKEND} EQUAL 1) find_package(CUDAToolkit REQUIRED) set(CUDA_LIBRARIES CUDA::cudart) - find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDAToolkit_LIBRARY_DIR}" "/usr/lib64" "/usr/local/cuda/lib64") + find_library( + CUDA_NVRTC_LIB + libnvrtc + nvrtc + HINTS + "${CUDAToolkit_LIBRARY_DIR}" + "/usr/lib64" + "/usr/local/cuda/lib64" + ) elseif(${VKFFT_BACKEND} EQUAL 3) find_package(OpenCL REQUIRED) elseif(${VKFFT_BACKEND} EQUAL 4) - find_path(LevelZero_INCLUDE_DIR - NAMES level_zero/ze_api.h - HINTS ENV LEVEL_ZERO_ROOT ENV CMPLR_ROOT - PATH_SUFFIXES include) - find_library(LevelZero_LIBRARY - NAMES ze_loader - HINTS ENV LEVEL_ZERO_ROOT ENV 
CMPLR_ROOT - PATH_SUFFIXES lib lib64 lib/x64) + find_path( + LevelZero_INCLUDE_DIR + NAMES + level_zero/ze_api.h + HINTS + ENV LEVEL_ZERO_ROOT + ENV CMPLR_ROOT + PATH_SUFFIXES + include + ) + find_library( + LevelZero_LIBRARY + NAMES + ze_loader + HINTS + ENV LEVEL_ZERO_ROOT + ENV CMPLR_ROOT + PATH_SUFFIXES + lib + lib64 + lib/x64 + ) if(NOT LevelZero_INCLUDE_DIR OR NOT LevelZero_LIBRARY) - message(FATAL_ERROR "VKFFT_BACKEND=4 (Level Zero) requires the oneAPI Level Zero loader (ze_loader) and headers (level_zero/ze_api.h).") + message( + FATAL_ERROR + "VKFFT_BACKEND=4 (Level Zero) requires the oneAPI Level Zero loader (ze_loader) and headers (level_zero/ze_api.h)." + ) endif() elseif(${VKFFT_BACKEND} EQUAL 5) if(NOT APPLE) - message(FATAL_ERROR "VKFFT_BACKEND=5 (Metal) requires Apple platforms (macOS/iOS, including Apple Silicon).") + message( + FATAL_ERROR + "VKFFT_BACKEND=5 (Metal) requires Apple platforms (macOS/iOS, including Apple Silicon)." + ) endif() find_library(METAL_FRAMEWORK Metal REQUIRED) find_library(FOUNDATION_FRAMEWORK Foundation REQUIRED) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b87b1fb..79b91ce 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,15 +1,24 @@ -set(VkFFTBackend_SRCS +set( + VkFFTBackend_SRCS itkVkCommon.cxx itkVkGlobalConfiguration.cxx itkVkFFTImageFilterInitFactory.cxx - ) +) itk_module_add_library(VkFFTBackend ${VkFFTBackend_SRCS}) if(${VKFFT_BACKEND} EQUAL 1) # itkVkCommon.cxx is host code; VkFFT JIT-compiles CUDA kernels at runtime # via nvrtc, so no nvcc device compilation is required here. 
- target_link_libraries(VkFFTBackend PUBLIC ${CUDA_LIBRARIES} CUDA::cuda_driver ${CUDA_NVRTC_LIB} VkFFT half) + target_link_libraries( + VkFFTBackend + PUBLIC + ${CUDA_LIBRARIES} + CUDA::cuda_driver + ${CUDA_NVRTC_LIB} + VkFFT + half + ) elseif(${VKFFT_BACKEND} EQUAL 3) target_link_libraries(VkFFTBackend PUBLIC ${OpenCL_LIBRARY}) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3389816..32ac8f9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,6 +1,7 @@ itk_module_test() -set(VkFFTBackendTests +set( + VkFFTBackendTests itkVkComplexToComplexFFTImageFilterTest.cxx itkVkComplexToComplex1DFFTImageFilterBaselineTest.cxx itkVkComplexToComplex1DFFTImageFilterSizesTest.cxx @@ -14,12 +15,12 @@ set(VkFFTBackendTests itkVkInverse1DFFTImageFilterBaselineTest.cxx itkVkMultiResolutionPyramidImageFilterTest.cxx itkVkMultiResolutionPyramidImageFilterFactoryTest.cxx - ) +) -CreateTestDriver(VkFFTBackend +createtestdriver(VkFFTBackend "${VkFFTBackend-Test_LIBRARIES};${OpenCL_LIBRARY}" "${VkFFTBackendTests}" - ) +) # Helper: register one ctest case per precision for each # logical test. 
Each test driver entry accepts a leading "float" or @@ -36,7 +37,12 @@ endif() function(_vkfft_disable_on_unsupported_fp64 test_name) if(VkFFTBackend_DOUBLE_PRECISION_GPU_UNSUPPORTED) - set_tests_properties(${test_name} PROPERTIES DISABLED TRUE) + set_tests_properties( + ${test_name} + PROPERTIES + DISABLED + TRUE + ) endif() endfunction() @@ -50,7 +56,7 @@ itk_add_test(NAME itkVkComplexToComplexFFTImageFilterTestFloat COMMAND VkFFTBackendTestDriver itkVkComplexToComplexFFTImageFilterTest float ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplexFFTImageFilterTestOutputFloat.mha - ) +) itk_add_test(NAME itkVkComplexToComplexFFTImageFilterTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -58,7 +64,7 @@ itk_add_test(NAME itkVkComplexToComplexFFTImageFilterTestDouble ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplexFFTImageFilterTestOutputDouble.mha itkVkComplexToComplexFFTImageFilterTest double ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplexFFTImageFilterTestOutputDouble.mha - ) +) _vkfft_disable_on_unsupported_fp64(itkVkComplexToComplexFFTImageFilterTestDouble) # ----------------------------------------------------------------------------- @@ -69,7 +75,7 @@ itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterSizesTestFloat COMMAND VkFFTBackendTestDriver itkVkComplexToComplex1DFFTImageFilterSizesTest float ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplex1DFFTImageFilterSizesTestOutputFloat.mha - ) +) itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterSizesTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -77,7 +83,7 @@ itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterSizesTestDouble ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplex1DFFTImageFilterSizesTestOutputDouble.mha itkVkComplexToComplex1DFFTImageFilterSizesTest double ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplex1DFFTImageFilterSizesTestOutputDouble.mha - ) +) _vkfft_disable_on_unsupported_fp64(itkVkComplexToComplex1DFFTImageFilterSizesTestDouble) # 
----------------------------------------------------------------------------- @@ -92,7 +98,7 @@ itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterBaselineTestFloat DATA{Input/itkForward1DFFTImageFilterTestBaselineRealFull.mhd,itkForward1DFFTImageFilterTestBaselineRealFull.raw} DATA{Input/itkForward1DFFTImageFilterTestBaselineImaginaryFull.mhd,itkForward1DFFTImageFilterTestBaselineImaginaryFull.raw} ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplex1DFFTImageFilterBaselineTestOutputFloat.mha - ) +) itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterBaselineTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -102,7 +108,7 @@ itk_add_test(NAME itkVkComplexToComplex1DFFTImageFilterBaselineTestDouble DATA{Input/itkForward1DFFTImageFilterTestBaselineRealFull.mhd,itkForward1DFFTImageFilterTestBaselineRealFull.raw} DATA{Input/itkForward1DFFTImageFilterTestBaselineImaginaryFull.mhd,itkForward1DFFTImageFilterTestBaselineImaginaryFull.raw} ${ITK_TEST_OUTPUT_DIR}/itkVkComplexToComplex1DFFTImageFilterBaselineTestOutputDouble.mha - ) +) _vkfft_disable_on_unsupported_fp64(itkVkComplexToComplex1DFFTImageFilterBaselineTestDouble) # ----------------------------------------------------------------------------- @@ -119,7 +125,7 @@ itk_add_test(NAME itkVkForward1DFFTImageFilterBaselineTestFloat itkVkForward1DFFTImageFilterBaselineTest float DATA{Input/TreeBarkTexture.png} ${ITK_TEST_OUTPUT_DIR}/itkVkForward1DFFTImageFilterTestBaselineOutputFloat - ) +) itk_add_test(NAME itkVkForward1DFFTImageFilterBaselineTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -131,7 +137,7 @@ itk_add_test(NAME itkVkForward1DFFTImageFilterBaselineTestDouble itkVkForward1DFFTImageFilterBaselineTest double DATA{Input/TreeBarkTexture.png} ${ITK_TEST_OUTPUT_DIR}/itkVkForward1DFFTImageFilterTestBaselineOutputDouble - ) +) _vkfft_disable_on_unsupported_fp64(itkVkForward1DFFTImageFilterBaselineTestDouble) # ----------------------------------------------------------------------------- @@ -146,7 +152,7 @@ 
itk_add_test(NAME itkVkInverse1DFFTImageFilterBaselineTestFloat DATA{Input/itkForward1DFFTImageFilterTestBaselineRealFull.mhd,itkForward1DFFTImageFilterTestBaselineRealFull.raw} DATA{Input/itkForward1DFFTImageFilterTestBaselineImaginaryFull.mhd,itkForward1DFFTImageFilterTestBaselineImaginaryFull.raw} ${ITK_TEST_OUTPUT_DIR}/itkVkInverse1DFFTImageFilterBaselineTestFloat.mhd - ) +) itk_add_test(NAME itkVkInverse1DFFTImageFilterBaselineTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -156,7 +162,7 @@ itk_add_test(NAME itkVkInverse1DFFTImageFilterBaselineTestDouble DATA{Input/itkForward1DFFTImageFilterTestBaselineRealFull.mhd,itkForward1DFFTImageFilterTestBaselineRealFull.raw} DATA{Input/itkForward1DFFTImageFilterTestBaselineImaginaryFull.mhd,itkForward1DFFTImageFilterTestBaselineImaginaryFull.raw} ${ITK_TEST_OUTPUT_DIR}/itkVkInverse1DFFTImageFilterBaselineTestDouble.mhd - ) +) _vkfft_disable_on_unsupported_fp64(itkVkInverse1DFFTImageFilterBaselineTestDouble) # ----------------------------------------------------------------------------- @@ -164,10 +170,10 @@ _vkfft_disable_on_unsupported_fp64(itkVkInverse1DFFTImageFilterBaselineTestDoubl # ----------------------------------------------------------------------------- itk_add_test(NAME itkVkForwardInverseFFTImageFilterTestFloat COMMAND VkFFTBackendTestDriver itkVkForwardInverseFFTImageFilterTest float - ) +) itk_add_test(NAME itkVkForwardInverseFFTImageFilterTestDouble COMMAND VkFFTBackendTestDriver itkVkForwardInverseFFTImageFilterTest double - ) +) _vkfft_disable_on_unsupported_fp64(itkVkForwardInverseFFTImageFilterTestDouble) # pocl (CPU OpenCL) computes VkFFT's size-19 Bluestein inverse incorrectly, so @@ -175,10 +181,10 @@ _vkfft_disable_on_unsupported_fp64(itkVkForwardInverseFFTImageFilterTestDouble) # (radix-2/3/5/7 plus Bluestein primes 11/13). Real GPUs run the full sweep above. 
itk_add_test(NAME itkVkForwardInverseFFTImageFilterTestFloatPoclSafe COMMAND VkFFTBackendTestDriver itkVkForwardInverseFFTImageFilterTest float 16 - ) +) itk_add_test(NAME itkVkForwardInverseFFTImageFilterTestDoublePoclSafe COMMAND VkFFTBackendTestDriver itkVkForwardInverseFFTImageFilterTest double 16 - ) +) _vkfft_disable_on_unsupported_fp64(itkVkForwardInverseFFTImageFilterTestDoublePoclSafe) # ----------------------------------------------------------------------------- @@ -186,18 +192,18 @@ _vkfft_disable_on_unsupported_fp64(itkVkForwardInverseFFTImageFilterTestDoublePo # ----------------------------------------------------------------------------- itk_add_test(NAME itkVkForwardInverse1DFFTImageFilterTestFloat COMMAND VkFFTBackendTestDriver itkVkForwardInverse1DFFTImageFilterTest float - ) +) itk_add_test(NAME itkVkForwardInverse1DFFTImageFilterTestDouble COMMAND VkFFTBackendTestDriver itkVkForwardInverse1DFFTImageFilterTest double - ) +) _vkfft_disable_on_unsupported_fp64(itkVkForwardInverse1DFFTImageFilterTestDouble) itk_add_test(NAME itkVkForwardInverse1DFFTImageFilterTestFloatPoclSafe COMMAND VkFFTBackendTestDriver itkVkForwardInverse1DFFTImageFilterTest float 16 - ) +) itk_add_test(NAME itkVkForwardInverse1DFFTImageFilterTestDoublePoclSafe COMMAND VkFFTBackendTestDriver itkVkForwardInverse1DFFTImageFilterTest double 16 - ) +) _vkfft_disable_on_unsupported_fp64(itkVkForwardInverse1DFFTImageFilterTestDoublePoclSafe) # ----------------------------------------------------------------------------- @@ -205,18 +211,18 @@ _vkfft_disable_on_unsupported_fp64(itkVkForwardInverse1DFFTImageFilterTestDouble # ----------------------------------------------------------------------------- itk_add_test(NAME itkVkHalfHermitianFFTImageFilterTestFloat COMMAND VkFFTBackendTestDriver itkVkHalfHermitianFFTImageFilterTest float - ) +) itk_add_test(NAME itkVkHalfHermitianFFTImageFilterTestDouble COMMAND VkFFTBackendTestDriver itkVkHalfHermitianFFTImageFilterTest double - ) +) 
_vkfft_disable_on_unsupported_fp64(itkVkHalfHermitianFFTImageFilterTestDouble) itk_add_test(NAME itkVkHalfHermitianFFTImageFilterTestFloatPoclSafe COMMAND VkFFTBackendTestDriver itkVkHalfHermitianFFTImageFilterTest float 16 - ) +) itk_add_test(NAME itkVkHalfHermitianFFTImageFilterTestDoublePoclSafe COMMAND VkFFTBackendTestDriver itkVkHalfHermitianFFTImageFilterTest double 16 - ) +) _vkfft_disable_on_unsupported_fp64(itkVkHalfHermitianFFTImageFilterTestDoublePoclSafe) # ----------------------------------------------------------------------------- @@ -225,21 +231,23 @@ _vkfft_disable_on_unsupported_fp64(itkVkHalfHermitianFFTImageFilterTestDoublePoc itk_add_test(NAME itkVkFFTImageFilterFactoryTestFloat COMMAND VkFFTBackendTestDriver itkVkFFTImageFilterFactoryTest float - ) +) itk_add_test(NAME itkVkFFTImageFilterFactoryTestDouble COMMAND VkFFTBackendTestDriver itkVkFFTImageFilterFactoryTest double - ) +) # ----------------------------------------------------------------------------- # GlobalConfigurationTest # ----------------------------------------------------------------------------- itk_add_test(NAME itkVkGlobalConfigurationTestFloat COMMAND VkFFTBackendTestDriver - itkVkGlobalConfigurationTest float) + itkVkGlobalConfigurationTest float +) itk_add_test(NAME itkVkGlobalConfigurationTestDouble COMMAND VkFFTBackendTestDriver - itkVkGlobalConfigurationTest double) + itkVkGlobalConfigurationTest double +) # ----------------------------------------------------------------------------- # MultiResolutionPyramidImageFilterTest @@ -304,11 +312,11 @@ _vkfft_disable_on_unsupported_fp64(itkVkMultiResolutionPyramidImageFilterTestDou itk_add_test(NAME itkVkMultiResolutionPyramidImageFilterFactoryTestFloat COMMAND VkFFTBackendTestDriver itkVkMultiResolutionPyramidImageFilterFactoryTest float - ) +) itk_add_test(NAME itkVkMultiResolutionPyramidImageFilterFactoryTestDouble COMMAND VkFFTBackendTestDriver itkVkMultiResolutionPyramidImageFilterFactoryTest double - ) +) # 
----------------------------------------------------------------------------- # DiscreteGaussianImageFilterTest (2 entries: expect-spatial and expect-FFT) @@ -324,7 +332,7 @@ itk_add_test(NAME itkVkDiscreteGaussianImageFilterTestFloat 0 # Expect spatial 8.0 # Threshold for FFT to run 2.0 # Sigma - ) +) itk_add_test(NAME itkVkDiscreteGaussianImageFilterTestDouble COMMAND VkFFTBackendTestDriver --compare @@ -336,7 +344,7 @@ itk_add_test(NAME itkVkDiscreteGaussianImageFilterTestDouble 0 # Expect spatial 8.0 # Threshold for FFT to run 2.0 # Sigma - ) +) _vkfft_disable_on_unsupported_fp64(itkVkDiscreteGaussianImageFilterTestDouble) itk_add_test(NAME itkVkDiscreteGaussianImageFilterTest2Float @@ -350,7 +358,7 @@ itk_add_test(NAME itkVkDiscreteGaussianImageFilterTest2Float 1 # Expect FFT 2.0 # Lower threshold for FFT to run 2.0 # Sigma - ) +) itk_add_test(NAME itkVkDiscreteGaussianImageFilterTest2Double COMMAND VkFFTBackendTestDriver --compare @@ -362,7 +370,7 @@ itk_add_test(NAME itkVkDiscreteGaussianImageFilterTest2Double 1 # Expect FFT 2.0 # Lower threshold for FFT to run 2.0 # Sigma - ) +) _vkfft_disable_on_unsupported_fp64(itkVkDiscreteGaussianImageFilterTest2Double) # Disable every FFT-touching test when VKFFT_BACKEND=4 (Level Zero) is selected @@ -370,7 +378,8 @@ _vkfft_disable_on_unsupported_fp64(itkVkDiscreteGaussianImageFilterTest2Double) # top-level CMakeLists.txt). The lightweight factory / global-configuration # tests still run because they exercise no GPU code. 
if(VKFFT_BACKEND EQUAL 4 AND NOT VkFFTBackend_LEVEL_ZERO_RUNTIME_AVAILABLE) - foreach(_stem + foreach( + _stem itkVkComplexToComplexFFTImageFilterTest itkVkComplexToComplex1DFFTImageFilterSizesTest itkVkComplexToComplex1DFFTImageFilterBaselineTest @@ -385,7 +394,12 @@ if(VKFFT_BACKEND EQUAL 4 AND NOT VkFFTBackend_LEVEL_ZERO_RUNTIME_AVAILABLE) ) foreach(_suffix Float Double FloatPoclSafe DoublePoclSafe) if(TEST ${_stem}${_suffix}) - set_tests_properties(${_stem}${_suffix} PROPERTIES DISABLED TRUE) + set_tests_properties( + ${_stem}${_suffix} + PROPERTIES + DISABLED + TRUE + ) endif() endforeach() endforeach() diff --git a/wrapping/itkVkComplexToComplex1DFFTImageFilter.wrap b/wrapping/itkVkComplexToComplex1DFFTImageFilter.wrap index ef00b73..c9b366c 100644 --- a/wrapping/itkVkComplexToComplex1DFFTImageFilter.wrap +++ b/wrapping/itkVkComplexToComplex1DFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkComplexToComplex1DFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(CF 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(CF 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(CD 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(CD 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkComplexToComplexFFTImageFilter.wrap b/wrapping/itkVkComplexToComplexFFTImageFilter.wrap index dd47c7b..a0176e8 100644 --- a/wrapping/itkVkComplexToComplexFFTImageFilter.wrap +++ b/wrapping/itkVkComplexToComplexFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkComplexToComplexFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(CF 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(CF 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(CD 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(CD 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkDiscreteGaussianImageFilter.wrap 
b/wrapping/itkVkDiscreteGaussianImageFilter.wrap index 57f116b..f15c543 100644 --- a/wrapping/itkVkDiscreteGaussianImageFilter.wrap +++ b/wrapping/itkVkDiscreteGaussianImageFilter.wrap @@ -1,3 +1,3 @@ itk_wrap_class("itk::VkDiscreteGaussianImageFilter" POINTER) - itk_wrap_image_filter("${WRAP_ITK_SCALAR}" 2) +itk_wrap_image_filter("${WRAP_ITK_SCALAR}" 2) itk_end_wrap_class() diff --git a/wrapping/itkVkForward1DFFTImageFilter.wrap b/wrapping/itkVkForward1DFFTImageFilter.wrap index 776792c..99348d1 100644 --- a/wrapping/itkVkForward1DFFTImageFilter.wrap +++ b/wrapping/itkVkForward1DFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkForward1DFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(F 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(F 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(D 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(D 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkForwardFFTImageFilter.wrap b/wrapping/itkVkForwardFFTImageFilter.wrap index 1ac8b51..f5321ce 100644 --- a/wrapping/itkVkForwardFFTImageFilter.wrap +++ b/wrapping/itkVkForwardFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkForwardFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(F 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(F 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(D 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(D 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkHalfHermitianToRealInverseFFTImageFilter.wrap b/wrapping/itkVkHalfHermitianToRealInverseFFTImageFilter.wrap index a09ef50..ecb9d99 100644 --- a/wrapping/itkVkHalfHermitianToRealInverseFFTImageFilter.wrap +++ b/wrapping/itkVkHalfHermitianToRealInverseFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkHalfHermitianToRealInverseFFTImageFilter" POINTER) - 
if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(CF 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(CF 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(CD 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(CD 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkInverse1DFFTImageFilter.wrap b/wrapping/itkVkInverse1DFFTImageFilter.wrap index 93b59fe..f295761 100644 --- a/wrapping/itkVkInverse1DFFTImageFilter.wrap +++ b/wrapping/itkVkInverse1DFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkInverse1DFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(CF 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(CF 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(CD 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(CD 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkInverseFFTImageFilter.wrap b/wrapping/itkVkInverseFFTImageFilter.wrap index b33e851..9d3b91f 100644 --- a/wrapping/itkVkInverseFFTImageFilter.wrap +++ b/wrapping/itkVkInverseFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkInverseFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(CF 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(CF 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(CD 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(CD 1 1;2;3) +endif() itk_end_wrap_class() diff --git a/wrapping/itkVkMultiResolutionPyramidImageFilter.wrap b/wrapping/itkVkMultiResolutionPyramidImageFilter.wrap index 3e0cd79..d8984d4 100644 --- a/wrapping/itkVkMultiResolutionPyramidImageFilter.wrap +++ b/wrapping/itkVkMultiResolutionPyramidImageFilter.wrap @@ -1,3 +1,3 @@ itk_wrap_class("itk::VkMultiResolutionPyramidImageFilter" POINTER) - itk_wrap_image_filter("${WRAP_ITK_SCALAR}" 2) +itk_wrap_image_filter("${WRAP_ITK_SCALAR}" 2) 
itk_end_wrap_class() diff --git a/wrapping/itkVkRealToHalfHermitianForwardFFTImageFilter.wrap b/wrapping/itkVkRealToHalfHermitianForwardFFTImageFilter.wrap index 5486dff..8a513b5 100644 --- a/wrapping/itkVkRealToHalfHermitianForwardFFTImageFilter.wrap +++ b/wrapping/itkVkRealToHalfHermitianForwardFFTImageFilter.wrap @@ -1,9 +1,9 @@ itk_wrap_class("itk::VkRealToHalfHermitianForwardFFTImageFilter" POINTER) - if(ITK_WRAP_COMPLEX_FLOAT) - itk_wrap_image_filter(F 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_FLOAT) + itk_wrap_image_filter(F 1 1;2;3) +endif() - if(ITK_WRAP_COMPLEX_DOUBLE) - itk_wrap_image_filter(D 1 1;2;3) - endif() +if(ITK_WRAP_COMPLEX_DOUBLE) + itk_wrap_image_filter(D 1 1;2;3) +endif() itk_end_wrap_class() From 62c433d26e6f94c5f7afe6d824a45050f072b71a Mon Sep 17 00:00:00 2001 From: "Hans J. Johnson" Date: Mon, 8 Jun 2026 12:47:35 -0500 Subject: [PATCH 3/4] STYLE: Apply ITK clang-format, black, and pyupgrade formatting Run the ITK-pinned formatters over the tree: clang-format v19.1.7 (itkVkCommon.cxx), black 24.2.0 and pyupgrade on the Python files, and the standard end-of-file / trailing-whitespace hygiene fixes. Formatting only; VKFFT_BACKEND=1 builds and passes 36/36 VkFFTBackend tests on CUDA hardware. 
--- .cirun.yml | 2 +- example/AcceleratedForwardFFT.ipynb | 2 +- example/AcceleratedGaussianBlurring.ipynb | 2 +- example/fft_benchmark.py | 1 + src/itkVkCommon.cxx | 36 +++++++++++++---------- wrapping/test/itkVkFFTInitFactoryTest.py | 2 +- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/.cirun.yml b/.cirun.yml index e53d109..8a70eba 100644 --- a/.cirun.yml +++ b/.cirun.yml @@ -17,7 +17,7 @@ runners: # Add a label to match runs-on param in Github Actions yml files labels: - gpu - + - name: notebook-gpu-runner # Cloud Provider: Amazon Web Services cloud: aws diff --git a/example/AcceleratedForwardFFT.ipynb b/example/AcceleratedForwardFFT.ipynb index a40fd20..c3dc7d2 100644 --- a/example/AcceleratedForwardFFT.ipynb +++ b/example/AcceleratedForwardFFT.ipynb @@ -289,4 +289,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/example/AcceleratedGaussianBlurring.ipynb b/example/AcceleratedGaussianBlurring.ipynb index aeaeec1..b6023e5 100644 --- a/example/AcceleratedGaussianBlurring.ipynb +++ b/example/AcceleratedGaussianBlurring.ipynb @@ -336,4 +336,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/example/fft_benchmark.py b/example/fft_benchmark.py index 09425f4..8093597 100644 --- a/example/fft_benchmark.py +++ b/example/fft_benchmark.py @@ -29,6 +29,7 @@ complex_image_type = itk.Image[itk.complex[pixel_type], dimension] fft_filter_type = itk.ForwardFFTImageFilter[image_type, complex_image_type] + # Return time for filter.Update() def benchmark_fft(itk_fft_filter: fft_filter_type) -> float: start = time.time() diff --git a/src/itkVkCommon.cxx b/src/itkVkCommon.cxx index 93d3181..5030620 100644 --- a/src/itkVkCommon.cxx +++ b/src/itkVkCommon.cxx @@ -82,11 +82,11 @@ VkCommon::ConfigureBackend() res = cuDeviceGet(&m_VkGPU.device, (int)m_VkGPU.device_id); if (res != CUDA_SUCCESS) return VkFFTResult{ VKFFT_ERROR_FAILED_TO_GET_DEVICE }; -#if CUDA_VERSION >= 13000 +# if CUDA_VERSION 
>= 13000 res = cuCtxCreate(&m_VkGPU.context, nullptr, 0, (int)m_VkGPU.device); -#else +# else res = cuCtxCreate(&m_VkGPU.context, 0, (int)m_VkGPU.device); -#endif +# endif if (res != CUDA_SUCCESS) return VkFFTResult{ VKFFT_ERROR_FAILED_TO_CREATE_CONTEXT }; @@ -146,9 +146,9 @@ VkCommon::ConfigureBackend() { m_VkGPU.platform = platforms[j]; m_VkGPU.device = deviceList[i]; - const cl_context_properties contextProperties[]{ - CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(m_VkGPU.platform), 0 - }; + const cl_context_properties contextProperties[]{ CL_CONTEXT_PLATFORM, + reinterpret_cast<cl_context_properties>(m_VkGPU.platform), + 0 }; m_VkGPU.context = clCreateContext(contextProperties, 1, &m_VkGPU.device, NULL, NULL, &resCL); if (resCL != CL_SUCCESS) { @@ -560,15 +560,16 @@ VkCommon::PerformFFT() return VkFFTResult{ VKFFT_ERROR_FAILED_TO_COPY }; } #elif (VKFFT_BACKEND == LEVEL_ZERO) - ze_result_t resZE{ ZE_RESULT_SUCCESS }; - void * inputGPUBuffer{ nullptr }; - void * GPUBuffer{ nullptr }; - void * outputGPUBuffer{ nullptr }; + ze_result_t resZE{ ZE_RESULT_SUCCESS }; + void * inputGPUBuffer{ nullptr }; + void * GPUBuffer{ nullptr }; + void * outputGPUBuffer{ nullptr }; ze_device_mem_alloc_desc_t deviceMemDesc{}; deviceMemDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; const uint64_t bufferBytes{ 2UL * m_VkParameters.PSize * *m_VkFFTConfiguration.bufferSize }; - resZE = zeMemAllocDevice(m_VkGPU.context, &deviceMemDesc, bufferBytes, m_VkParameters.PSize, m_VkGPU.device, &GPUBuffer); + resZE = + zeMemAllocDevice(m_VkGPU.context, &deviceMemDesc, bufferBytes, m_VkParameters.PSize, m_VkGPU.device, &GPUBuffer); if (resZE != ZE_RESULT_SUCCESS) return VkFFTResult{ VKFFT_ERROR_FAILED_TO_ALLOCATE }; m_VkFFTConfiguration.buffer = &GPUBuffer; @@ -610,8 +611,13 @@ VkCommon::PerformFFT() resZE = zeCommandListCreateImmediate(m_VkGPU.context, m_VkGPU.device, &copyQueueDesc, &copyCommandList); if (resZE != ZE_RESULT_SUCCESS) return VkFFTResult{ VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST }; - resZE =
zeCommandListAppendMemoryCopy( - copyCommandList, inputGPUBuffer, m_VkParameters.inputCPUBuffer, m_VkParameters.inputBufferBytes, nullptr, 0, nullptr); + resZE = zeCommandListAppendMemoryCopy(copyCommandList, + inputGPUBuffer, + m_VkParameters.inputCPUBuffer, + m_VkParameters.inputBufferBytes, + nullptr, + 0, + nullptr); if (resZE != ZE_RESULT_SUCCESS) return VkFFTResult{ VKFFT_ERROR_FAILED_TO_COPY }; resZE = zeCommandQueueSynchronize(m_VkGPU.commandQueue, UINT32_MAX); @@ -684,7 +690,7 @@ VkCommon::PerformFFT() #elif (VKFFT_BACKEND == OPENCL) launchParams.commandQueue = &m_VkGPU.commandQueue; #elif (VKFFT_BACKEND == LEVEL_ZERO) - ze_command_list_desc_t commandListDescription{}; + ze_command_list_desc_t commandListDescription{}; commandListDescription.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; commandListDescription.commandQueueGroupOrdinal = m_VkGPU.commandQueueID; ze_command_list_handle_t launchCommandList{ nullptr }; @@ -693,7 +699,7 @@ VkCommon::PerformFFT() return VkFFTResult{ VKFFT_ERROR_FAILED_TO_CREATE_COMMAND_LIST }; launchParams.commandList = &launchCommandList; #elif (VKFFT_BACKEND == METAL) - MTL::CommandBuffer * metalCommandBuffer = m_VkGPU.queue->commandBuffer(); + MTL::CommandBuffer * metalCommandBuffer = m_VkGPU.queue->commandBuffer(); MTL::ComputeCommandEncoder * metalEncoder = metalCommandBuffer->computeCommandEncoder(); launchParams.commandBuffer = metalCommandBuffer; launchParams.commandEncoder = metalEncoder; diff --git a/wrapping/test/itkVkFFTInitFactoryTest.py b/wrapping/test/itkVkFFTInitFactoryTest.py index 29f5225..f03cfb3 100644 --- a/wrapping/test/itkVkFFTInitFactoryTest.py +++ b/wrapping/test/itkVkFFTInitFactoryTest.py @@ -68,7 +68,7 @@ ), ] -for (base_filter_type, vk_filter_type) in image_filter_list: +for base_filter_type, vk_filter_type in image_filter_list: # Instantiate through the ITK object factory image_filter = base_filter_type.New() assert image_filter is not None From 0bcd07423ac1b58547ddf6edefbbad4865bc6ef9 Mon Sep 17 
00:00:00 2001 From: "Hans J. Johnson" Date: Mon, 8 Jun 2026 12:47:35 -0500 Subject: [PATCH 4/4] COMP: Add pre-commit configuration matching the ITK toolchain Pin the formatters to the exact versions ITK uses so CMake, C++, and Python formatting stays byte-identical with upstream: gersemi 0.19.3, clang-format v19.1.7, black 24.2.0, pyupgrade v3.21.2. gersemi and the other formatters do not produce stable output across releases, so matching ITK's pins is required to avoid reformatting churn against ITK CI. The version-insensitive hygiene hooks (pre-commit-hooks) are pinned to their latest release, v6.0.0. --- .pre-commit-config.yaml | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7013c2e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,42 @@ +# .pre-commit-config.yaml +# for details see https://pre-commit.com +# Hook versions are pinned to match the upstream ITK toolchain so that +# formatting stays byte-identical with ITK CI (gersemi in particular is +# not forward/backward compatible across releases). 
+fail_fast: true +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-added-large-files + args: ['--maxkb=200'] + - id: check-merge-conflict + args: ['--assume-in-merge'] + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v19.1.7 + hooks: + - id: clang-format + args: ['--style=file'] + files: '\.(c|cc|h|cxx|hxx|cu)$' + + - repo: https://github.com/psf/black + rev: 24.2.0 + hooks: + - id: black + args: ['--target-version', 'py310'] + + - repo: https://github.com/asottile/pyupgrade + rev: v3.21.2 + hooks: + - id: pyupgrade + args: ['--py310-plus'] + + - repo: https://github.com/BlankSpruce/gersemi + rev: 0.19.3 + hooks: + - id: gersemi + args: ['--config', '.gersemi.config', '-i'] + files: '\.(cmake|wrap)$|CMakeLists.txt'