Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/



Expand Down Expand Up @@ -645,4 +645,4 @@ FodyWeavers.xsd
*.msp

# JetBrains Rider
*.sln.iml
*.sln.iml
321 changes: 316 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,326 @@
#cmake_minimum_required(VERSION 3.23 FATAL_ERROR)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
# cmake version 3.23 or higher is needed to support
# the argument CUDA_ARCHITECTURES all-major
cmake_minimum_required(VERSION 3.23 FATAL_ERROR)

project(leapct)
project(leapct LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

ENABLE_TESTING()
# Keep project-local helper modules under version control in one place.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

# ------------------------------------------------------------------------------
# GPU backend selection
#
# We select a default accelerator type by probing for available languages.
# Users can always override this at configure time, for example:
#
# cmake -S . -B build -DLEAP_GPU=NVIDIA
# cmake -S . -B build -DLEAP_GPU=AMD
# cmake -S . -B build -DLEAP_GPU=None
#
# NOTE:
# The default is chosen at configure time based on available compilers/toolchains,
# not based on a runtime-visible GPU device. This is more reliable than the old
# setup.py approach that queried torch.cuda.get_device_name(0).
# ------------------------------------------------------------------------------

include(CheckLanguage)

# Probe both GPU toolchains up front. check_language() only records the
# compiler it finds (if any) in CMAKE_<LANG>_COMPILER; it does not enable
# the language.
check_language(HIP)
check_language(CUDA)

# CUDA takes precedence over HIP when both toolchains are present; with
# neither available the default is a CPU-only build.
if(CMAKE_CUDA_COMPILER)
  set(_default_accelerator_type "NVIDIA")
elseif(CMAKE_HIP_COMPILER)
  set(_default_accelerator_type "AMD")
else()
  set(_default_accelerator_type "None")
endif()

# The probed value is only the initial default of the cache entry; a value
# given with -DLEAP_GPU=... or already stored in the cache is left untouched.
set(LEAP_GPU "${_default_accelerator_type}" CACHE STRING "GPU acceleration type")
unset(_default_accelerator_type)

# Advertise the accepted values (shown as a drop-down in cmake-gui / ccmake).
set_property(CACHE LEAP_GPU PROPERTY STRINGS "NVIDIA" "AMD" "None")
get_property(OPT_STRINGS CACHE LEAP_GPU PROPERTY STRINGS)

# Validate case-insensitively. The value is upper-cased below before any
# branch looks at it, so spellings such as "nvidia", "amd" or "none" are
# unambiguous and should not be rejected. Upper-casing the list keeps its
# ';' separators, so it is still a valid list for IN_LIST.
string(TOUPPER "${LEAP_GPU}" _leap_gpu_upper)
string(TOUPPER "${OPT_STRINGS}" _leap_gpu_allowed_upper)

if(NOT _leap_gpu_upper IN_LIST _leap_gpu_allowed_upper)
  message(FATAL_ERROR "Wrong value of the parameter 'LEAP_GPU': ${LEAP_GPU}")
endif()

unset(_leap_gpu_upper)
unset(_leap_gpu_allowed_upper)

message(STATUS "LEAP_GPU selected accelerator type ${LEAP_GPU}")

# Normalize for branch comparisons below. This sets a normal variable that
# shadows the cache entry for the rest of this directory scope; the cached
# value itself keeps the spelling the user supplied.
string(TOUPPER "${LEAP_GPU}" LEAP_GPU)

# ------------------------------------------------------------------------------
# Source-tree locations and translation state shared with src/CMakeLists.txt
#
# Everything here is a plain (non-cache) variable, so it is recomputed on every
# configure run and inherited by the src/ subdirectory.
# ------------------------------------------------------------------------------

# Pristine, version-controlled sources.
set(LEAP_ORIGINAL_SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")

# Build-tree directory that receives generated (hipified) files.
set(LEAP_HIPIFIED_SRC_DIR "${CMAKE_CURRENT_BINARY_DIR}/hipified_src")

# Build-tree staging directory holding copies of the raw inputs that are fed
# to hipify-clang.
set(LEAP_HIPIFY_STAGE_DIR "${CMAKE_CURRENT_BINARY_DIR}/hipify_stage")

# Source root that src/CMakeLists.txt should build from. It starts out as the
# original tree (used as-is for CPU and CUDA builds) and is repointed to
# LEAP_HIPIFIED_SRC_DIR only when the torch hipify helper succeeds.
set(LEAP_SELECTED_SRC_DIR "${LEAP_ORIGINAL_SRC_DIR}")

# Active translation method: none, torch, or hipify-clang.
set(HIPIFY_METHOD "none")

# ------------------------------------------------------------------------------
# NVIDIA CUDA build
# ------------------------------------------------------------------------------

if (LEAP_GPU STREQUAL "NVIDIA")
# Choose the CUDA architectures to compile for.
#
# To minimize binary size and compile time, users are suggested to set the
# CUDAARCHS environment variable or define the CMake variable
# CMAKE_CUDA_ARCHITECTURES, either to "native" (build only for the GPU
# visible to the build environment; this keyword needs CMake 3.24 or newer)
# or to an explicit numerical architecture code, such as "70".
#
# The all-major fallback is applied only when the user supplied neither.
# CMake initializes CMAKE_CUDA_ARCHITECTURES from CUDAARCHS only while the
# variable is still unset, so defining it here unconditionally would silently
# discard a CUDAARCHS value from the environment.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES AND "$ENV{CUDAARCHS}" STREQUAL "")
  set(CMAKE_CUDA_ARCHITECTURES all-major)
endif()

enable_language(CUDA)
set(LEAP_CUDA ON)

# ------------------------------------------------------------------------------
# AMD HIP build
#
# For AMD builds, we do not try to compile raw CUDA sources directly. Instead:
# 1. Prefer torch-based hipify if torch is importable at build time
# 2. Otherwise fall back to per-file hipify-clang via cmake/LeapHipifyClang.cmake
# 3. Otherwise fail early
#
# In this repository, step 1 is the supported path. It writes translated files
# into ${CMAKE_CURRENT_BINARY_DIR}/hipified_src, keeps generated artifacts out
# of src/, and is the only path that has been validated end-to-end for ROCm 7.2.1.
# The hipify-clang branch remains available as an escape hatch for other
# installations, but it is still experimental here.
#
# IMPORTANT:
# ROCm 6.4 hipify-clang does not support the directory-wide "-i -o src dst"
# style used in some examples. Therefore we only detect hipify-clang here.
# The helper is selected here and invoked from src/CMakeLists.txt.
# ------------------------------------------------------------------------------

elseif(LEAP_GPU STREQUAL "AMD")
enable_language(HIP)
set(LEAP_HIP ON)

# User-facing switches for the CUDA -> HIP source translation step.
#
#   HIPIFY_PREFER_TORCH     attempt torch.utils.hipify first whenever torch
#                           can be imported
#   HIPIFY_ALLOW_TOOLCHAIN  permit the experimental hipify-clang fallback when
#                           torch hipify is unavailable or switched off
#   HIPIFY_FAIL_IF_MISSING  abort configuration when no translation path is
#                           available; keeping this ON is recommended, since
#                           compiling untranslated CUDA sources as HIP is
#                           usually not what you want
option(HIPIFY_PREFER_TORCH "Prefer torch-based hipify when torch is importable" ON)
option(HIPIFY_ALLOW_TOOLCHAIN "Allow fallback to hipify-clang" ON)
option(HIPIFY_FAIL_IF_MISSING "Fail if AMD build is requested and hipify cannot run" ON)

# Output directory and status markers for the torch-helper branch only. The
# per-file clang branch manages its own staging and generated trees inside
# src/CMakeLists.txt.
file(MAKE_DIRECTORY "${LEAP_HIPIFIED_SRC_DIR}")

set(HIPIFY_DONE_FILE "${LEAP_HIPIFIED_SRC_DIR}/hipify.done")
set(HIPIFY_SKIPPED_FILE "${LEAP_HIPIFIED_SRC_DIR}/hipify.skipped")
set(HIPIFY_FAILED_FILE "${LEAP_HIPIFIED_SRC_DIR}/hipify.failed")

# Clear markers left over from a previous configure run so that whatever is
# present afterwards reflects this run only.
file(REMOVE
  "${HIPIFY_DONE_FILE}"
  "${HIPIFY_SKIPPED_FILE}"
  "${HIPIFY_FAILED_FILE}"
)

# Explicit opt-in for hipify, taken from the configure-time environment.
#
# Example:
# HIPIFY_AT_BUILD=1 cmake -S . -B build -DLEAP_GPU=AMD
#
# Only the exact spellings 1, true, TRUE and ON turn the request on; anything
# else, including an unset variable, leaves it OFF. The helper script may use
# HIPIFY_AT_BUILD to force a torch hipify attempt even if the installed torch
# is not ROCm-enabled.
# NOTE(review): HIPIFY_REQUESTED is not read in the visible part of this
# file; confirm where it is consumed.
set(HIPIFY_REQUESTED OFF)

set(_leap_hipify_env_value "$ENV{HIPIFY_AT_BUILD}")
set(_leap_hipify_truthy_values "1" "true" "TRUE" "ON")
if(_leap_hipify_env_value IN_LIST _leap_hipify_truthy_values)
  set(HIPIFY_REQUESTED ON)
endif()
unset(_leap_hipify_env_value)
unset(_leap_hipify_truthy_values)

# --------------------------------------------------------------------------
# Path A, torch-based hipify
#
# This is the preferred AMD path in this repository. It uses PyTorch's
# hipify utilities when torch is available in the build environment and
# generates a full build-local source tree under ${LEAP_HIPIFIED_SRC_DIR}.
#
# IMPORTANT:
# In default isolated PEP 517 builds, torch is NOT available unless the
# build environment provides it explicitly. In that common case, this
# branch will be skipped and we will try hipify-clang instead.
# --------------------------------------------------------------------------
# Torch-based translation. Each prerequisite is checked in turn and the path
# is abandoned with a status message as soon as one is missing. On success
# HIPIFY_METHOD becomes "torch" and LEAP_SELECTED_SRC_DIR points at the
# generated tree; in every other case both keep their previous values so a
# later fallback can still run.
if(HIPIFY_PREFER_TORCH)
  find_package(Python3 COMPONENTS Interpreter QUIET)

  if(NOT Python3_Interpreter_FOUND)
    message(STATUS "Python3 interpreter not found, skipping torch hipify path")
  else()
    # Cheap probe: can the build-time interpreter import torch at all?
    # Output is discarded; only the exit status matters.
    execute_process(
      COMMAND ${Python3_EXECUTABLE} -c "import torch; print('yes')"
      RESULT_VARIABLE TORCH_IMPORT_RESULT
      OUTPUT_QUIET
      ERROR_QUIET
    )

    if(NOT TORCH_IMPORT_RESULT EQUAL 0)
      message(STATUS "torch not importable in build environment")
    else()
      message(STATUS "torch import succeeded, trying torch-based hipify")

      # The glob patterns are passed to the helper literally;
      # execute_process() does not go through a shell, so nothing expands
      # them on the way.
      execute_process(
        COMMAND
          ${Python3_EXECUTABLE}
          ${CMAKE_CURRENT_SOURCE_DIR}/tools/hipify_torch.py
          ${LEAP_ORIGINAL_SRC_DIR}
          ${LEAP_HIPIFIED_SRC_DIR}
          --project-root ${CMAKE_CURRENT_SOURCE_DIR}
          --include src/*.cu
          --include src/*.cuh
          --include src/*.cpp
          --include src/*.h
          --header-include-dir ${LEAP_ORIGINAL_SRC_DIR}
          --ignore build/**
          --ignore .git/**
          --ignore **/CMakeFiles/**
          --copy-tree-first
        RESULT_VARIABLE HIPIFY_TORCH_RESULT
        OUTPUT_VARIABLE HIPIFY_TORCH_OUT
        ERROR_VARIABLE HIPIFY_TORCH_ERR
      )

      message(STATUS "torch hipify stdout:\n${HIPIFY_TORCH_OUT}")

      # A zero exit status alone is not enough: the outcome is read from the
      # marker file found in the output directory afterwards.
      if(HIPIFY_TORCH_RESULT EQUAL 0)
        set(_leap_torch_helper_exit_ok TRUE)
      else()
        set(_leap_torch_helper_exit_ok FALSE)
      endif()

      if(_leap_torch_helper_exit_ok AND EXISTS "${HIPIFY_DONE_FILE}")
        set(HIPIFY_METHOD "torch")
        set(LEAP_SELECTED_SRC_DIR "${LEAP_HIPIFIED_SRC_DIR}")
      elseif(_leap_torch_helper_exit_ok AND EXISTS "${HIPIFY_SKIPPED_FILE}")
        message(STATUS "torch hipify helper skipped transformation")
      else()
        message(WARNING "torch hipify helper failed: ${HIPIFY_TORCH_ERR}")
      endif()

      unset(_leap_torch_helper_exit_ok)
    endif()
  endif()
endif()

# --------------------------------------------------------------------------
# Path B, hipify-clang fallback
#
# We only locate the executable here and record that this method is
# available. The actual per-file translation is performed in
# src/CMakeLists.txt.
#
# This path is intentionally retained for toolchain experiments, but on a
# ROCm 7.2.1 install it has shown multiple tool-side failure modes:
# missing outputs in direct -o mode, "-p ... -o ..." conflicts, and CUDA
# arch flags not always propagating into the tool's internal compile step.
# Treat it as poorly tested unless you have verified your local ROCm build.
# If a future hipify-clang release improves this, first re-check whether:
# - directory-oriented translation can replace the per-file flow
# - -p and -o now work together for the file types we translate
# - CUDA arch / extra-arg settings propagate into the tool's internal
# compile step consistently
# - LEAP still needs the local compatibility shim headers afterward
#
# If later you discover that your hipify-clang installation needs extra
# arguments, add them here as cache variables and pass them down to src/.
# For example:
#
# set(LEAP_HIPIFY_EXTRA_ARGS "--some-flag" CACHE STRING "extra hipify args")
#
# and then consume them in src/CMakeLists.txt.
# --------------------------------------------------------------------------
# Locate hipify-clang, but only when the torch path did not already succeed
# and the toolchain fallback is allowed. This merely records the executable;
# the translation itself is driven from src/CMakeLists.txt.
if(HIPIFY_METHOD STREQUAL "none" AND HIPIFY_ALLOW_TOOLCHAIN)
  # An environment ROCM_HOME takes precedence; otherwise a ROCM_HOME given
  # on the command line (-DROCM_HOME=...) is kept as is.
  if(DEFINED ENV{ROCM_HOME})
    set(ROCM_HOME $ENV{ROCM_HOME})
  endif()

  # Build the hint list only from roots that are actually set. Expanding an
  # unset root directly would produce the bogus hint "/bin". ROCM_PATH is
  # also honoured, as it is the variable ROCm's own tools consult.
  set(_leap_hipify_hints "")
  foreach(_leap_rocm_root IN ITEMS "${ROCM_HOME}" "$ENV{ROCM_PATH}")
    if(NOT "${_leap_rocm_root}" STREQUAL "")
      list(APPEND _leap_hipify_hints "${_leap_rocm_root}/bin")
    endif()
  endforeach()

  find_program(HIPIFY_CLANG hipify-clang
    HINTS
      ${_leap_hipify_hints}
    PATHS
      ENV PATH
  )
  unset(_leap_hipify_hints)

  if(HIPIFY_CLANG)
    message(STATUS "Found hipify-clang fallback: ${HIPIFY_CLANG}")
    set(HIPIFY_METHOD "hipify-clang")
    set(LEAP_HIPIFY_CLANG_EXECUTABLE "${HIPIFY_CLANG}")
  else()
    message(STATUS "hipify-clang not found")
  endif()
endif()

# --------------------------------------------------------------------------
# AMD build must have a successful translation path
# --------------------------------------------------------------------------
# Report the chosen translation method, or react to the absence of one:
# a hard stop when HIPIFY_FAIL_IF_MISSING is ON, otherwise just a warning.
if(NOT HIPIFY_METHOD STREQUAL "none")
  message(STATUS "HIPIFY_METHOD selected: ${HIPIFY_METHOD}")
elseif(HIPIFY_FAIL_IF_MISSING)
  message(FATAL_ERROR
    "LEAP_GPU=AMD was requested, but no hipify path succeeded. "
    "Install torch in a non-isolated build environment, or install ROCm hipify-clang."
  )
else()
  message(WARNING
    "LEAP_GPU=AMD requested, but hipify did not run. "
    "Build may fail if raw CUDA sources are not HIP-compatible."
  )
endif()

# ------------------------------------------------------------------------------
# CPU-only build
# ------------------------------------------------------------------------------

elseif(LEAP_GPU STREQUAL "NONE")
set(LEAP_CPU_ONLY ON)

else()
message(FATAL_ERROR "CMake scripting error: ${LEAP_GPU} didn't match.")
endif()

# ------------------------------------------------------------------------------
# Testing and output directories
# ------------------------------------------------------------------------------

# Tests are on by default: an explicit BUILD_TESTING value is honoured, and a
# BUILD_TESTING that was never defined counts as "enabled".
if(NOT DEFINED BUILD_TESTING OR BUILD_TESTING)
  enable_testing()
endif()

# Collect build products in two folders at the top of the build tree:
# bin/ for executables, lib/ for static archives and shared libraries.
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")


# ------------------------------------------------------------------------------
# Delegate actual target creation and per-file source handling to src/
# ------------------------------------------------------------------------------

add_subdirectory(src)
Loading