From ac2d48f6ddfe8312cf3a90f75f63057e60ebeecc Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 19:27:59 +0900 Subject: [PATCH 01/11] ci: remove 459-restructure-directories branch from workflows --- .github/workflows/test_cxx_backend.yml | 2 +- .github/workflows/test_fortran.yml | 2 +- .github/workflows/test_python.yml | 17 +- backend/cxx/morse_LICENCE.txt | 38 +++ cmake/modules/FindLAPACKE.cmake | 380 +++++++++++++++++++++++++ 5 files changed, 424 insertions(+), 15 deletions(-) create mode 100644 backend/cxx/morse_LICENCE.txt create mode 100644 cmake/modules/FindLAPACKE.cmake diff --git a/.github/workflows/test_cxx_backend.yml b/.github/workflows/test_cxx_backend.yml index 87d22afd..963948b5 100644 --- a/.github/workflows/test_cxx_backend.yml +++ b/.github/workflows/test_cxx_backend.yml @@ -2,7 +2,7 @@ name: Test C++ Backend on: push: - branches: [ "main", "459-restructure-directories" ] + branches: [ "main" ] pull_request: branches: [ "main" ] workflow_dispatch: diff --git a/.github/workflows/test_fortran.yml b/.github/workflows/test_fortran.yml index e69fd05b..33ce5a18 100644 --- a/.github/workflows/test_fortran.yml +++ b/.github/workflows/test_fortran.yml @@ -2,7 +2,7 @@ name: Test Fortran Bindings on: push: - branches: [ "main", "459-restructure-directories" ] + branches: [ "main" ] pull_request: branches: [ "main" ] workflow_dispatch: diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 7e398fe5..a8d430c8 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - 459-restructure-directories pull_request: branches: - main @@ -50,19 +49,11 @@ jobs: with: enable-cache: true - - name: Setup build environment + - name: Setup and run tests working-directory: python - run: python3 setup_build.py - - - name: Run uv sync - working-directory: python - run: uv sync - env: - SPARSEIR_USE_BLAS: 1 - - - name: Run tests - working-directory: python - run: uv run pytest tests/ -v + run: | + chmod +x run_tests.sh + ./run_tests.sh env: SPARSEIR_USE_BLAS: 1 diff --git a/backend/cxx/morse_LICENCE.txt b/backend/cxx/morse_LICENCE.txt new file mode 100644 index 00000000..3b885875 --- /dev/null +++ b/backend/cxx/morse_LICENCE.txt @@ -0,0 +1,38 @@ +### +# +# @copyright (c) 2012-2020 Inria. All rights reserved. +# @copyright (c) 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# This software is a computer program whose purpose is to process +# Matrices Over Runtime Systems @ Exascale (MORSE). More information +# can be found on the following website: http://www.inria.fr/en/teams/morse. +# +# This software is governed by the CeCILL-C license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL-C +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL-C license and that you accept its terms. +# +### diff --git a/cmake/modules/FindLAPACKE.cmake b/cmake/modules/FindLAPACKE.cmake new file mode 100644 index 00000000..55fc87e6 --- /dev/null +++ b/cmake/modules/FindLAPACKE.cmake @@ -0,0 +1,380 @@ +### +# +# @copyright (c) 2009-2014 The University of Tennessee and The University +# of Tennessee Research Foundation. +# All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. +# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. +# +### +# +# - Find LAPACKE include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(LAPACKE +# [REQUIRED] # Fail with error if lapacke is not found +# [COMPONENTS ...] # dependencies +# ) +# +# LAPACKE depends on the following libraries: +# - LAPACK +# +# This module finds headers and lapacke library. +# Results are reported in variables: +# LAPACKE_FOUND - True if headers and requested libraries were found +# LAPACKE_LINKER_FLAGS - list of required linker flags (excluding -l and -L) +# LAPACKE_INCLUDE_DIRS - lapacke include directories +# LAPACKE_LIBRARY_DIRS - Link directories for lapacke libraries +# LAPACKE_LIBRARIES - lapacke component libraries to be linked +# LAPACKE_INCLUDE_DIRS_DEP - lapacke + dependencies include directories +# LAPACKE_LIBRARY_DIRS_DEP - lapacke + dependencies link directories +# LAPACKE_LIBRARIES_DEP - lapacke libraries + dependencies +# +# The user can give specific paths where to find the libraries adding cmake +# options at configure (ex: cmake path/to/project -DLAPACKE_DIR=path/to/lapacke): +# LAPACKE_DIR - Where to find the base directory of lapacke +# LAPACKE_INCDIR - Where to find the header files +# LAPACKE_LIBDIR - Where to find the library files +# The module can also look for the following environment variables if paths +# are not given as cmake variable: LAPACKE_DIR, LAPACKE_INCDIR, LAPACKE_LIBDIR +# +# LAPACKE could be directly embedded in LAPACK library (ex: Intel MKL) so that +# we test a lapacke function with the lapack libraries found and set LAPACKE +# variables to LAPACK ones if test is successful. To skip this feature and +# look for a stand alone lapacke, please add the following in your +# CMakeLists.txt before to call find_package(LAPACKE): +# set(LAPACKE_STANDALONE TRUE) + +#============================================================================= +# Copyright 2012-2013 Inria +# Copyright 2012-2013 Emmanuel Agullo +# Copyright 2012-2013 Mathieu Faverge +# Copyright 2012 Cedric Castagnede +# Copyright 2013-2016 Florent Pruvost +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file MORSE-Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of Morse, substitute the full +# License text for the above reference.) + +if (NOT LAPACKE_FOUND) + set(LAPACKE_DIR "" CACHE PATH "Installation directory of LAPACKE library") + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "A cache variable, namely LAPACKE_DIR, has been set to specify the install directory of LAPACKE") + endif() +endif() + +# LAPACKE depends on LAPACK anyway, try to find it +if (NOT LAPACK_FOUND) + if(LAPACKE_FIND_REQUIRED) + find_package(LAPACKEXT REQUIRED) + else() + find_package(LAPACKEXT) + endif() +endif() + +# LAPACKE depends on LAPACK +if (LAPACK_FOUND) + + if (NOT LAPACKE_STANDALONE) + # check if a lapacke function exists in the LAPACK lib + include(CheckFunctionExists) + set(CMAKE_REQUIRED_LIBRARIES "${LAPACK_LINKER_FLAGS};${LAPACK_LIBRARIES}") + unset(LAPACKE_WORKS CACHE) + check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) + mark_as_advanced(LAPACKE_WORKS) + set(CMAKE_REQUIRED_LIBRARIES) + + if(LAPACKE_WORKS) + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke: test with lapack succeeds") + endif() + # test succeeds: LAPACKE is in LAPACK + set(LAPACKE_LIBRARIES "${LAPACK_LIBRARIES}") + set(LAPACKE_LIBRARIES_DEP "${LAPACK_LIBRARIES}") + if (LAPACK_LIBRARY_DIRS) + set(LAPACKE_LIBRARY_DIRS "${LAPACK_LIBRARY_DIRS}") + endif() + if(LAPACK_INCLUDE_DIRS) + set(LAPACKE_INCLUDE_DIRS "${LAPACK_INCLUDE_DIRS}") + set(LAPACKE_INCLUDE_DIRS_DEP "${LAPACK_INCLUDE_DIRS}") + endif() + if (LAPACK_LINKER_FLAGS) + set(LAPACKE_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}") + endif() + endif() + endif (NOT LAPACKE_STANDALONE) + + if (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) + + if(NOT LAPACKE_WORKS AND NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke : test with lapack fails") + endif() + # test fails: try to find LAPACKE lib exterior to LAPACK + + # Try to find LAPACKE lib + ####################### + + # Looking for include + # ------------------- + + # Add system include paths to search include + # ------------------------------------------ + unset(_inc_env) + set(ENV_LAPACKE_DIR "$ENV{LAPACKE_DIR}") + set(ENV_LAPACKE_INCDIR "$ENV{LAPACKE_INCDIR}") + if(ENV_LAPACKE_INCDIR) + list(APPEND _inc_env "${ENV_LAPACKE_INCDIR}") + elseif(ENV_LAPACKE_DIR) + list(APPEND _inc_env "${ENV_LAPACKE_DIR}") + list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include") + list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include/lapacke") + else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() + endif() + list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") + list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + list(REMOVE_DUPLICATES _inc_env) + + + # Try to find the lapacke header in the given paths + # ------------------------------------------------- + # call cmake macro to find the header path + if(LAPACKE_INCDIR) + set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") + find_path(LAPACKE_lapacke.h_DIRS + NAMES lapacke.h + HINTS ${LAPACKE_INCDIR}) + else() + if(LAPACKE_DIR) + set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") + find_path(LAPACKE_lapacke.h_DIRS + NAMES lapacke.h + HINTS ${LAPACKE_DIR} + PATH_SUFFIXES "include" "include/lapacke") + else() + set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") + find_path(LAPACKE_lapacke.h_DIRS + NAMES lapacke.h + HINTS ${_inc_env}) + endif() + endif() + mark_as_advanced(LAPACKE_lapacke.h_DIRS) + + # If found, add path to cmake variable + # ------------------------------------ + if (LAPACKE_lapacke.h_DIRS) + set(LAPACKE_INCLUDE_DIRS "${LAPACKE_lapacke.h_DIRS}") + else () + set(LAPACKE_INCLUDE_DIRS "LAPACKE_INCLUDE_DIRS-NOTFOUND") + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke -- lapacke.h not found") + endif() + endif() + + + # Looking for lib + # --------------- + + # Add system library paths to search lib + # -------------------------------------- + unset(_lib_env) + set(ENV_LAPACKE_LIBDIR "$ENV{LAPACKE_LIBDIR}") + if(ENV_LAPACKE_LIBDIR) + list(APPEND _lib_env "${ENV_LAPACKE_LIBDIR}") + elseif(ENV_LAPACKE_DIR) + list(APPEND _lib_env "${ENV_LAPACKE_DIR}") + list(APPEND _lib_env "${ENV_LAPACKE_DIR}/lib") + else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() + endif() + list(REMOVE_DUPLICATES _lib_env) + + # Try to find the lapacke lib in the given paths + # ---------------------------------------------- + + # call cmake macro to find the lib path + if(LAPACKE_LIBDIR) + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${LAPACKE_LIBDIR}) + else() + if(LAPACKE_DIR) + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${LAPACKE_DIR} + PATH_SUFFIXES lib lib32 lib64) + else() + set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") + find_library(LAPACKE_lapacke_LIBRARY + NAMES lapacke + HINTS ${_lib_env}) + endif() + endif() + mark_as_advanced(LAPACKE_lapacke_LIBRARY) + + # If found, add path to cmake variable + # ------------------------------------ + if (LAPACKE_lapacke_LIBRARY) + get_filename_component(lapacke_lib_path "${LAPACKE_lapacke_LIBRARY}" PATH) + # set cmake variables + set(LAPACKE_LIBRARIES "${LAPACKE_lapacke_LIBRARY}") + set(LAPACKE_LIBRARY_DIRS "${lapacke_lib_path}") + else () + set(LAPACKE_LIBRARIES "LAPACKE_LIBRARIES-NOTFOUND") + set(LAPACKE_LIBRARY_DIRS "LAPACKE_LIBRARY_DIRS-NOTFOUND") + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke -- lib lapacke not found") + endif() + endif () + + # check a function to validate the find + if(LAPACKE_LIBRARIES) + + set(REQUIRED_LDFLAGS) + set(REQUIRED_INCDIRS) + set(REQUIRED_LIBDIRS) + set(REQUIRED_LIBS) + + # LAPACKE + if (LAPACKE_INCLUDE_DIRS) + set(REQUIRED_INCDIRS "${LAPACKE_INCLUDE_DIRS}") + endif() + if (LAPACKE_LIBRARY_DIRS) + set(REQUIRED_LIBDIRS "${LAPACKE_LIBRARY_DIRS}") + endif() + set(REQUIRED_LIBS "${LAPACKE_LIBRARIES}") + # LAPACK + if (LAPACK_INCLUDE_DIRS) + list(APPEND REQUIRED_INCDIRS "${LAPACK_INCLUDE_DIRS}") + endif() + if (LAPACK_LIBRARY_DIRS) + list(APPEND REQUIRED_LIBDIRS "${LAPACK_LIBRARY_DIRS}") + endif() + list(APPEND REQUIRED_LIBS "${LAPACK_LIBRARIES}") + if (LAPACK_LINKER_FLAGS) + list(APPEND REQUIRED_LDFLAGS "${LAPACK_LINKER_FLAGS}") + endif() + # Fortran + if (CMAKE_C_COMPILER_ID MATCHES "GNU") + find_library( + FORTRAN_gfortran_LIBRARY + NAMES gfortran + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_gfortran_LIBRARY) + if (FORTRAN_gfortran_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") + endif() + elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") + find_library( + FORTRAN_ifcore_LIBRARY + NAMES ifcore + HINTS ${_lib_env} + ) + mark_as_advanced(FORTRAN_ifcore_LIBRARY) + if (FORTRAN_ifcore_LIBRARY) + list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") + endif() + endif() + # m + find_library(M_LIBRARY NAMES m HINTS ${_lib_env}) + mark_as_advanced(M_LIBRARY) + if(M_LIBRARY) + list(APPEND REQUIRED_LIBS "-lm") + endif() + # set required libraries for link + set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") + set(CMAKE_REQUIRED_LIBRARIES) + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") + foreach(lib_dir ${REQUIRED_LIBDIRS}) + list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") + endforeach() + list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") + string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + + # test link + unset(LAPACKE_WORKS CACHE) + include(CheckFunctionExists) + check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) + mark_as_advanced(LAPACKE_WORKS) + + if(LAPACKE_WORKS) + # save link with dependencies + set(LAPACKE_LIBRARIES_DEP "${REQUIRED_LIBS}") + set(LAPACKE_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") + set(LAPACKE_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") + set(LAPACKE_LINKER_FLAGS "${REQUIRED_LDFLAGS}") + list(REMOVE_DUPLICATES LAPACKE_LIBRARY_DIRS_DEP) + list(REMOVE_DUPLICATES LAPACKE_INCLUDE_DIRS_DEP) + list(REMOVE_DUPLICATES LAPACKE_LINKER_FLAGS) + else() + if(NOT LAPACKE_FIND_QUIETLY) + message(STATUS "Looking for lapacke: test of LAPACKE_dgeqrf with lapacke and lapack libraries fails") + message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") + message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") + message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") + endif() + endif() + set(CMAKE_REQUIRED_INCLUDES) + set(CMAKE_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_LIBRARIES) + endif(LAPACKE_LIBRARIES) + + endif (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) + +else(LAPACK_FOUND) + + if (NOT LAPACKE_FIND_QUIETLY) + message(STATUS "LAPACKE requires LAPACK but LAPACK has not been found." + "Please look for LAPACK first.") + endif() + +endif(LAPACK_FOUND) + +if (LAPACKE_LIBRARIES) + list(GET LAPACKE_LIBRARIES 0 first_lib) + get_filename_component(first_lib_path "${first_lib}" PATH) + if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") + string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") + set(LAPACKE_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of LAPACKE library" FORCE) + else() + set(LAPACKE_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of LAPACKE library" FORCE) + endif() +endif() +mark_as_advanced(LAPACKE_DIR) +mark_as_advanced(LAPACKE_DIR_FOUND) + +# check that LAPACKE has been found +# --------------------------------- +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LAPACKE DEFAULT_MSG + LAPACKE_LIBRARIES + LAPACKE_WORKS) From 19575d23eb18a324c4264a765f2a51372e847f84 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 19:30:18 +0900 Subject: [PATCH 02/11] feat: implement dynamic Fortran BLAS resolution and ILP64/LP64 support - Make BLAS mandatory: Remove SPARSEIR_USE_BLAS option - Remove unused LAPACKE option: Remove SPARSEIR_USE_LAPACKE option - Implement dynamic Fortran BLAS symbol resolution at runtime - Add ILP64/LP64 support with automatic detection (prioritize ILP64) - Add combined registration functions: spir_register_dgemm_zgemm_lp64/ilp64 - Update Python bindings to use new combined registration - Remove unused FindLAPACKE.cmake and morse_LICENCE.txt - Add run_tests.sh script for automated Python testing --- CMakeLists.txt | 52 ++-- backend/cxx/CMakeLists.txt | 39 +-- backend/cxx/include/sparseir/gemm.hpp | 203 ++++++------- backend/cxx/include/sparseir/sparseir.h | 36 +++ backend/cxx/morse_LICENCE.txt | 38 --- backend/cxx/src/gemm.cpp | 311 +++++++++++++------ backend/cxx/test/CMakeLists.txt | 8 +- cmake/modules/FindLAPACKE.cmake | 380 ------------------------ python/CMakeLists.txt | 11 +- python/pylibsparseir/core.py | 9 +- python/run_tests.sh | 51 ++++ 11 files changed, 445 insertions(+), 693 deletions(-) delete mode 100644 backend/cxx/morse_LICENCE.txt delete mode 100644 cmake/modules/FindLAPACKE.cmake create mode 100755 python/run_tests.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index e89ec56d..0b47550b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,24 +49,21 @@ endif() # Eigen3 # BLAS/LAPACK options -option(SPARSEIR_USE_BLAS "Enable BLAS support" OFF) -option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface (requires SPARSEIR_USE_BLAS=ON)" OFF) - -# BLAS/LAPACK search and configuration -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - message(STATUS "BLAS support enabled") - - # ILP64 support - if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - message(STATUS "ILP64 BLAS interface enabled") - else() - message(STATUS "LP64 BLAS interface enabled (default)") - endif() +option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface" OFF) + +# BLAS search and configuration (always enabled) +message(STATUS "BLAS support enabled") + +# ILP64 support +if(SPARSEIR_USE_ILP64) + add_compile_definitions(SPARSEIR_USE_ILP64) + message(STATUS "ILP64 BLAS interface enabled") +else() + message(STATUS "LP64 BLAS interface enabled (default)") +endif() - # Find BLAS package - if(SPARSEIR_USE_ILP64) +# Find BLAS package +if(SPARSEIR_USE_ILP64) # For ILP64, check if BLAS_LIBRARIES is already set by user if(BLAS_LIBRARIES) message(STATUS "ILP64 BLAS library specified: ${BLAS_LIBRARIES}") @@ -106,8 +103,6 @@ if(SPARSEIR_USE_BLAS) #else() #message(FATAL_ERROR "BLAS include directories not found") #endif() -elseif(SPARSEIR_USE_ILP64) - message(FATAL_ERROR "SPARSEIR_USE_ILP64 requires SPARSEIR_USE_BLAS=ON") endif() @@ -187,23 +182,14 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) -# Add BLAS/LAPACK linking -if(SPARSEIR_USE_BLAS) - if(SPARSEIR_USE_ILP64 OR BLAS_FOUND) - target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) - message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") - else() - message(FATAL_ERROR "BLAS libraries not found") - endif() +# Add BLAS linking (always enabled) +if(SPARSEIR_USE_ILP64 OR BLAS_FOUND) + target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) + message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") else() - message(STATUS "BLAS linking disabled") + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() -if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(sparseir PRIVATE ${LAPACK_LIBRARIES}) -endif() - - # macOS-specific configuration if(APPLE AND ACCELERATE_FRAMEWORK) target_link_libraries(sparseir PRIVATE ${ACCELERATE_FRAMEWORK}) diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 02d8eae8..9b8002f9 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -20,9 +20,7 @@ set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Options -option(SPARSEIR_USE_BLAS "Enable BLAS support" OFF) option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface" OFF) -option(SPARSEIR_USE_LAPACKE "Enable LAPACKE support" OFF) option(SPARSEIR_BUILD_TESTING "Enable testing" OFF) option(SPARSEIR_DEBUG "Enable debug logging" OFF) @@ -57,18 +55,15 @@ if(NOT xprec_POPULATED) FetchContent_Populate(XPrec) endif() -# BLAS/LAPACK configuration -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - # Try to find ILP64 BLAS - if(NOT BLAS_LIBRARIES) - find_library(BLAS_LIBRARIES NAMES openblas64) - endif() - else() - find_package(BLAS REQUIRED) +# BLAS configuration (always enabled) +if(SPARSEIR_USE_ILP64) + add_compile_definitions(SPARSEIR_USE_ILP64) + # Try to find ILP64 BLAS + if(NOT BLAS_LIBRARIES) + find_library(BLAS_LIBRARIES NAMES openblas64) endif() +else() + find_package(BLAS REQUIRED) endif() # C++ Library Build @@ -111,20 +106,12 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) -# BLAS/LAPACK linking -if(SPARSEIR_USE_BLAS) - if(BLAS_LIBRARIES OR BLAS_FOUND) - target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) - message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") - else() - message(FATAL_ERROR "BLAS libraries not found") - endif() +# BLAS/LAPACK linking (required) +if(BLAS_LIBRARIES OR BLAS_FOUND) + target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) + message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") else() - message(STATUS "BLAS linking disabled") -endif() - -if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(sparseir PRIVATE ${LAPACK_LIBRARIES}) + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() # macOS Accelerate framework diff --git a/backend/cxx/include/sparseir/gemm.hpp b/backend/cxx/include/sparseir/gemm.hpp index 8c4a0f7b..df27e094 100644 --- a/backend/cxx/include/sparseir/gemm.hpp +++ b/backend/cxx/include/sparseir/gemm.hpp @@ -13,64 +13,35 @@ #include "sparseir/jacobi_svd.hpp" -#ifdef SPARSEIR_USE_BLAS -extern "C" { - -// Fortran BLAS interface declarations -#ifdef SPARSEIR_USE_ILP64 -// ILP64 Fortran BLAS interface (64-bit integers) -void dgemm_(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const double* alpha, const double* a, const long long* lda, - const double* b, const long long* ldb, const double* beta, - double* c, const long long* ldc); - -void zgemm_(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const void* alpha, const void* a, const long long* lda, - const void* b, const long long* ldb, const void* beta, - void* c, const long long* ldc); -#else -// LP64 Fortran BLAS interface (32-bit integers) -void dgemm_(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const double* alpha, const double* a, const int* lda, - const double* b, const int* ldb, const double* beta, - double* c, const int* ldc); - -void zgemm_(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const void* alpha, const void* a, const int* lda, - const void* b, const int* ldb, const void* beta, - void* c, const int* ldc); -#endif - - -enum CBLAS_TRANSPOSE { - CblasNoTrans = 111, - CblasTrans = 112, - CblasConjTrans = 113 -}; - -enum CBLAS_ORDER { CblasRowMajor = 101, CblasColMajor = 102 }; -} - -#endif - namespace sparseir { -#ifdef SPARSEIR_USE_BLAS +// Fortran BLAS interface wrapper functions (Fortran-style arguments) +// For LP64 (32-bit integers) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc); + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc); + +// For ILP64 (64-bit integers) +void my_dgemm64(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc); + +void my_zgemm64(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc); -// Wrapper functions that call Fortran BLAS -void my_dgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const double alpha, - const double *A, const int64_t lda, const double *B, const int64_t ldb, - const double beta, double *C, const int64_t ldc); - -void my_zgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const void *alpha, - const void *A, const int64_t lda, const void *B, const int64_t ldb, - const void *beta, void *C, const int64_t ldc); // Type-specific CBLAS GEMM implementations template void _gemm_blas_impl(const U *A, const V *B, ResultType *C, int64_t M, int64_t N, @@ -92,8 +63,11 @@ inline void _gemm_blas_impl(const double *A, const double *B, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1.0, A, M, - B, K, 0.0, C, M); + const char transa = 'N', transb = 'N'; + const double alpha = 1.0, beta = 0.0; + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); + my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -104,12 +78,15 @@ inline void _gemm_blas_impl, std::complex, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex @@ -124,12 +101,15 @@ inline void _gemm_blas_impl, std::complex>( A_complex[i] = std::complex(A[i], 0.0); } + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A_complex.data()), M, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A_complex.data()), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for complex * double -> complex @@ -144,12 +124,15 @@ inline void _gemm_blas_impl, double, std::complex>( B_complex[i] = std::complex(B[i], 0.0); } + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B_complex.data()), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * double -> double (transpose B) @@ -159,8 +142,11 @@ inline void _gemm_blas_impl_transpose(const double *A, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1.0, A, M, B, - N, 0.0, C, M); + const char transa = 'N', transb = 'T'; + const double alpha = 1.0, beta = 0.0; + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); + my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -173,12 +159,15 @@ _gemm_blas_impl_transpose, std::complex, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'N', transb = 'T'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B), N, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex (transpose B) @@ -228,8 +217,11 @@ inline void _gemm_blas_impl_conj(const double *A, const double *B, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1.0, A, K, B, - K, 0.0, C, M); + const char transa = 'T', transb = 'N'; + const double alpha = 1.0, beta = 0.0; + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); + my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -240,12 +232,15 @@ inline void _gemm_blas_impl_conj, std::complex, const std::complex *A, const std::complex *B, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), K, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for complex * double -> complex (conjugate A) @@ -261,12 +256,15 @@ _gemm_blas_impl_conj, double, std::complex>( B_complex[i] = std::complex(B[i], 0.0); } + const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), K, - reinterpret_cast(B_complex.data()), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex (conjugate A, @@ -283,16 +281,17 @@ _gemm_blas_impl_conj, std::complex>( A_complex[i] = std::complex(A[i], 0.0); } + const char transa = 'T', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A_complex.data()), K, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); + my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A_complex.data()), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } -#endif - template void _gemm_inplace(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int64_t K) { @@ -303,12 +302,8 @@ void _gemm_inplace(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int64_ B, K, N); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A, B, C, M, N, K); -#else - C_map.noalias() = A_map * B_map; -#endif } // second matrix is transposed @@ -321,12 +316,8 @@ void _gemm_inplace_t(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int6 B, N, K); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl_transpose(A, B, C, M, N, K); -#else - C_map.noalias() = A_map * B_map.transpose(); -#endif } // first matrix is complex conjugated @@ -339,12 +330,8 @@ void _gemm_inplace_conj(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, i B, K, N); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl_conj(A, B, C, M, N, K); -#else - C_map.noalias() = A_map.adjoint() * B_map; -#endif } template @@ -360,14 +347,9 @@ _gemm(const Eigen::Matrix &A, Eigen::Matrix result(M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A.data(), B.data(), result.data(), M, N, K); -#else - (void)K; // Suppress unused variable warning - result.noalias() = A * B; -#endif return result; } @@ -386,14 +368,9 @@ _gemm( Eigen::Matrix result(M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A.data(), B.data(), result.data(), M, N, K); -#else - (void)K; // Suppress unused variable warning - result.noalias() = A * B; -#endif return result; } diff --git a/backend/cxx/include/sparseir/sparseir.h b/backend/cxx/include/sparseir/sparseir.h index de4f5aa8..ee5b17c0 100644 --- a/backend/cxx/include/sparseir/sparseir.h +++ b/backend/cxx/include/sparseir/sparseir.h @@ -1257,6 +1257,42 @@ int spir_sampling_fit_zd(const spir_sampling *s, int order, int ndim, const int *input_dims, int target_dim, const c_complex *input, double *out); +/** + * @brief Register BLAS function pointers for LP64 interface (32-bit integers) + * + * This function registers both dgemm and zgemm function pointers from an external + * BLAS library with LP64 interface (32-bit integers). This is used when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. + * + * @param dgemm_fn Function pointer to Fortran dgemm (LP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (LP64 interface) + * + * @note The function pointers must be valid Fortran BLAS functions with LP64 + * interface signature + * @note This function must be called before using any BLAS functionality when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled + * @note Only one registration function should be called (either LP64 or ILP64) + */ +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); + +/** + * @brief Register BLAS function pointers for ILP64 interface (64-bit integers) + * + * This function registers both dgemm and zgemm function pointers from an external + * BLAS library with ILP64 interface (64-bit integers). This is used when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. + * + * @param dgemm_fn Function pointer to Fortran dgemm (ILP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (ILP64 interface) + * + * @note The function pointers must be valid Fortran BLAS functions with ILP64 + * interface signature + * @note This function must be called before using any BLAS functionality when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled + * @note Only one registration function should be called (either LP64 or ILP64) + */ +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); + #ifdef __cplusplus } #endif diff --git a/backend/cxx/morse_LICENCE.txt b/backend/cxx/morse_LICENCE.txt deleted file mode 100644 index 3b885875..00000000 --- a/backend/cxx/morse_LICENCE.txt +++ /dev/null @@ -1,38 +0,0 @@ -### -# -# @copyright (c) 2012-2020 Inria. All rights reserved. -# @copyright (c) 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# This software is a computer program whose purpose is to process -# Matrices Over Runtime Systems @ Exascale (MORSE). More information -# can be found on the following website: http://www.inria.fr/en/teams/morse. -# -# This software is governed by the CeCILL-C license under French law and -# abiding by the rules of distribution of free software. You can use, -# modify and/ or redistribute the software under the terms of the CeCILL-C -# license as circulated by CEA, CNRS and INRIA at the following URL -# "http://www.cecill.info". -# -# As a counterpart to the access to the source code and rights to copy, -# modify and redistribute granted by the license, users are provided only -# with a limited warranty and the software's author, the holder of the -# economic rights, and the successive licensors have only limited -# liability. -# -# In this respect, the user's attention is drawn to the risks associated -# with loading, using, modifying and/or developing or reproducing the -# software by the user in light of its specific status of free software, -# that may mean that it is complicated to manipulate, and that also -# therefore means that it is reserved for developers and experienced -# professionals having in-depth computer knowledge. Users are therefore -# encouraged to load and test the software's suitability as regards their -# requirements in conditions enabling the security of their systems and/or -# data to be ensured and, more generally, to use and operate it in the -# same conditions as regards security. -# -# The fact that you are presently reading this means that you have had -# knowledge of the CeCILL-C license and that you accept its terms. -# -### diff --git a/backend/cxx/src/gemm.cpp b/backend/cxx/src/gemm.cpp index b3c28509..96dea9d1 100644 --- a/backend/cxx/src/gemm.cpp +++ b/backend/cxx/src/gemm.cpp @@ -1,111 +1,254 @@ #include "sparseir/gemm.hpp" -#ifdef SPARSEIR_USE_BLAS -namespace sparseir { - - #include #include +#include -// Fortran dgemm function pointer type -#ifdef SPARSEIR_USE_ILP64 -using dgemm_fptr = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const double*, - const double*, const long long*, const double*, - const long long*, const double*, double*, const long long*); - -using zgemm_fptr = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const void*, - const void*, const long long*, const void*, - const long long*, const void*, void*, const long long*); -#else -using dgemm_fptr = void(*)(const char*, const char*, const int*, - const int*, const int*, const double*, - const double*, const int*, const double*, - const int*, const double*, double*, const int*); - -using zgemm_fptr = void(*)(const char*, const char*, const int*, - const int*, const int*, const void*, - const void*, const int*, const void*, - const int*, const void*, void*, const int*); -#endif +namespace sparseir { +// Fortran dgemm function pointer types (both ILP64 and LP64) +using dgemm_fptr_lp64 = void(*)(const char*, const char*, const int*, + const int*, const int*, const double*, + const double*, const int*, const double*, + const int*, const double*, double*, const int*); + +using zgemm_fptr_lp64 = void(*)(const char*, const char*, const int*, + const int*, const int*, const void*, + const void*, const int*, const void*, + const int*, const void*, void*, const int*); + +using dgemm_fptr_ilp64 = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const double*, + const double*, const long long*, const double*, + const long long*, const double*, double*, const long long*); + +using zgemm_fptr_ilp64 = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const void*, + const void*, const long long*, const void*, + const long long*, const void*, void*, const long long*); + +// Storage for registered function pointers +static dgemm_fptr_lp64 registered_dgemm_lp64 = nullptr; +static zgemm_fptr_lp64 registered_zgemm_lp64 = nullptr; +static dgemm_fptr_ilp64 registered_dgemm_ilp64 = nullptr; +static zgemm_fptr_ilp64 registered_zgemm_ilp64 = nullptr; +static bool symbols_resolved = false; +static bool use_ilp64 = false; + +// Function to resolve BLAS symbols at runtime +template +FuncPtr resolve_blas_symbol(const char* patterns[], size_t count) { + // Try RTLD_DEFAULT first (searches already-loaded libraries) + void* handle = RTLD_DEFAULT; + + for (size_t i = 0; i < count; ++i) { + void* sym = dlsym(handle, patterns[i]); + if (sym) { + return reinterpret_cast(sym); + } + } + + // If not found, try dlopen(NULL) to search main program and all loaded libraries + handle = dlopen(NULL, RTLD_LAZY); + if (handle) { + for (size_t i = 0; i < count; ++i) { + void* sym = dlsym(handle, patterns[i]); + if (sym) { + dlclose(handle); + return reinterpret_cast(sym); + } + } + dlclose(handle); + } + + return nullptr; +} +// Initialize BLAS symbols once - try ILP64 first, then fall back to LP64 +static void init_blas_symbols() { + if (symbols_resolved) { + return; + } + #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR -// Storage for registered function pointers (initially not registered) -static dgemm_fptr registered_dgemm = nullptr; -static zgemm_fptr registered_zgemm = nullptr; + // When using external function pointers, check if they have been registered + if ((!registered_dgemm_lp64 && !registered_dgemm_ilp64) || + (!registered_zgemm_lp64 && !registered_zgemm_ilp64)) { + throw std::runtime_error( + "BLAS function pointers not registered - call spir_register_dgemm_zgemm_lp64() " + "or spir_register_dgemm_zgemm_ilp64() before using BLAS functions"); + } + // Determine which interface is being used + use_ilp64 = (registered_dgemm_ilp64 != nullptr && registered_zgemm_ilp64 != nullptr); + symbols_resolved = true; + return; +#else + // First, try ILP64 symbol patterns + const char* dgemm_patterns_ilp64[] = { + "dgemm64_", "dgemm64", "DGEMM64_", "DGEMM64" + }; + const char* zgemm_patterns_ilp64[] = { + "zgemm64_", "zgemm64", "ZGEMM64_", "ZGEMM64" + }; + + registered_dgemm_ilp64 = resolve_blas_symbol( + dgemm_patterns_ilp64, sizeof(dgemm_patterns_ilp64) / sizeof(dgemm_patterns_ilp64[0])); + registered_zgemm_ilp64 = resolve_blas_symbol( + zgemm_patterns_ilp64, sizeof(zgemm_patterns_ilp64) / sizeof(zgemm_patterns_ilp64[0])); + + // If ILP64 found, use ILP64 + if (registered_dgemm_ilp64 && registered_zgemm_ilp64) { + use_ilp64 = true; + symbols_resolved = true; + return; + } + + // Fall back to LP64 symbol patterns + const char* dgemm_patterns_lp64[] = { + "dgemm_", "dgemm", "DGEMM_", "DGEMM" + }; + const char* zgemm_patterns_lp64[] = { + "zgemm_", "zgemm", "ZGEMM_", "ZGEMM" + }; + + registered_dgemm_lp64 = resolve_blas_symbol( + dgemm_patterns_lp64, sizeof(dgemm_patterns_lp64) / sizeof(dgemm_patterns_lp64[0])); + registered_zgemm_lp64 = resolve_blas_symbol( + zgemm_patterns_lp64, sizeof(zgemm_patterns_lp64) / sizeof(zgemm_patterns_lp64[0])); + + use_ilp64 = false; + symbols_resolved = true; +#endif +} extern "C" { -// Register function pointers from outside -void spir_register_dgemm(void* fn) { - registered_dgemm = reinterpret_cast(fn); +// Register function pointers from outside (for SPARSEIR_USE_EXTERN_FBLAS_PTR) +// Register both dgemm and zgemm at the same time (LP64) +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm_lp64 = reinterpret_cast(dgemm_fn); + registered_zgemm_lp64 = reinterpret_cast(zgemm_fn); + use_ilp64 = false; + symbols_resolved = true; } -void spir_register_zgemm(void* fn) { - registered_zgemm = reinterpret_cast(fn); +// Register function pointers from outside (for SPARSEIR_USE_EXTERN_FBLAS_PTR) +// Register both dgemm and zgemm at the same time (ILP64) +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm_ilp64 = reinterpret_cast(dgemm_fn); + registered_zgemm_ilp64 = reinterpret_cast(zgemm_fn); + use_ilp64 = true; + symbols_resolved = true; } } // extern "C" -#else - -static dgemm_fptr registered_dgemm = dgemm_; -static zgemm_fptr registered_zgemm = zgemm_; - -#endif // SPARSEIR_USE_EXTERN_FBLAS_PTR +// Implementation of my_dgemm - Fortran BLAS interface (LP64) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc) +{ + // Initialize symbols if not already done + init_blas_symbols(); + + if (use_ilp64) { + // Convert LP64 arguments to ILP64 + if (!registered_dgemm_ilp64) { + throw std::runtime_error("dgemm (ILP64) symbol not found - BLAS library may not be linked correctly"); + } + const long long m_ll = static_cast(*m), n_ll = static_cast(*n), k_ll = static_cast(*k); + const long long lda_ll = static_cast(*lda), ldb_ll = static_cast(*ldb), ldc_ll = static_cast(*ldc); + registered_dgemm_ilp64(transa, transb, &m_ll, &n_ll, &k_ll, alpha, a, &lda_ll, b, &ldb_ll, beta, c, &ldc_ll); + } else { + // Use LP64 directly + if (!registered_dgemm_lp64) { + throw std::runtime_error("dgemm (LP64) symbol not found - BLAS library may not be linked correctly"); + } + registered_dgemm_lp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + } +} -// Implementation of my_dgemm - converts CBLAS interface to Fortran BLAS -void my_dgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const double alpha, - const double *A, const int64_t lda, const double *B, const int64_t ldb, - const double beta, double *C, const int64_t ldc) +// Implementation of my_zgemm - Fortran BLAS interface (LP64) +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc) { - // Convert CBLAS transpose flags to Fortran character codes - char transa = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; - char transb = (TransB == CblasNoTrans) ? 'N' : (TransB == CblasTrans) ? 'T' : 'C'; - -#ifdef SPARSEIR_USE_ILP64 - // ILP64: Convert to long long for Fortran interface - const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); - const long long lda_ll = static_cast(lda), ldb_ll = static_cast(ldb), ldc_ll = static_cast(ldc); - registered_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda_ll, B, &ldb_ll, &beta, C, &ldc_ll); -#else - // LP64: Convert to int for Fortran interface - const int m = static_cast(M), n = static_cast(N), k = static_cast(K); - const int lda_int = static_cast(lda), ldb_int = static_cast(ldb), ldc_int = static_cast(ldc); - registered_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda_int, B, &ldb_int, &beta, C, &ldc_int); -#endif + // Initialize symbols if not already done + init_blas_symbols(); + + if (use_ilp64) { + // Convert LP64 arguments to ILP64 + if (!registered_zgemm_ilp64) { + throw std::runtime_error("zgemm (ILP64) symbol not found - BLAS library may not be linked correctly"); + } + const long long m_ll = static_cast(*m), n_ll = static_cast(*n), k_ll = static_cast(*k); + const long long lda_ll = static_cast(*lda), ldb_ll = static_cast(*ldb), ldc_ll = static_cast(*ldc); + registered_zgemm_ilp64(transa, transb, &m_ll, &n_ll, &k_ll, alpha, a, &lda_ll, b, &ldb_ll, beta, c, &ldc_ll); + } else { + // Use LP64 directly + if (!registered_zgemm_lp64) { + throw std::runtime_error("zgemm (LP64) symbol not found - BLAS library may not be linked correctly"); + } + registered_zgemm_lp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + } } -// Implementation of my_zgemm - converts CBLAS interface to Fortran BLAS -void my_zgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const void *alpha, - const void *A, const int64_t lda, const void *B, const int64_t ldb, - const void *beta, void *C, const int64_t ldc) +// Implementation of my_dgemm64 - Fortran BLAS interface (ILP64) +void my_dgemm64(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc) { - // Convert CBLAS transpose flags to Fortran character codes - char transa = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; - char transb = (TransB == CblasNoTrans) ? 'N' : (TransB == CblasTrans) ? 'T' : 'C'; + // Initialize symbols if not already done + init_blas_symbols(); + + if (use_ilp64) { + // Use ILP64 directly + if (!registered_dgemm_ilp64) { + throw std::runtime_error("dgemm64 (ILP64) symbol not found - BLAS library may not be linked correctly"); + } + registered_dgemm_ilp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + } else { + // Convert ILP64 arguments to LP64 + if (!registered_dgemm_lp64) { + throw std::runtime_error("dgemm (LP64) symbol not found - BLAS library may not be linked correctly"); + } + const int m_int = static_cast(*m), n_int = static_cast(*n), k_int = static_cast(*k); + const int lda_int = static_cast(*lda), ldb_int = static_cast(*ldb), ldc_int = static_cast(*ldc); + registered_dgemm_lp64(transa, transb, &m_int, &n_int, &k_int, alpha, a, &lda_int, b, &ldb_int, beta, c, &ldc_int); + } +} - if (!registered_zgemm) { - throw std::runtime_error("zgemm not registered"); +// Implementation of my_zgemm64 - Fortran BLAS interface (ILP64) +void my_zgemm64(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc) +{ + // Initialize symbols if not already done + init_blas_symbols(); + + if (use_ilp64) { + // Use ILP64 directly + if (!registered_zgemm_ilp64) { + throw std::runtime_error("zgemm64 (ILP64) symbol not found - BLAS library may not be linked correctly"); + } + registered_zgemm_ilp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); + } else { + // Convert ILP64 arguments to LP64 + if (!registered_zgemm_lp64) { + throw std::runtime_error("zgemm (LP64) symbol not found - BLAS library may not be linked correctly"); + } + const int m_int = static_cast(*m), n_int = static_cast(*n), k_int = static_cast(*k); + const int lda_int = static_cast(*lda), ldb_int = static_cast(*ldb), ldc_int = static_cast(*ldc); + registered_zgemm_lp64(transa, transb, &m_int, &n_int, &k_int, alpha, a, &lda_int, b, &ldb_int, beta, c, &ldc_int); } -#ifdef SPARSEIR_USE_ILP64 - // ILP64: Convert to long long for Fortran interface - const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); - const long long lda_ll = static_cast(lda), ldb_ll = static_cast(ldb), ldc_ll = static_cast(ldc); - registered_zgemm(&transa, &transb, &m, &n, &k, alpha, A, &lda_ll, B, &ldb_ll, beta, C, &ldc_ll); -#else - // LP64: Convert to int for Fortran interface - const int m = static_cast(M), n = static_cast(N), k = static_cast(K); - const int lda_int = static_cast(lda), ldb_int = static_cast(ldb), ldc_int = static_cast(ldc); - registered_zgemm(&transa, &transb, &m, &n, &k, alpha, A, &lda_int, B, &ldb_int, beta, C, &ldc_int); -#endif } } // namespace sparseir - - -#endif // SPARSEIR_USE_BLAS diff --git a/backend/cxx/test/CMakeLists.txt b/backend/cxx/test/CMakeLists.txt index 35263eb1..37a56dbb 100644 --- a/backend/cxx/test/CMakeLists.txt +++ b/backend/cxx/test/CMakeLists.txt @@ -23,15 +23,11 @@ foreach(TEST_FILE ${TEST_FILES}) target_link_libraries(${TEST_NAME} PRIVATE SparseIR::sparseir) target_link_libraries(${TEST_NAME} PRIVATE Eigen3::Eigen Threads::Threads) - # Add BLAS/LAPACK linking for tests if enabled - if(SPARSEIR_USE_BLAS AND BLAS_FOUND) + # Add BLAS linking for tests (always enabled) + if(BLAS_FOUND) target_link_libraries(${TEST_NAME} PRIVATE ${BLAS_LIBRARIES}) endif() - if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(${TEST_NAME} PRIVATE ${LAPACK_LIBRARIES}) - endif() - # macOS-specific configuration for tests if(APPLE AND ACCELERATE_FRAMEWORK) target_link_libraries(${TEST_NAME} PRIVATE ${ACCELERATE_FRAMEWORK}) diff --git a/cmake/modules/FindLAPACKE.cmake b/cmake/modules/FindLAPACKE.cmake deleted file mode 100644 index 55fc87e6..00000000 --- a/cmake/modules/FindLAPACKE.cmake +++ /dev/null @@ -1,380 +0,0 @@ -### -# -# @copyright (c) 2009-2014 The University of Tennessee and The University -# of Tennessee Research Foundation. -# All rights reserved. -# @copyright (c) 2012-2016 Inria. All rights reserved. -# @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. -# -### -# -# - Find LAPACKE include dirs and libraries -# Use this module by invoking find_package with the form: -# find_package(LAPACKE -# [REQUIRED] # Fail with error if lapacke is not found -# [COMPONENTS ...] # dependencies -# ) -# -# LAPACKE depends on the following libraries: -# - LAPACK -# -# This module finds headers and lapacke library. -# Results are reported in variables: -# LAPACKE_FOUND - True if headers and requested libraries were found -# LAPACKE_LINKER_FLAGS - list of required linker flags (excluding -l and -L) -# LAPACKE_INCLUDE_DIRS - lapacke include directories -# LAPACKE_LIBRARY_DIRS - Link directories for lapacke libraries -# LAPACKE_LIBRARIES - lapacke component libraries to be linked -# LAPACKE_INCLUDE_DIRS_DEP - lapacke + dependencies include directories -# LAPACKE_LIBRARY_DIRS_DEP - lapacke + dependencies link directories -# LAPACKE_LIBRARIES_DEP - lapacke libraries + dependencies -# -# The user can give specific paths where to find the libraries adding cmake -# options at configure (ex: cmake path/to/project -DLAPACKE_DIR=path/to/lapacke): -# LAPACKE_DIR - Where to find the base directory of lapacke -# LAPACKE_INCDIR - Where to find the header files -# LAPACKE_LIBDIR - Where to find the library files -# The module can also look for the following environment variables if paths -# are not given as cmake variable: LAPACKE_DIR, LAPACKE_INCDIR, LAPACKE_LIBDIR -# -# LAPACKE could be directly embedded in LAPACK library (ex: Intel MKL) so that -# we test a lapacke function with the lapack libraries found and set LAPACKE -# variables to LAPACK ones if test is successful. To skip this feature and -# look for a stand alone lapacke, please add the following in your -# CMakeLists.txt before to call find_package(LAPACKE): -# set(LAPACKE_STANDALONE TRUE) - -#============================================================================= -# Copyright 2012-2013 Inria -# Copyright 2012-2013 Emmanuel Agullo -# Copyright 2012-2013 Mathieu Faverge -# Copyright 2012 Cedric Castagnede -# Copyright 2013-2016 Florent Pruvost -# -# Distributed under the OSI-approved BSD License (the "License"); -# see accompanying file MORSE-Copyright.txt for details. -# -# This software is distributed WITHOUT ANY WARRANTY; without even the -# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the License for more information. -#============================================================================= -# (To distribute this file outside of Morse, substitute the full -# License text for the above reference.) - -if (NOT LAPACKE_FOUND) - set(LAPACKE_DIR "" CACHE PATH "Installation directory of LAPACKE library") - if (NOT LAPACKE_FIND_QUIETLY) - message(STATUS "A cache variable, namely LAPACKE_DIR, has been set to specify the install directory of LAPACKE") - endif() -endif() - -# LAPACKE depends on LAPACK anyway, try to find it -if (NOT LAPACK_FOUND) - if(LAPACKE_FIND_REQUIRED) - find_package(LAPACKEXT REQUIRED) - else() - find_package(LAPACKEXT) - endif() -endif() - -# LAPACKE depends on LAPACK -if (LAPACK_FOUND) - - if (NOT LAPACKE_STANDALONE) - # check if a lapacke function exists in the LAPACK lib - include(CheckFunctionExists) - set(CMAKE_REQUIRED_LIBRARIES "${LAPACK_LINKER_FLAGS};${LAPACK_LIBRARIES}") - unset(LAPACKE_WORKS CACHE) - check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) - mark_as_advanced(LAPACKE_WORKS) - set(CMAKE_REQUIRED_LIBRARIES) - - if(LAPACKE_WORKS) - if(NOT LAPACKE_FIND_QUIETLY) - message(STATUS "Looking for lapacke: test with lapack succeeds") - endif() - # test succeeds: LAPACKE is in LAPACK - set(LAPACKE_LIBRARIES "${LAPACK_LIBRARIES}") - set(LAPACKE_LIBRARIES_DEP "${LAPACK_LIBRARIES}") - if (LAPACK_LIBRARY_DIRS) - set(LAPACKE_LIBRARY_DIRS "${LAPACK_LIBRARY_DIRS}") - endif() - if(LAPACK_INCLUDE_DIRS) - set(LAPACKE_INCLUDE_DIRS "${LAPACK_INCLUDE_DIRS}") - set(LAPACKE_INCLUDE_DIRS_DEP "${LAPACK_INCLUDE_DIRS}") - endif() - if (LAPACK_LINKER_FLAGS) - set(LAPACKE_LINKER_FLAGS "${LAPACK_LINKER_FLAGS}") - endif() - endif() - endif (NOT LAPACKE_STANDALONE) - - if (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) - - if(NOT LAPACKE_WORKS AND NOT LAPACKE_FIND_QUIETLY) - message(STATUS "Looking for lapacke : test with lapack fails") - endif() - # test fails: try to find LAPACKE lib exterior to LAPACK - - # Try to find LAPACKE lib - ####################### - - # Looking for include - # ------------------- - - # Add system include paths to search include - # ------------------------------------------ - unset(_inc_env) - set(ENV_LAPACKE_DIR "$ENV{LAPACKE_DIR}") - set(ENV_LAPACKE_INCDIR "$ENV{LAPACKE_INCDIR}") - if(ENV_LAPACKE_INCDIR) - list(APPEND _inc_env "${ENV_LAPACKE_INCDIR}") - elseif(ENV_LAPACKE_DIR) - list(APPEND _inc_env "${ENV_LAPACKE_DIR}") - list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include") - list(APPEND _inc_env "${ENV_LAPACKE_DIR}/include/lapacke") - else() - if(WIN32) - string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") - else() - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{CPATH}") - list(APPEND _inc_env "${_path_env}") - string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") - list(APPEND _inc_env "${_path_env}") - endif() - endif() - list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") - list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") - list(REMOVE_DUPLICATES _inc_env) - - - # Try to find the lapacke header in the given paths - # ------------------------------------------------- - # call cmake macro to find the header path - if(LAPACKE_INCDIR) - set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") - find_path(LAPACKE_lapacke.h_DIRS - NAMES lapacke.h - HINTS ${LAPACKE_INCDIR}) - else() - if(LAPACKE_DIR) - set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") - find_path(LAPACKE_lapacke.h_DIRS - NAMES lapacke.h - HINTS ${LAPACKE_DIR} - PATH_SUFFIXES "include" "include/lapacke") - else() - set(LAPACKE_lapacke.h_DIRS "LAPACKE_lapacke.h_DIRS-NOTFOUND") - find_path(LAPACKE_lapacke.h_DIRS - NAMES lapacke.h - HINTS ${_inc_env}) - endif() - endif() - mark_as_advanced(LAPACKE_lapacke.h_DIRS) - - # If found, add path to cmake variable - # ------------------------------------ - if (LAPACKE_lapacke.h_DIRS) - set(LAPACKE_INCLUDE_DIRS "${LAPACKE_lapacke.h_DIRS}") - else () - set(LAPACKE_INCLUDE_DIRS "LAPACKE_INCLUDE_DIRS-NOTFOUND") - if(NOT LAPACKE_FIND_QUIETLY) - message(STATUS "Looking for lapacke -- lapacke.h not found") - endif() - endif() - - - # Looking for lib - # --------------- - - # Add system library paths to search lib - # -------------------------------------- - unset(_lib_env) - set(ENV_LAPACKE_LIBDIR "$ENV{LAPACKE_LIBDIR}") - if(ENV_LAPACKE_LIBDIR) - list(APPEND _lib_env "${ENV_LAPACKE_LIBDIR}") - elseif(ENV_LAPACKE_DIR) - list(APPEND _lib_env "${ENV_LAPACKE_DIR}") - list(APPEND _lib_env "${ENV_LAPACKE_DIR}/lib") - else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() - endif() - list(REMOVE_DUPLICATES _lib_env) - - # Try to find the lapacke lib in the given paths - # ---------------------------------------------- - - # call cmake macro to find the lib path - if(LAPACKE_LIBDIR) - set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") - find_library(LAPACKE_lapacke_LIBRARY - NAMES lapacke - HINTS ${LAPACKE_LIBDIR}) - else() - if(LAPACKE_DIR) - set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") - find_library(LAPACKE_lapacke_LIBRARY - NAMES lapacke - HINTS ${LAPACKE_DIR} - PATH_SUFFIXES lib lib32 lib64) - else() - set(LAPACKE_lapacke_LIBRARY "LAPACKE_lapacke_LIBRARY-NOTFOUND") - find_library(LAPACKE_lapacke_LIBRARY - NAMES lapacke - HINTS ${_lib_env}) - endif() - endif() - mark_as_advanced(LAPACKE_lapacke_LIBRARY) - - # If found, add path to cmake variable - # ------------------------------------ - if (LAPACKE_lapacke_LIBRARY) - get_filename_component(lapacke_lib_path "${LAPACKE_lapacke_LIBRARY}" PATH) - # set cmake variables - set(LAPACKE_LIBRARIES "${LAPACKE_lapacke_LIBRARY}") - set(LAPACKE_LIBRARY_DIRS "${lapacke_lib_path}") - else () - set(LAPACKE_LIBRARIES "LAPACKE_LIBRARIES-NOTFOUND") - set(LAPACKE_LIBRARY_DIRS "LAPACKE_LIBRARY_DIRS-NOTFOUND") - if (NOT LAPACKE_FIND_QUIETLY) - message(STATUS "Looking for lapacke -- lib lapacke not found") - endif() - endif () - - # check a function to validate the find - if(LAPACKE_LIBRARIES) - - set(REQUIRED_LDFLAGS) - set(REQUIRED_INCDIRS) - set(REQUIRED_LIBDIRS) - set(REQUIRED_LIBS) - - # LAPACKE - if (LAPACKE_INCLUDE_DIRS) - set(REQUIRED_INCDIRS "${LAPACKE_INCLUDE_DIRS}") - endif() - if (LAPACKE_LIBRARY_DIRS) - set(REQUIRED_LIBDIRS "${LAPACKE_LIBRARY_DIRS}") - endif() - set(REQUIRED_LIBS "${LAPACKE_LIBRARIES}") - # LAPACK - if (LAPACK_INCLUDE_DIRS) - list(APPEND REQUIRED_INCDIRS "${LAPACK_INCLUDE_DIRS}") - endif() - if (LAPACK_LIBRARY_DIRS) - list(APPEND REQUIRED_LIBDIRS "${LAPACK_LIBRARY_DIRS}") - endif() - list(APPEND REQUIRED_LIBS "${LAPACK_LIBRARIES}") - if (LAPACK_LINKER_FLAGS) - list(APPEND REQUIRED_LDFLAGS "${LAPACK_LINKER_FLAGS}") - endif() - # Fortran - if (CMAKE_C_COMPILER_ID MATCHES "GNU") - find_library( - FORTRAN_gfortran_LIBRARY - NAMES gfortran - HINTS ${_lib_env} - ) - mark_as_advanced(FORTRAN_gfortran_LIBRARY) - if (FORTRAN_gfortran_LIBRARY) - list(APPEND REQUIRED_LIBS "${FORTRAN_gfortran_LIBRARY}") - endif() - elseif (CMAKE_C_COMPILER_ID MATCHES "Intel") - find_library( - FORTRAN_ifcore_LIBRARY - NAMES ifcore - HINTS ${_lib_env} - ) - mark_as_advanced(FORTRAN_ifcore_LIBRARY) - if (FORTRAN_ifcore_LIBRARY) - list(APPEND REQUIRED_LIBS "${FORTRAN_ifcore_LIBRARY}") - endif() - endif() - # m - find_library(M_LIBRARY NAMES m HINTS ${_lib_env}) - mark_as_advanced(M_LIBRARY) - if(M_LIBRARY) - list(APPEND REQUIRED_LIBS "-lm") - endif() - # set required libraries for link - set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") - set(CMAKE_REQUIRED_LIBRARIES) - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LDFLAGS}") - foreach(lib_dir ${REQUIRED_LIBDIRS}) - list(APPEND CMAKE_REQUIRED_LIBRARIES "-L${lib_dir}") - endforeach() - list(APPEND CMAKE_REQUIRED_LIBRARIES "${REQUIRED_LIBS}") - string(REGEX REPLACE "^ -" "-" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") - - # test link - unset(LAPACKE_WORKS CACHE) - include(CheckFunctionExists) - check_function_exists(LAPACKE_dgeqrf LAPACKE_WORKS) - mark_as_advanced(LAPACKE_WORKS) - - if(LAPACKE_WORKS) - # save link with dependencies - set(LAPACKE_LIBRARIES_DEP "${REQUIRED_LIBS}") - set(LAPACKE_LIBRARY_DIRS_DEP "${REQUIRED_LIBDIRS}") - set(LAPACKE_INCLUDE_DIRS_DEP "${REQUIRED_INCDIRS}") - set(LAPACKE_LINKER_FLAGS "${REQUIRED_LDFLAGS}") - list(REMOVE_DUPLICATES LAPACKE_LIBRARY_DIRS_DEP) - list(REMOVE_DUPLICATES LAPACKE_INCLUDE_DIRS_DEP) - list(REMOVE_DUPLICATES LAPACKE_LINKER_FLAGS) - else() - if(NOT LAPACKE_FIND_QUIETLY) - message(STATUS "Looking for lapacke: test of LAPACKE_dgeqrf with lapacke and lapack libraries fails") - message(STATUS "CMAKE_REQUIRED_LIBRARIES: ${CMAKE_REQUIRED_LIBRARIES}") - message(STATUS "CMAKE_REQUIRED_INCLUDES: ${CMAKE_REQUIRED_INCLUDES}") - message(STATUS "Check in CMakeFiles/CMakeError.log to figure out why it fails") - endif() - endif() - set(CMAKE_REQUIRED_INCLUDES) - set(CMAKE_REQUIRED_FLAGS) - set(CMAKE_REQUIRED_LIBRARIES) - endif(LAPACKE_LIBRARIES) - - endif (LAPACKE_STANDALONE OR NOT LAPACKE_WORKS) - -else(LAPACK_FOUND) - - if (NOT LAPACKE_FIND_QUIETLY) - message(STATUS "LAPACKE requires LAPACK but LAPACK has not been found." - "Please look for LAPACK first.") - endif() - -endif(LAPACK_FOUND) - -if (LAPACKE_LIBRARIES) - list(GET LAPACKE_LIBRARIES 0 first_lib) - get_filename_component(first_lib_path "${first_lib}" PATH) - if (${first_lib_path} MATCHES "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)") - string(REGEX REPLACE "(/lib(32|64)?$)|(/lib/intel64$|/lib/ia32$)" "" not_cached_dir "${first_lib_path}") - set(LAPACKE_DIR_FOUND "${not_cached_dir}" CACHE PATH "Installation directory of LAPACKE library" FORCE) - else() - set(LAPACKE_DIR_FOUND "${first_lib_path}" CACHE PATH "Installation directory of LAPACKE library" FORCE) - endif() -endif() -mark_as_advanced(LAPACKE_DIR) -mark_as_advanced(LAPACKE_DIR_FOUND) - -# check that LAPACKE has been found -# --------------------------------- -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(LAPACKE DEFAULT_MSG - LAPACKE_LIBRARIES - LAPACKE_WORKS) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1f74db2a..bb294e3c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -39,15 +39,8 @@ if(SPARSEIR_BUILD_FORTRAN) message(STATUS "Fortran support enabled for building Fortran bindings") endif() -# Eigen3 -# BLAS/LAPACK options -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - message(STATUS "BLAS support enabled") - - # Do not resolve BLAS library symbols (assume sharing with NumPy) - # Do not call find_package(BLAS) or, if called, do not pass to target_link_libraries -endif() +# Do not resolve BLAS library symbols (assume sharing with NumPy) +# Do not call find_package(BLAS) or, if called, do not pass to target_link_libraries # External FBLAS pointer option if(SPARSEIR_USE_EXTERN_FBLAS_PTR) diff --git a/python/pylibsparseir/core.py b/python/pylibsparseir/core.py index e8e9df62..7b70457e 100644 --- a/python/pylibsparseir/core.py +++ b/python/pylibsparseir/core.py @@ -71,10 +71,11 @@ def _find_library(): ptr_z = ctypes.pythonapi.PyCapsule_GetPointer(capsule_z, name_z) _lib = ctypes.CDLL(_find_library()) - _lib.spir_register_dgemm.argtypes = [ctypes.c_void_p] - _lib.spir_register_dgemm(ptr) - _lib.spir_register_zgemm.argtypes = [ctypes.c_void_p] - _lib.spir_register_zgemm(ptr_z) + # Register both dgemm and zgemm at once using LP64 interface + # Note: SciPy BLAS typically uses LP64 interface (32-bit integers) + _lib.spir_register_dgemm_zgemm_lp64.argtypes = [ctypes.c_void_p, ctypes.c_void_p] + _lib.spir_register_dgemm_zgemm_lp64.restype = None + _lib.spir_register_dgemm_zgemm_lp64(ptr, ptr_z) if os.environ.get("SPARSEIR_DEBUG", "").lower() in ("1", "true", "yes", "on"): print(f"[core.py] Registered SciPy BLAS dgemm @ {hex(ptr)}") print(f"[core.py] Registered SciPy BLAS zgemm @ {hex(ptr_z)}") diff --git a/python/run_tests.sh b/python/run_tests.sh new file mode 100755 index 00000000..e3e6e1f4 --- /dev/null +++ b/python/run_tests.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Script to setup build environment and run tests with uv + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "======================================" +echo "Cleaning up previous build artifacts..." +echo "======================================" +# Remove copied source files and directories +[ -d "include" ] && rm -rf "include" +[ -d "src" ] && rm -rf "src" +[ -d "cmake" ] && rm -rf "cmake" +[ -f "LICENSE" ] && rm -f "LICENSE" + +# Remove .venv directory if it exists +[ -d ".venv" ] && rm -rf ".venv" + +# Remove build cache directories +[ -d "_skbuild" ] && rm -rf "_skbuild" +[ -d "dist" ] && rm -rf "dist" +[ -d "*.egg-info" ] && rm -rf *.egg-info 2>/dev/null || true + +echo "Cleanup completed." + +echo "" +echo "======================================" +echo "Setting up build environment..." +echo "======================================" +python3 setup_build.py + +echo "" +echo "======================================" +echo "Running uv sync (with rebuild)..." +echo "======================================" +# Remove any cached build artifacts and force rebuild +uv sync --refresh + +echo "" +echo "======================================" +echo "Running tests..." +echo "======================================" +uv run pytest tests/ -v + +echo "" +echo "======================================" +echo "All tests completed successfully!" +echo "======================================" + From ecb9b76fcf41ef03f8ef5569c8627de78bb96fd5 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 19:54:06 +0900 Subject: [PATCH 03/11] ci: add ILP64 BLAS test job for C++ backend - Add test-cxx-backend-ilp64 job to test ILP64 BLAS interface - Install libopenblas64-0 and libopenblas64-dev on Ubuntu - Build with -DSPARSEIR_USE_BLAS_ILP64=ON flag - Update rollup job to include ILP64 test results --- .github/workflows/test_cxx_backend.yml | 32 +++++- CMakeLists.txt | 64 ++++------- README.md | 144 ++++++++++++++++++------- backend/cxx/CMakeLists.txt | 27 +++-- 4 files changed, 177 insertions(+), 90 deletions(-) diff --git a/.github/workflows/test_cxx_backend.yml b/.github/workflows/test_cxx_backend.yml index 963948b5..8f4cf816 100644 --- a/.github/workflows/test_cxx_backend.yml +++ b/.github/workflows/test_cxx_backend.yml @@ -40,9 +40,39 @@ jobs: working-directory: capi_sample run: ./run_sample.sh + test-cxx-backend-ilp64: + runs-on: ubuntu-latest + name: C++ backend with ILP64 BLAS + + steps: + - uses: actions/checkout@v5 + + - name: Install dependencies with ILP64 BLAS + run: | + sudo apt update && sudo apt install -y libeigen3-dev libopenblas64-0 libopenblas64-dev + + - name: Build and test C++ backend with ILP64 + working-directory: backend/cxx + run: | + rm -rf build + mkdir -p build && cd build + cmake .. -DSPARSEIR_BUILD_TESTING=ON -DSPARSEIR_USE_BLAS_ILP64=ON + cmake --build . + ctest --output-on-failure + + - name: Build and test capi_test against ILP64 backend + working-directory: capi_test + run: ./test_with_cxx_backend.sh + + - name: Build and run capi_sample with ILP64 + working-directory: capi_sample + run: ./run_sample.sh + test-cxx-backend-rollup: runs-on: ubuntu-latest - needs: test-cxx-backend + needs: + - test-cxx-backend + - test-cxx-backend-ilp64 if: always() steps: - name: All tests passed diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b47550b..445abf9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,60 +49,32 @@ endif() # Eigen3 # BLAS/LAPACK options -option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface" OFF) +option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) # BLAS search and configuration (always enabled) message(STATUS "BLAS support enabled") -# ILP64 support -if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - message(STATUS "ILP64 BLAS interface enabled") +# Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 +if(SPARSEIR_USE_BLAS_ILP64) + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") else() - message(STATUS "LP64 BLAS interface enabled (default)") + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") endif() -# Find BLAS package -if(SPARSEIR_USE_ILP64) - # For ILP64, check if BLAS_LIBRARIES is already set by user - if(BLAS_LIBRARIES) - message(STATUS "ILP64 BLAS library specified: ${BLAS_LIBRARIES}") - # Verify the library exists - if(NOT EXISTS "${BLAS_LIBRARIES}") - message(FATAL_ERROR "Specified ILP64 BLAS library not found: ${BLAS_LIBRARIES}") - endif() - set(BLAS_FOUND TRUE) - else() - # Try to find ILP64 BLAS automatically - find_library(BLAS_LIBRARIES - NAMES openblas64 - PATHS /usr/lib/x86_64-linux-gnu /usr/lib /usr/local/lib - NO_DEFAULT_PATH - ) - if(BLAS_LIBRARIES) - message(STATUS "ILP64 BLAS found automatically: ${BLAS_LIBRARIES}") - set(BLAS_FOUND TRUE) - else() - message(FATAL_ERROR "ILP64 BLAS (libopenblas64) not found. You can specify it with -DBLAS_LIBRARIES=/path/to/libopenblas64.so") - endif() - endif() +# Find BLAS package using standard CMake FindBLAS +find_package(BLAS REQUIRED) + +if(BLAS_FOUND) + message(STATUS "BLAS found: ${BLAS_LIBRARIES}") + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") else() - # For LP64, use standard CMake BLAS finder - find_package(BLAS REQUIRED) - if(BLAS_FOUND) - message(STATUS "BLAS found: ${BLAS_LIBRARIES}") - else() - message(FATAL_ERROR "BLAS not found") - endif() + message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") endif() - - # CMake's find_package(BLAS) should provide include directories - #if(BLAS_INCLUDE_DIRS) - #include_directories(${BLAS_INCLUDE_DIRS}) - #message(STATUS "BLAS include directories: ${BLAS_INCLUDE_DIRS}") - #else() - #message(FATAL_ERROR "BLAS include directories not found") - #endif() +else() + message(FATAL_ERROR "BLAS not found") endif() @@ -183,7 +155,7 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) # Add BLAS linking (always enabled) -if(SPARSEIR_USE_ILP64 OR BLAS_FOUND) +if(SPARSEIR_USE_BLAS_ILP64 OR BLAS_FOUND) target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") else() diff --git a/README.md b/README.md index 3db1c9e8..28860913 100644 --- a/README.md +++ b/README.md @@ -99,71 +99,122 @@ cd build ### BLAS Support -By default, the library uses Eigen's internal implementations for matrix-matrix multiplication for fitting and sampling. -This does not require any additional system libraries. -However, the performance is not as good as the BLAS implementation. +BLAS support is **mandatory and always enabled** in this library. BLAS routines are used for performance-critical operations in fitting (`fit_tau`, `fit_matsubara`) and evaluation (`evaluate_tau`, `evaluate_matsubara`). -For performance critical applications, we recommend using BLAS. -For this, you need to set the `SPARSEIR_USE_BLAS` CMake option to `ON`. +#### Two Modes for BLAS Provision -```bash -cmake .. -DSPARSEIR_USE_BLAS=OFF -cmake .. -DSPARSEIR_USE_BLAS=ON -``` +The library supports two modes for providing BLAS functions: -Alternatively, you can set the `SPARSEIR_USE_BLAS` CMake option to `ON` in the `CMakeLists.txt` file of your project: +1. **Link-time BLAS (default)**: BLAS library is linked at build time +2. **Runtime BLAS registration**: BLAS function pointers are provided at runtime via C-API (used when `SPARSEIR_USE_EXTERN_FBLAS_PTR` is defined) -```cmake -cmake_minimum_required(VERSION 3.10) -project(MyProject) +The choice between these modes is determined at compile time based on the `SPARSEIR_USE_EXTERN_FBLAS_PTR` CMake option. -set(SPARSEIR_USE_BLAS ON) -``` +--- + +#### Mode 1: Link-time BLAS (Default) + +In this mode, a BLAS library (OpenBLAS, Intel MKL, Apple Accelerate, etc.) is linked at build time. The library uses dynamic symbol resolution to automatically detect and use the appropriate BLAS functions at runtime. -Note: When enabling BLAS, ensure that the appropriate libraries (such as OpenBLAS, Intel MKL, or Apple Accelerate) can be found by CMake. +**Dynamic Symbol Resolution:** -#### ILP64 BLAS Support +The library uses `dlopen`/`dlsym` to dynamically resolve Fortran BLAS function symbols at runtime: +- Supports multiple BLAS implementations and symbol naming conventions (`dgemm_`, `dgemm`, `DGEMM_`, `DGEMM`, etc.) +- **ILP64 interfaces are prioritized over LP64 interfaces**: + 1. First attempts to find ILP64 symbols (`dgemm64_`, `dgemm64`, `ZGEMM64_`, etc.) + 2. If ILP64 symbols are found, they are used + 3. Otherwise, falls back to LP64 symbols (`dgemm_`, `dgemm`, `DGEMM_`, etc.) -For applications requiring large matrix operations (matrices larger than 2^31 elements), you can enable ILP64 BLAS support by setting both `SPARSEIR_USE_BLAS` and `SPARSEIR_USE_ILP64` to `ON`: +This automatic detection happens when the library is loaded, without requiring any runtime configuration. + +**Building with Link-time BLAS:** ```bash -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +# Standard build with BLAS auto-detection +mkdir -p build && cd build +cmake .. +cmake --build . + +# On Ubuntu with OpenBLAS +sudo apt install libopenblas-dev +cmake .. + +# On macOS (uses Accelerate framework automatically) +cmake .. ``` -This option: -- Uses 64-bit integers for matrix dimensions and leading dimensions -- Directly calls Fortran BLAS interfaces instead of CBLAS -- Requires ILP64-compatible BLAS libraries (e.g., `libopenblas64-0` on Ubuntu) +**For ILP64 BLAS:** + +If you need ILP64 support for large matrix operations (matrices larger than 2^31 elements), install ILP64-compatible BLAS libraries: -Example with ILP64 OpenBLAS on Ubuntu: ```bash +# Ubuntu with ILP64 OpenBLAS sudo apt install libopenblas64-0 libopenblas64-dev -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +cmake .. -DSPARSEIR_USE_BLAS_ILP64=ON ``` -#### Manual BLAS Library Specification +**Note**: The `SPARSEIR_USE_BLAS_ILP64` CMake option only affects which BLAS library CMake searches for during configuration (sets `BLA_SIZEOF_INTEGER=8`). At runtime, the library always tries ILP64 symbols first regardless of this option. -If CMake cannot automatically find the ILP64 BLAS library, you can specify it manually: +**Manual BLAS Library Specification:** + +If CMake cannot automatically find the BLAS library, you can specify it manually: ```bash -# Specify the exact library path -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON \ +# For standard LP64 BLAS +cmake .. -DBLAS_LIBRARIES=/usr/lib/x86_64-linux-gnu/libopenblas.so + +# For ILP64 BLAS +cmake .. -DSPARSEIR_USE_BLAS_ILP64=ON \ -DBLAS_LIBRARIES=/usr/lib/x86_64-linux-gnu/libopenblas64.so.0 # Or use environment variables to help CMake find it export BLA_VENDOR=OpenBLAS -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +cmake .. ``` -#### Troubleshooting ILP64 BLAS +--- -If you encounter linking errors like "undefined reference to `dgemm_`", ensure: +#### Mode 2: Runtime BLAS Registration -1. **ILP64 BLAS is installed**: Check with `find /usr/lib -name "*openblas64*"` -2. **Library contains ILP64 symbols**: Verify with `nm -D /path/to/libopenblas64.so | grep dgemm_` -3. **CMake finds the library**: Check the CMake output for "ILP64 BLAS found" messages +In this mode (enabled with `-DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON`), the library does not link BLAS at build time. Instead, BLAS function pointers must be registered at runtime before using the library. This mode is primarily used for language bindings (e.g., Python) where BLAS functions are provided by the host environment. -**Note**: ILP64 support requires `SPARSEIR_USE_BLAS=ON`. +**Building with Runtime BLAS Registration:** + +```bash +mkdir -p build && cd build +cmake .. -DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON +cmake --build . +``` + +**Registering BLAS Functions:** + +You must call one of these registration functions before using any BLAS functionality: + +**For LP64 interface (32-bit integers):** +```c +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); +``` + +**For ILP64 interface (64-bit integers):** +```c +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +``` + +**Important**: +- Only one registration function should be called (either LP64 or ILP64, not both) +- The library will throw a runtime error if BLAS functions are used without prior registration + +**Example (Python with ctypes):** + +```python +import ctypes +import scipy.linalg.cython_blas as blas + +lib = ctypes.CDLL("libsparseir.so") +dgemm_ptr = ctypes.cast(blas.dgemm, ctypes.c_void_p).value +zgemm_ptr = ctypes.cast(blas.zgemm, ctypes.c_void_p).value +lib.spir_register_dgemm_zgemm_lp64(dgemm_ptr, zgemm_ptr) +``` ### Debug Logging at runtime @@ -188,6 +239,27 @@ This will create the `docs/html` directory. Open `docs/html/index.html` with you Please refer [`./sample_c/README.md`](./sample_c/README.md) to learn more. +## Python Bindings + +Python bindings are located in the `python/` directory. The bindings use `SPARSEIR_USE_EXTERN_FBLAS_PTR` to register BLAS function pointers from SciPy/Numpy at runtime. + +### Testing Python Bindings + +To test the Python bindings, use the provided `run_tests.sh` script: + +```bash +cd python +./run_tests.sh +``` + +This script: +1. Cleans up previous build artifacts (copied source files, `.venv`, build cache) +2. Sets up the build environment using `setup_build.py` +3. Installs dependencies and rebuilds the package using `uv sync --refresh` +4. Runs the test suite using `uv run pytest tests/ -v` + +The script ensures a clean build environment and automatically handles dependency management with `uv`. + ## For developers diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 9b8002f9..9ab51ce0 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -20,7 +20,7 @@ set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Options -option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface" OFF) +option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) option(SPARSEIR_BUILD_TESTING "Enable testing" OFF) option(SPARSEIR_DEBUG "Enable debug logging" OFF) @@ -56,14 +56,27 @@ if(NOT xprec_POPULATED) endif() # BLAS configuration (always enabled) -if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - # Try to find ILP64 BLAS - if(NOT BLAS_LIBRARIES) - find_library(BLAS_LIBRARIES NAMES openblas64) +# Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 +if(SPARSEIR_USE_BLAS_ILP64) + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") +else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") +endif() + +# Find BLAS package using standard CMake FindBLAS +find_package(BLAS REQUIRED) + +if(BLAS_FOUND) + message(STATUS "BLAS libraries found: ${BLAS_LIBRARIES}") + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") + else() + message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") endif() else() - find_package(BLAS REQUIRED) + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() # C++ Library Build From d88c3ce66acb3991e0ae67abada46dddd44041a4 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 20:01:37 +0900 Subject: [PATCH 04/11] fix: use PRIVATE linking with --no-as-needed for BLAS - Change BLAS linking back to PRIVATE (proper encapsulation) - Add LINKER:--no-as-needed flag to prevent linker from dropping BLAS dependency - This ensures BLAS symbols are available for dlopen/dlsym at runtime - Avoids unnecessary dependency propagation to consumers - Fixes "zgemm symbol not found" errors while maintaining clean design --- CMakeLists.txt | 8 ++++++++ backend/cxx/CMakeLists.txt | 8 ++++++++ fortran/test_with_cxx_backend.sh | 3 +-- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 445abf9f..bab5a8ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,9 +155,17 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) # Add BLAS linking (always enabled) +# Link BLAS privately, but ensure it's not dropped by linker +# This allows dlopen/dlsym to find BLAS symbols at runtime if(SPARSEIR_USE_BLAS_ILP64 OR BLAS_FOUND) target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") + + # Ensure BLAS dependency is retained even if symbols are not directly used + # This is necessary for dynamic symbol resolution at runtime + if(NOT APPLE) + target_link_options(sparseir PRIVATE "LINKER:--no-as-needed") + endif() else() message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 9ab51ce0..06fe00ed 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -120,9 +120,17 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) # BLAS/LAPACK linking (required) +# Link BLAS privately, but ensure it's not dropped by linker +# This allows dlopen/dlsym to find BLAS symbols at runtime if(BLAS_LIBRARIES OR BLAS_FOUND) target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") + + # Ensure BLAS dependency is retained even if symbols are not directly used + # This is necessary for dynamic symbol resolution at runtime + if(NOT APPLE) + target_link_options(sparseir PRIVATE "LINKER:--no-as-needed") + endif() else() message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() diff --git a/fortran/test_with_cxx_backend.sh b/fortran/test_with_cxx_backend.sh index 2a6bff64..20773bdf 100755 --- a/fortran/test_with_cxx_backend.sh +++ b/fortran/test_with_cxx_backend.sh @@ -30,8 +30,7 @@ cd "$WORK_DIR/build_backend" cmake "$BACKEND_DIR" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ - -DSPARSEIR_BUILD_TESTING=OFF \ - -DSPARSEIR_USE_BLAS=ON + -DSPARSEIR_BUILD_TESTING=OFF echo -e "${YELLOW}Building backend/cxx...${NC}" cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) From dac0878afe3016212272c42d8f450dce16ff9bea Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 20:15:58 +0900 Subject: [PATCH 05/11] fix: guard BLAS registration functions with SPARSEIR_USE_EXTERN_FBLAS_PTR - Wrap spir_register_dgemm_zgemm_* declarations in sparseir.h with #ifdef - Add clarifying comments about no BLAS symbols in external mode - Ensures registration functions are only available when needed --- backend/cxx/include/sparseir/gemm.hpp | 77 +++++--- backend/cxx/include/sparseir/sparseir.h | 4 + backend/cxx/src/gemm.cpp | 251 +----------------------- backend/cxx/src/gemm_external.impl | 109 ++++++++++ backend/cxx/src/gemm_link.impl | 75 +++++++ 5 files changed, 246 insertions(+), 270 deletions(-) create mode 100644 backend/cxx/src/gemm_external.impl create mode 100644 backend/cxx/src/gemm_link.impl diff --git a/backend/cxx/include/sparseir/gemm.hpp b/backend/cxx/include/sparseir/gemm.hpp index df27e094..c63ec444 100644 --- a/backend/cxx/include/sparseir/gemm.hpp +++ b/backend/cxx/include/sparseir/gemm.hpp @@ -15,8 +15,23 @@ namespace sparseir { -// Fortran BLAS interface wrapper functions (Fortran-style arguments) -// For LP64 (32-bit integers) +// Fortran BLAS interface wrapper functions +// Integer type is selected at compile time based on SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_ILP64 +// ILP64 interface: 64-bit integers +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc); + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc); +#else +// LP64 interface: 32-bit integers (default) void my_dgemm(const char* transa, const char* transb, const int* m, const int* n, const int* k, const double* alpha, const double* a, const int* lda, @@ -28,19 +43,7 @@ void my_zgemm(const char* transa, const char* transb, const void* alpha, const void* a, const int* lda, const void* b, const int* ldb, const void* beta, void* c, const int* ldc); - -// For ILP64 (64-bit integers) -void my_dgemm64(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const double* alpha, const double* a, const long long* lda, - const double* b, const long long* ldb, const double* beta, - double* c, const long long* ldc); - -void my_zgemm64(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const void* alpha, const void* a, const long long* lda, - const void* b, const long long* ldb, const void* beta, - void* c, const long long* ldc); +#endif // Type-specific CBLAS GEMM implementations template @@ -65,9 +68,18 @@ inline void _gemm_blas_impl(const double *A, { const char transa = 'N', transb = 'N'; const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); - my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -81,9 +93,14 @@ inline void _gemm_blas_impl, std::complex, const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -104,9 +121,14 @@ inline void _gemm_blas_impl, std::complex>( const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A_complex.data()), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -127,9 +149,14 @@ inline void _gemm_blas_impl, double, std::complex>( const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -146,7 +173,7 @@ inline void _gemm_blas_impl_transpose(const double *A, const double alpha = 1.0, beta = 0.0; const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); - my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -164,7 +191,7 @@ _gemm_blas_impl_transpose, std::complex, const std::complex beta(0.0); const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -221,7 +248,7 @@ _gemm_blas_impl_conj(const double *A, const double *B, const double alpha = 1.0, beta = 0.0; const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); - my_dgemm64(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -237,7 +264,7 @@ inline void _gemm_blas_impl_conj, std::complex, const std::complex beta(0.0); const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -261,7 +288,7 @@ _gemm_blas_impl_conj, double, std::complex>( const std::complex beta(0.0); const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); @@ -286,7 +313,7 @@ _gemm_blas_impl_conj, std::complex>( const std::complex beta(0.0); const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); - my_zgemm64(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A_complex.data()), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), reinterpret_cast(C), &ldc); diff --git a/backend/cxx/include/sparseir/sparseir.h b/backend/cxx/include/sparseir/sparseir.h index ee5b17c0..453b3450 100644 --- a/backend/cxx/include/sparseir/sparseir.h +++ b/backend/cxx/include/sparseir/sparseir.h @@ -1257,6 +1257,8 @@ int spir_sampling_fit_zd(const spir_sampling *s, int order, int ndim, const int *input_dims, int target_dim, const c_complex *input, double *out); +#ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR + /** * @brief Register BLAS function pointers for LP64 interface (32-bit integers) * @@ -1293,6 +1295,8 @@ void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); */ void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +#endif // SPARSEIR_USE_EXTERN_FBLAS_PTR + #ifdef __cplusplus } #endif diff --git a/backend/cxx/src/gemm.cpp b/backend/cxx/src/gemm.cpp index 96dea9d1..269cd967 100644 --- a/backend/cxx/src/gemm.cpp +++ b/backend/cxx/src/gemm.cpp @@ -1,254 +1,15 @@ #include "sparseir/gemm.hpp" -#include -#include -#include - +#ifdef SPARSEIR_USE_BLAS namespace sparseir { +#endif // SPARSEIR_USE_BLAS -// Fortran dgemm function pointer types (both ILP64 and LP64) -using dgemm_fptr_lp64 = void(*)(const char*, const char*, const int*, - const int*, const int*, const double*, - const double*, const int*, const double*, - const int*, const double*, double*, const int*); - -using zgemm_fptr_lp64 = void(*)(const char*, const char*, const int*, - const int*, const int*, const void*, - const void*, const int*, const void*, - const int*, const void*, void*, const int*); - -using dgemm_fptr_ilp64 = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const double*, - const double*, const long long*, const double*, - const long long*, const double*, double*, const long long*); - -using zgemm_fptr_ilp64 = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const void*, - const void*, const long long*, const void*, - const long long*, const void*, void*, const long long*); - -// Storage for registered function pointers -static dgemm_fptr_lp64 registered_dgemm_lp64 = nullptr; -static zgemm_fptr_lp64 registered_zgemm_lp64 = nullptr; -static dgemm_fptr_ilp64 registered_dgemm_ilp64 = nullptr; -static zgemm_fptr_ilp64 registered_zgemm_ilp64 = nullptr; -static bool symbols_resolved = false; -static bool use_ilp64 = false; - -// Function to resolve BLAS symbols at runtime -template -FuncPtr resolve_blas_symbol(const char* patterns[], size_t count) { - // Try RTLD_DEFAULT first (searches already-loaded libraries) - void* handle = RTLD_DEFAULT; - - for (size_t i = 0; i < count; ++i) { - void* sym = dlsym(handle, patterns[i]); - if (sym) { - return reinterpret_cast(sym); - } - } - - // If not found, try dlopen(NULL) to search main program and all loaded libraries - handle = dlopen(NULL, RTLD_LAZY); - if (handle) { - for (size_t i = 0; i < count; ++i) { - void* sym = dlsym(handle, patterns[i]); - if (sym) { - dlclose(handle); - return reinterpret_cast(sym); - } - } - dlclose(handle); - } - - return nullptr; -} - -// Initialize BLAS symbols once - try ILP64 first, then fall back to LP64 -static void init_blas_symbols() { - if (symbols_resolved) { - return; - } - #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR - // When using external function pointers, check if they have been registered - if ((!registered_dgemm_lp64 && !registered_dgemm_ilp64) || - (!registered_zgemm_lp64 && !registered_zgemm_ilp64)) { - throw std::runtime_error( - "BLAS function pointers not registered - call spir_register_dgemm_zgemm_lp64() " - "or spir_register_dgemm_zgemm_ilp64() before using BLAS functions"); - } - // Determine which interface is being used - use_ilp64 = (registered_dgemm_ilp64 != nullptr && registered_zgemm_ilp64 != nullptr); - symbols_resolved = true; - return; + #include "gemm_external.impl" #else - // First, try ILP64 symbol patterns - const char* dgemm_patterns_ilp64[] = { - "dgemm64_", "dgemm64", "DGEMM64_", "DGEMM64" - }; - const char* zgemm_patterns_ilp64[] = { - "zgemm64_", "zgemm64", "ZGEMM64_", "ZGEMM64" - }; - - registered_dgemm_ilp64 = resolve_blas_symbol( - dgemm_patterns_ilp64, sizeof(dgemm_patterns_ilp64) / sizeof(dgemm_patterns_ilp64[0])); - registered_zgemm_ilp64 = resolve_blas_symbol( - zgemm_patterns_ilp64, sizeof(zgemm_patterns_ilp64) / sizeof(zgemm_patterns_ilp64[0])); - - // If ILP64 found, use ILP64 - if (registered_dgemm_ilp64 && registered_zgemm_ilp64) { - use_ilp64 = true; - symbols_resolved = true; - return; - } - - // Fall back to LP64 symbol patterns - const char* dgemm_patterns_lp64[] = { - "dgemm_", "dgemm", "DGEMM_", "DGEMM" - }; - const char* zgemm_patterns_lp64[] = { - "zgemm_", "zgemm", "ZGEMM_", "ZGEMM" - }; - - registered_dgemm_lp64 = resolve_blas_symbol( - dgemm_patterns_lp64, sizeof(dgemm_patterns_lp64) / sizeof(dgemm_patterns_lp64[0])); - registered_zgemm_lp64 = resolve_blas_symbol( - zgemm_patterns_lp64, sizeof(zgemm_patterns_lp64) / sizeof(zgemm_patterns_lp64[0])); - - use_ilp64 = false; - symbols_resolved = true; + #include "gemm_link.impl" #endif -} - -extern "C" { - -// Register function pointers from outside (for SPARSEIR_USE_EXTERN_FBLAS_PTR) -// Register both dgemm and zgemm at the same time (LP64) -void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn) { - registered_dgemm_lp64 = reinterpret_cast(dgemm_fn); - registered_zgemm_lp64 = reinterpret_cast(zgemm_fn); - use_ilp64 = false; - symbols_resolved = true; -} - -// Register function pointers from outside (for SPARSEIR_USE_EXTERN_FBLAS_PTR) -// Register both dgemm and zgemm at the same time (ILP64) -void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn) { - registered_dgemm_ilp64 = reinterpret_cast(dgemm_fn); - registered_zgemm_ilp64 = reinterpret_cast(zgemm_fn); - use_ilp64 = true; - symbols_resolved = true; -} - -} // extern "C" - -// Implementation of my_dgemm - Fortran BLAS interface (LP64) -void my_dgemm(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const double* alpha, const double* a, const int* lda, - const double* b, const int* ldb, const double* beta, - double* c, const int* ldc) -{ - // Initialize symbols if not already done - init_blas_symbols(); - - if (use_ilp64) { - // Convert LP64 arguments to ILP64 - if (!registered_dgemm_ilp64) { - throw std::runtime_error("dgemm (ILP64) symbol not found - BLAS library may not be linked correctly"); - } - const long long m_ll = static_cast(*m), n_ll = static_cast(*n), k_ll = static_cast(*k); - const long long lda_ll = static_cast(*lda), ldb_ll = static_cast(*ldb), ldc_ll = static_cast(*ldc); - registered_dgemm_ilp64(transa, transb, &m_ll, &n_ll, &k_ll, alpha, a, &lda_ll, b, &ldb_ll, beta, c, &ldc_ll); - } else { - // Use LP64 directly - if (!registered_dgemm_lp64) { - throw std::runtime_error("dgemm (LP64) symbol not found - BLAS library may not be linked correctly"); - } - registered_dgemm_lp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); - } -} - -// Implementation of my_zgemm - Fortran BLAS interface (LP64) -void my_zgemm(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const void* alpha, const void* a, const int* lda, - const void* b, const int* ldb, const void* beta, - void* c, const int* ldc) -{ - // Initialize symbols if not already done - init_blas_symbols(); - - if (use_ilp64) { - // Convert LP64 arguments to ILP64 - if (!registered_zgemm_ilp64) { - throw std::runtime_error("zgemm (ILP64) symbol not found - BLAS library may not be linked correctly"); - } - const long long m_ll = static_cast(*m), n_ll = static_cast(*n), k_ll = static_cast(*k); - const long long lda_ll = static_cast(*lda), ldb_ll = static_cast(*ldb), ldc_ll = static_cast(*ldc); - registered_zgemm_ilp64(transa, transb, &m_ll, &n_ll, &k_ll, alpha, a, &lda_ll, b, &ldb_ll, beta, c, &ldc_ll); - } else { - // Use LP64 directly - if (!registered_zgemm_lp64) { - throw std::runtime_error("zgemm (LP64) symbol not found - BLAS library may not be linked correctly"); - } - registered_zgemm_lp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); - } -} - -// Implementation of my_dgemm64 - Fortran BLAS interface (ILP64) -void my_dgemm64(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const double* alpha, const double* a, const long long* lda, - const double* b, const long long* ldb, const double* beta, - double* c, const long long* ldc) -{ - // Initialize symbols if not already done - init_blas_symbols(); - - if (use_ilp64) { - // Use ILP64 directly - if (!registered_dgemm_ilp64) { - throw std::runtime_error("dgemm64 (ILP64) symbol not found - BLAS library may not be linked correctly"); - } - registered_dgemm_ilp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); - } else { - // Convert ILP64 arguments to LP64 - if (!registered_dgemm_lp64) { - throw std::runtime_error("dgemm (LP64) symbol not found - BLAS library may not be linked correctly"); - } - const int m_int = static_cast(*m), n_int = static_cast(*n), k_int = static_cast(*k); - const int lda_int = static_cast(*lda), ldb_int = static_cast(*ldb), ldc_int = static_cast(*ldc); - registered_dgemm_lp64(transa, transb, &m_int, &n_int, &k_int, alpha, a, &lda_int, b, &ldb_int, beta, c, &ldc_int); - } -} - -// Implementation of my_zgemm64 - Fortran BLAS interface (ILP64) -void my_zgemm64(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const void* alpha, const void* a, const long long* lda, - const void* b, const long long* ldb, const void* beta, - void* c, const long long* ldc) -{ - // Initialize symbols if not already done - init_blas_symbols(); - - if (use_ilp64) { - // Use ILP64 directly - if (!registered_zgemm_ilp64) { - throw std::runtime_error("zgemm64 (ILP64) symbol not found - BLAS library may not be linked correctly"); - } - registered_zgemm_ilp64(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); - } else { - // Convert ILP64 arguments to LP64 - if (!registered_zgemm_lp64) { - throw std::runtime_error("zgemm (LP64) symbol not found - BLAS library may not be linked correctly"); - } - const int m_int = static_cast(*m), n_int = static_cast(*n), k_int = static_cast(*k); - const int lda_int = static_cast(*lda), ldb_int = static_cast(*ldb), ldc_int = static_cast(*ldc); - registered_zgemm_lp64(transa, transb, &m_int, &n_int, &k_int, alpha, a, &lda_int, b, &ldb_int, beta, c, &ldc_int); - } -} +#ifdef SPARSEIR_USE_BLAS } // namespace sparseir +#endif // SPARSEIR_USE_BLAS diff --git a/backend/cxx/src/gemm_external.impl b/backend/cxx/src/gemm_external.impl new file mode 100644 index 00000000..a273d982 --- /dev/null +++ b/backend/cxx/src/gemm_external.impl @@ -0,0 +1,109 @@ +// External function pointer registration implementation +// This file is included when SPARSEIR_USE_EXTERN_FBLAS_PTR is defined +// Note: This file is included inside namespace sparseir {} in gemm.cpp +// +// IMPORTANT: In this mode, BLAS libraries are NOT directly linked. +// Therefore, dgemm_ and zgemm_ symbols do NOT exist. +// All BLAS calls go through externally registered function pointers. + +#include +#include + +// Function pointer types for external registration +// (No extern "C" dgemm_/zgemm_ declarations here - they don't exist when using external pointers) +#ifdef SPARSEIR_USE_ILP64 +using dgemm_fptr = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const double*, + const double*, const long long*, const double*, + const long long*, const double*, double*, const long long*); + +using zgemm_fptr = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const void*, + const void*, const long long*, const void*, + const long long*, const void*, void*, const long long*); +#else +using dgemm_fptr = void(*)(const char*, const char*, const int*, + const int*, const int*, const double*, + const double*, const int*, const double*, + const int*, const double*, double*, const int*); + +using zgemm_fptr = void(*)(const char*, const char*, const int*, + const int*, const int*, const void*, + const void*, const int*, const void*, + const int*, const void*, void*, const int*); +#endif + +// Storage for externally registered function pointers +static dgemm_fptr registered_dgemm = nullptr; +static zgemm_fptr registered_zgemm = nullptr; + +extern "C" { + +// Register function pointers from outside (for Python bindings) +#ifdef SPARSEIR_USE_ILP64 +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm = reinterpret_cast(dgemm_fn); + registered_zgemm = reinterpret_cast(zgemm_fn); +} +#else +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm = reinterpret_cast(dgemm_fn); + registered_zgemm = reinterpret_cast(zgemm_fn); +} +#endif + +} // extern "C" + +#ifdef SPARSEIR_USE_ILP64 +// ILP64 interface +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc) +{ + if (!registered_dgemm) { + throw std::runtime_error("dgemm function pointer not registered - call spir_register_dgemm_zgemm_ilp64() first"); + } + registered_dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc) +{ + if (!registered_zgemm) { + throw std::runtime_error("zgemm function pointer not registered - call spir_register_dgemm_zgemm_ilp64() first"); + } + registered_zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +#else +// LP64 interface (default) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc) +{ + if (!registered_dgemm) { + throw std::runtime_error("dgemm function pointer not registered - call spir_register_dgemm_zgemm_lp64() first"); + } + registered_dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc) +{ + if (!registered_zgemm) { + throw std::runtime_error("zgemm function pointer not registered - call spir_register_dgemm_zgemm_lp64() first"); + } + registered_zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} +#endif + diff --git a/backend/cxx/src/gemm_link.impl b/backend/cxx/src/gemm_link.impl new file mode 100644 index 00000000..84edc8c2 --- /dev/null +++ b/backend/cxx/src/gemm_link.impl @@ -0,0 +1,75 @@ +// Direct BLAS linking implementation +// This file is included when SPARSEIR_USE_EXTERN_FBLAS_PTR is NOT defined +// Note: This file is included inside namespace sparseir {} in gemm.cpp + +#include +#include + +// Declare Fortran BLAS functions based on compile-time ILP64/LP64 selection +extern "C" { +#ifdef SPARSEIR_USE_ILP64 + // ILP64 interface: 64-bit integers + void dgemm_(const char* transa, const char* transb, const long long* m, + const long long* n, const long long* k, const double* alpha, + const double* a, const long long* lda, const double* b, + const long long* ldb, const double* beta, double* c, const long long* ldc); + + void zgemm_(const char* transa, const char* transb, const long long* m, + const long long* n, const long long* k, const void* alpha, + const void* a, const long long* lda, const void* b, + const long long* ldb, const void* beta, void* c, const long long* ldc); +#else + // LP64 interface: 32-bit integers (default) + void dgemm_(const char* transa, const char* transb, const int* m, + const int* n, const int* k, const double* alpha, + const double* a, const int* lda, const double* b, + const int* ldb, const double* beta, double* c, const int* ldc); + + void zgemm_(const char* transa, const char* transb, const int* m, + const int* n, const int* k, const void* alpha, + const void* a, const int* lda, const void* b, + const int* ldb, const void* beta, void* c, const int* ldc); +#endif +} + +#ifdef SPARSEIR_USE_ILP64 +// ILP64 interface +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc) +{ + dgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc) +{ + zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +#else +// LP64 interface (default) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc) +{ + dgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc) +{ + zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} +#endif + From 656e69f3f4eef1247efb25b0bb2d88c2df5570e4 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 20:20:19 +0900 Subject: [PATCH 06/11] fix: correct gemm.hpp preprocessor directives and namespace scope - Fix duplicate #else/#endif blocks in gemm.hpp - Wrap all integer type declarations with #ifdef SPARSEIR_USE_ILP64 blocks - Move #include statements inside namespace sparseir {} in gemm.cpp - Remove duplicate alpha/beta declaration lines - Ensures my_dgemm/my_zgemm functions are properly defined in namespace --- backend/cxx/include/sparseir/gemm.hpp | 34 +++++++++++++++++++++++---- backend/cxx/src/gemm.cpp | 2 -- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/backend/cxx/include/sparseir/gemm.hpp b/backend/cxx/include/sparseir/gemm.hpp index c63ec444..18531c28 100644 --- a/backend/cxx/include/sparseir/gemm.hpp +++ b/backend/cxx/include/sparseir/gemm.hpp @@ -71,10 +71,6 @@ inline void _gemm_blas_impl(const double *A, #ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); -#else - const int m = static_cast(M), n = static_cast(N), k = static_cast(K); - const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); -#endif #else const int m = static_cast(M), n = static_cast(N), k = static_cast(K); const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); @@ -171,8 +167,13 @@ inline void _gemm_blas_impl_transpose(const double *A, { const char transa = 'N', transb = 'T'; const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#endif my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } @@ -189,8 +190,13 @@ _gemm_blas_impl_transpose, std::complex, const char transa = 'N', transb = 'T'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#endif my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), @@ -246,8 +252,13 @@ _gemm_blas_impl_conj(const double *A, const double *B, { const char transa = 'T', transb = 'N'; const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } @@ -262,8 +273,13 @@ inline void _gemm_blas_impl_conj, std::complex, const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), @@ -286,8 +302,13 @@ _gemm_blas_impl_conj, double, std::complex>( const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A), &lda, reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), @@ -311,8 +332,13 @@ _gemm_blas_impl_conj, std::complex>( const char transa = 'T', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); +#ifdef SPARSEIR_USE_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), reinterpret_cast(A_complex.data()), &lda, reinterpret_cast(B), &ldb, reinterpret_cast(&beta), diff --git a/backend/cxx/src/gemm.cpp b/backend/cxx/src/gemm.cpp index 269cd967..8c88246d 100644 --- a/backend/cxx/src/gemm.cpp +++ b/backend/cxx/src/gemm.cpp @@ -2,7 +2,6 @@ #ifdef SPARSEIR_USE_BLAS namespace sparseir { -#endif // SPARSEIR_USE_BLAS #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR #include "gemm_external.impl" @@ -10,6 +9,5 @@ namespace sparseir { #include "gemm_link.impl" #endif -#ifdef SPARSEIR_USE_BLAS } // namespace sparseir #endif // SPARSEIR_USE_BLAS From d86985b38d85aa1c7297dd3edd4f0070d4648abc Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 20:21:52 +0900 Subject: [PATCH 07/11] fix: remove SPARSEIR_USE_BLAS guard since BLAS is always enabled - Remove #ifdef SPARSEIR_USE_BLAS from gemm.cpp - BLAS support is mandatory, so no conditional compilation needed - Fixes undefined symbol errors for my_dgemm/my_zgemm --- backend/cxx/src/gemm.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/cxx/src/gemm.cpp b/backend/cxx/src/gemm.cpp index 8c88246d..cb82312f 100644 --- a/backend/cxx/src/gemm.cpp +++ b/backend/cxx/src/gemm.cpp @@ -1,6 +1,5 @@ #include "sparseir/gemm.hpp" -#ifdef SPARSEIR_USE_BLAS namespace sparseir { #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR @@ -10,4 +9,3 @@ namespace sparseir { #endif } // namespace sparseir -#endif // SPARSEIR_USE_BLAS From 33c60fecd4187c00c2152fcff80ee237b995e787 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 20:34:51 +0900 Subject: [PATCH 08/11] refactor: unify BLAS ILP64 option and fix external mode - Rename SPARSEIR_USE_ILP64 to SPARSEIR_USE_BLAS_ILP64 for clarity - Skip BLAS detection when SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled - Update sparseir.h to conditionally declare registration functions - Remove obsolete SPARSEIR_USE_BLAS references from workflows and pyproject.toml - Remove --no-as-needed linker option (no longer needed) - Update README to reflect compile-time ILP64/LP64 selection --- .github/workflows/CI_PublishTestPyPI.yml | 5 +- .github/workflows/PublishPyPI.yml | 5 +- .github/workflows/test_python.yml | 2 - README.md | 50 +++++++++------ backend/cxx/CMakeLists.txt | 81 +++++++++++++----------- backend/cxx/include/sparseir/gemm.hpp | 24 +++---- backend/cxx/include/sparseir/sparseir.h | 30 ++++----- backend/cxx/src/gemm_external.impl | 6 +- backend/cxx/src/gemm_link.impl | 4 +- python/pyproject.toml | 2 - 10 files changed, 108 insertions(+), 101 deletions(-) diff --git a/.github/workflows/CI_PublishTestPyPI.yml b/.github/workflows/CI_PublishTestPyPI.yml index 059a7cd2..b3e6ba17 100644 --- a/.github/workflows/CI_PublishTestPyPI.yml +++ b/.github/workflows/CI_PublishTestPyPI.yml @@ -46,16 +46,13 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28" CIBW_BUILD: "${{ matrix.py_tag }}-*" - CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }} SPARSEIR_USE_BLAS=1" + CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }}" CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_SKIP: "*-manylinux_i686 *-musllinux_i686" # Install OpenBLAS using micromamba (conda-forge) - separate for manylinux and musllinux CIBW_BEFORE_ALL_MANYLINUX: "curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MUSLLINUX: "apk add --no-cache bzip2 && curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MACOS: "brew install openblas" - # Set environment variables to help CMake find OpenBLAS - CIBW_ENVIRONMENT_MANYLINUX: "SPARSEIR_USE_BLAS=1" - CIBW_ENVIRONMENT_MUSLLINUX: "SPARSEIR_USE_BLAS=1" with: package-dir: ./python output-dir: dist diff --git a/.github/workflows/PublishPyPI.yml b/.github/workflows/PublishPyPI.yml index 6f4b2bf6..9c386635 100644 --- a/.github/workflows/PublishPyPI.yml +++ b/.github/workflows/PublishPyPI.yml @@ -91,16 +91,13 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28" CIBW_BUILD: "${{ matrix.py_tag }}-*" - CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }} SPARSEIR_USE_BLAS=1" + CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }}" CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_SKIP: "*-manylinux_i686 *-musllinux_i686" # Install OpenBLAS using micromamba (conda-forge) - separate for manylinux and musllinux CIBW_BEFORE_ALL_MANYLINUX: "curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MUSLLINUX: "apk add --no-cache bzip2 && curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MACOS: "brew install openblas" - # Set environment variables to help CMake find OpenBLAS - CIBW_ENVIRONMENT_MANYLINUX: "SPARSEIR_USE_BLAS=1" - CIBW_ENVIRONMENT_MUSLLINUX: "SPARSEIR_USE_BLAS=1" with: package-dir: ./python output-dir: dist diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index a8d430c8..41c24c1c 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -54,8 +54,6 @@ jobs: run: | chmod +x run_tests.sh ./run_tests.sh - env: - SPARSEIR_USE_BLAS: 1 test-python-rollup: runs-on: ubuntu-latest diff --git a/README.md b/README.md index 28860913..c459f7f4 100644 --- a/README.md +++ b/README.md @@ -114,38 +114,36 @@ The choice between these modes is determined at compile time based on the `SPARS #### Mode 1: Link-time BLAS (Default) -In this mode, a BLAS library (OpenBLAS, Intel MKL, Apple Accelerate, etc.) is linked at build time. The library uses dynamic symbol resolution to automatically detect and use the appropriate BLAS functions at runtime. +In this mode, a BLAS library (OpenBLAS, Intel MKL, Apple Accelerate, etc.) is linked at build time. The Fortran BLAS functions (`dgemm_`, `zgemm_`) are called directly. -**Dynamic Symbol Resolution:** +**ILP64 vs LP64 Interface:** -The library uses `dlopen`/`dlsym` to dynamically resolve Fortran BLAS function symbols at runtime: -- Supports multiple BLAS implementations and symbol naming conventions (`dgemm_`, `dgemm`, `DGEMM_`, `DGEMM`, etc.) -- **ILP64 interfaces are prioritized over LP64 interfaces**: - 1. First attempts to find ILP64 symbols (`dgemm64_`, `dgemm64`, `ZGEMM64_`, etc.) - 2. If ILP64 symbols are found, they are used - 3. Otherwise, falls back to LP64 symbols (`dgemm_`, `dgemm`, `DGEMM_`, etc.) +The library uses compile-time selection to choose between ILP64 (64-bit integers) and LP64 (32-bit integers) BLAS interfaces based on the `SPARSEIR_USE_BLAS_ILP64` CMake option: -This automatic detection happens when the library is loaded, without requiring any runtime configuration. +- **LP64 (default)**: Uses 32-bit integers (`int`) for matrix dimensions. Suitable for matrices up to 2^31-1 elements. +- **ILP64**: Uses 64-bit integers (`long long`) for matrix dimensions. Required for matrices larger than 2^31-1 elements. + +The interface selection is determined at compile time, so you must match the BLAS library interface with the compile-time setting. **Building with Link-time BLAS:** ```bash -# Standard build with BLAS auto-detection +# Standard build with LP64 BLAS (default) mkdir -p build && cd build cmake .. cmake --build . -# On Ubuntu with OpenBLAS +# On Ubuntu with OpenBLAS (LP64) sudo apt install libopenblas-dev cmake .. -# On macOS (uses Accelerate framework automatically) +# On macOS (uses Accelerate framework automatically, LP64) cmake .. ``` **For ILP64 BLAS:** -If you need ILP64 support for large matrix operations (matrices larger than 2^31 elements), install ILP64-compatible BLAS libraries: +If you need ILP64 support for large matrix operations (matrices larger than 2^31 elements), install ILP64-compatible BLAS libraries and enable the ILP64 option: ```bash # Ubuntu with ILP64 OpenBLAS @@ -153,7 +151,11 @@ sudo apt install libopenblas64-0 libopenblas64-dev cmake .. -DSPARSEIR_USE_BLAS_ILP64=ON ``` -**Note**: The `SPARSEIR_USE_BLAS_ILP64` CMake option only affects which BLAS library CMake searches for during configuration (sets `BLA_SIZEOF_INTEGER=8`). At runtime, the library always tries ILP64 symbols first regardless of this option. +**Important**: +- The `SPARSEIR_USE_BLAS_ILP64` CMake option affects both: + 1. Which BLAS library CMake searches for during configuration (sets `BLA_SIZEOF_INTEGER=8` for ILP64 or `BLA_SIZEOF_INTEGER=4` for LP64) + 2. The compiled interface types in the library code (64-bit vs 32-bit integers) +- You must ensure the linked BLAS library matches the compile-time interface selection (ILP64 or LP64) **Manual BLAS Library Specification:** @@ -188,23 +190,30 @@ cmake --build . **Registering BLAS Functions:** -You must call one of these registration functions before using any BLAS functionality: +You must call the appropriate registration function before using any BLAS functionality. The function to call depends on the compile-time setting of `SPARSEIR_USE_BLAS_ILP64`: + +- **If built with LP64** (`SPARSEIR_USE_BLAS_ILP64` not set): Call `spir_register_dgemm_zgemm_lp64` +- **If built with ILP64** (`SPARSEIR_USE_BLAS_ILP64=ON`): Call `spir_register_dgemm_zgemm_ilp64` + +**C API declarations** (only available when `SPARSEIR_USE_EXTERN_FBLAS_PTR` is defined): -**For LP64 interface (32-bit integers):** +For LP64 interface (32-bit integers): ```c void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); ``` -**For ILP64 interface (64-bit integers):** +For ILP64 interface (64-bit integers): ```c void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); ``` **Important**: -- Only one registration function should be called (either LP64 or ILP64, not both) +- The registration function must match the compile-time interface selection (ILP64 or LP64) +- Only one registration function should be called (and only the one matching the build configuration) - The library will throw a runtime error if BLAS functions are used without prior registration +- Function pointers must match the Fortran BLAS signature with the correct integer type (32-bit or 64-bit) -**Example (Python with ctypes):** +**Example (Python with ctypes for LP64 build):** ```python import ctypes @@ -213,7 +222,10 @@ import scipy.linalg.cython_blas as blas lib = ctypes.CDLL("libsparseir.so") dgemm_ptr = ctypes.cast(blas.dgemm, ctypes.c_void_p).value zgemm_ptr = ctypes.cast(blas.zgemm, ctypes.c_void_p).value +# For LP64 build: lib.spir_register_dgemm_zgemm_lp64(dgemm_ptr, zgemm_ptr) +# For ILP64 build, use: +# lib.spir_register_dgemm_zgemm_ilp64(dgemm_ptr, zgemm_ptr) ``` ### Debug Logging at runtime diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 06fe00ed..39630f8e 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -20,6 +20,7 @@ set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Options +option(SPARSEIR_USE_EXTERN_FBLAS_PTR "Use external BLAS function pointers (for Python bindings)" OFF) option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) option(SPARSEIR_BUILD_TESTING "Enable testing" OFF) option(SPARSEIR_DEBUG "Enable debug logging" OFF) @@ -55,28 +56,44 @@ if(NOT xprec_POPULATED) FetchContent_Populate(XPrec) endif() -# BLAS configuration (always enabled) -# Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 -if(SPARSEIR_USE_BLAS_ILP64) - set(BLA_SIZEOF_INTEGER 8) - message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") -else() - set(BLA_SIZEOF_INTEGER 4) - message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") -endif() - -# Find BLAS package using standard CMake FindBLAS -find_package(BLAS REQUIRED) - -if(BLAS_FOUND) - message(STATUS "BLAS libraries found: ${BLAS_LIBRARIES}") +# BLAS configuration +if(SPARSEIR_USE_EXTERN_FBLAS_PTR) + # External function pointer mode: BLAS is provided at runtime, no linking needed + message(STATUS "BLAS will be provided via external function pointers at runtime") + add_compile_definitions(SPARSEIR_USE_EXTERN_FBLAS_PTR) + + # Set compile-time ILP64/LP64 selection if(SPARSEIR_USE_BLAS_ILP64) - message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") + message(STATUS "Using ILP64 interface (64-bit integers) for external BLAS") + add_compile_definitions(SPARSEIR_USE_BLAS_ILP64) else() - message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") + message(STATUS "Using LP64 interface (32-bit integers, default) for external BLAS") endif() else() - message(FATAL_ERROR "BLAS libraries not found - BLAS is required") + # Link-time BLAS mode: find and link BLAS library + # Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 + if(SPARSEIR_USE_BLAS_ILP64) + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") + else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") + endif() + + # Find BLAS package using standard CMake FindBLAS + find_package(BLAS REQUIRED) + + if(BLAS_FOUND) + message(STATUS "BLAS libraries found: ${BLAS_LIBRARIES}") + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") + add_compile_definitions(SPARSEIR_USE_BLAS_ILP64) + else() + message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") + endif() + else() + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") + endif() endif() # C++ Library Build @@ -119,28 +136,16 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) -# BLAS/LAPACK linking (required) -# Link BLAS privately, but ensure it's not dropped by linker -# This allows dlopen/dlsym to find BLAS symbols at runtime -if(BLAS_LIBRARIES OR BLAS_FOUND) - target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) - message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") - - # Ensure BLAS dependency is retained even if symbols are not directly used - # This is necessary for dynamic symbol resolution at runtime - if(NOT APPLE) - target_link_options(sparseir PRIVATE "LINKER:--no-as-needed") +# BLAS/LAPACK linking (only when not using external function pointers) +if(NOT SPARSEIR_USE_EXTERN_FBLAS_PTR) + if(BLAS_LIBRARIES OR BLAS_FOUND) + target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) + message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") + else() + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() else() - message(FATAL_ERROR "BLAS libraries not found - BLAS is required") -endif() - -# macOS Accelerate framework -if(APPLE) - find_library(ACCELERATE_FRAMEWORK Accelerate) - if(ACCELERATE_FRAMEWORK) - target_link_libraries(sparseir PRIVATE ${ACCELERATE_FRAMEWORK}) - endif() + message(STATUS "BLAS linking skipped (using external function pointers)") endif() # Library convention alias diff --git a/backend/cxx/include/sparseir/gemm.hpp b/backend/cxx/include/sparseir/gemm.hpp index 18531c28..7291658b 100644 --- a/backend/cxx/include/sparseir/gemm.hpp +++ b/backend/cxx/include/sparseir/gemm.hpp @@ -16,8 +16,8 @@ namespace sparseir { // Fortran BLAS interface wrapper functions -// Integer type is selected at compile time based on SPARSEIR_USE_ILP64 -#ifdef SPARSEIR_USE_ILP64 +// Integer type is selected at compile time based on SPARSEIR_USE_BLAS_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 // ILP64 interface: 64-bit integers void my_dgemm(const char* transa, const char* transb, const long long* m, const long long* n, const long long* k, @@ -68,7 +68,7 @@ inline void _gemm_blas_impl(const double *A, { const char transa = 'N', transb = 'N'; const double alpha = 1.0, beta = 0.0; -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); #else @@ -89,7 +89,7 @@ inline void _gemm_blas_impl, std::complex, const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); #else @@ -117,7 +117,7 @@ inline void _gemm_blas_impl, std::complex>( const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); #else @@ -145,7 +145,7 @@ inline void _gemm_blas_impl, double, std::complex>( const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); #else @@ -167,7 +167,7 @@ inline void _gemm_blas_impl_transpose(const double *A, { const char transa = 'N', transb = 'T'; const double alpha = 1.0, beta = 0.0; -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); #else @@ -190,7 +190,7 @@ _gemm_blas_impl_transpose, std::complex, const char transa = 'N', transb = 'T'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); #else @@ -252,7 +252,7 @@ _gemm_blas_impl_conj(const double *A, const double *B, { const char transa = 'T', transb = 'N'; const double alpha = 1.0, beta = 0.0; -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); #else @@ -273,7 +273,7 @@ inline void _gemm_blas_impl_conj, std::complex, const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); #else @@ -302,7 +302,7 @@ _gemm_blas_impl_conj, double, std::complex>( const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); #else @@ -332,7 +332,7 @@ _gemm_blas_impl_conj, std::complex>( const char transa = 'T', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); #else diff --git a/backend/cxx/include/sparseir/sparseir.h b/backend/cxx/include/sparseir/sparseir.h index 453b3450..77a61d27 100644 --- a/backend/cxx/include/sparseir/sparseir.h +++ b/backend/cxx/include/sparseir/sparseir.h @@ -1259,41 +1259,41 @@ int spir_sampling_fit_zd(const spir_sampling *s, int order, int ndim, #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR +#ifdef SPARSEIR_USE_BLAS_ILP64 /** - * @brief Register BLAS function pointers for LP64 interface (32-bit integers) + * @brief Register BLAS function pointers for ILP64 interface (64-bit integers) * * This function registers both dgemm and zgemm function pointers from an external - * BLAS library with LP64 interface (32-bit integers). This is used when + * BLAS library with ILP64 interface (64-bit integers). This is used when * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. * - * @param dgemm_fn Function pointer to Fortran dgemm (LP64 interface) - * @param zgemm_fn Function pointer to Fortran zgemm (LP64 interface) + * @param dgemm_fn Function pointer to Fortran dgemm (ILP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (ILP64 interface) * - * @note The function pointers must be valid Fortran BLAS functions with LP64 + * @note The function pointers must be valid Fortran BLAS functions with ILP64 * interface signature * @note This function must be called before using any BLAS functionality when * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled - * @note Only one registration function should be called (either LP64 or ILP64) */ -void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); - +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +#else /** - * @brief Register BLAS function pointers for ILP64 interface (64-bit integers) + * @brief Register BLAS function pointers for LP64 interface (32-bit integers) * * This function registers both dgemm and zgemm function pointers from an external - * BLAS library with ILP64 interface (64-bit integers). This is used when + * BLAS library with LP64 interface (32-bit integers). This is used when * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. * - * @param dgemm_fn Function pointer to Fortran dgemm (ILP64 interface) - * @param zgemm_fn Function pointer to Fortran zgemm (ILP64 interface) + * @param dgemm_fn Function pointer to Fortran dgemm (LP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (LP64 interface) * - * @note The function pointers must be valid Fortran BLAS functions with ILP64 + * @note The function pointers must be valid Fortran BLAS functions with LP64 * interface signature * @note This function must be called before using any BLAS functionality when * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled - * @note Only one registration function should be called (either LP64 or ILP64) */ -void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); +#endif // SPARSEIR_USE_BLAS_ILP64 #endif // SPARSEIR_USE_EXTERN_FBLAS_PTR diff --git a/backend/cxx/src/gemm_external.impl b/backend/cxx/src/gemm_external.impl index a273d982..c95e5572 100644 --- a/backend/cxx/src/gemm_external.impl +++ b/backend/cxx/src/gemm_external.impl @@ -11,7 +11,7 @@ // Function pointer types for external registration // (No extern "C" dgemm_/zgemm_ declarations here - they don't exist when using external pointers) -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 using dgemm_fptr = void(*)(const char*, const char*, const long long*, const long long*, const long long*, const double*, const double*, const long long*, const double*, @@ -40,7 +40,7 @@ static zgemm_fptr registered_zgemm = nullptr; extern "C" { // Register function pointers from outside (for Python bindings) -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn) { registered_dgemm = reinterpret_cast(dgemm_fn); registered_zgemm = reinterpret_cast(zgemm_fn); @@ -54,7 +54,7 @@ void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn) { } // extern "C" -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 // ILP64 interface void my_dgemm(const char* transa, const char* transb, const long long* m, const long long* n, const long long* k, diff --git a/backend/cxx/src/gemm_link.impl b/backend/cxx/src/gemm_link.impl index 84edc8c2..489ccb69 100644 --- a/backend/cxx/src/gemm_link.impl +++ b/backend/cxx/src/gemm_link.impl @@ -7,7 +7,7 @@ // Declare Fortran BLAS functions based on compile-time ILP64/LP64 selection extern "C" { -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 // ILP64 interface: 64-bit integers void dgemm_(const char* transa, const char* transb, const long long* m, const long long* n, const long long* k, const double* alpha, @@ -32,7 +32,7 @@ extern "C" { #endif } -#ifdef SPARSEIR_USE_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 // ILP64 interface void my_dgemm(const char* transa, const char* transb, const long long* m, const long long* n, const long long* k, diff --git a/python/pyproject.toml b/python/pyproject.toml index b60467b4..547334df 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -42,7 +42,6 @@ cmake.build-type = "Release" # CMake arguments cmake.args = [ - "-DSPARSEIR_USE_BLAS=ON", "-DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON", "-DBUILD_SHARED_LIBS=ON", "-DCMAKE_CXX_STANDARD=11", @@ -67,7 +66,6 @@ sdist.include = [ [tool.scikit-build.cmake.define] -SPARSEIR_USE_BLAS = "ON" BUILD_SHARED_LIBS = "ON" CMAKE_CXX_STANDARD = "11" CMAKE_CXX_STANDARD_REQUIRED = "ON" From 7fcfc90dc094880a48edd1411ca658f45279d3e0 Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 21:02:43 +0900 Subject: [PATCH 09/11] fix: ensure ILP64 BLAS is used in capi_test and capi_sample - Add BLA_SIZEOF_INTEGER setting in capi_test/CMakeLists.txt based on SPARSEIR_USE_BLAS_ILP64 env var - Update test_with_cxx_backend.sh and run_sample.sh to support ILP64 via environment variable - Set SPARSEIR_USE_BLAS_ILP64=1 in ILP64 test workflow steps - Add parallel execution (-j) and timeout for ILP64 tests - Remove macOS Accelerate framework linking from capi_test (no longer needed) --- .github/workflows/test_cxx_backend.yml | 9 +++++++-- capi_sample/run_sample.sh | 4 ++-- capi_test/CMakeLists.txt | 17 +++++++++-------- capi_test/test_with_cxx_backend.sh | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test_cxx_backend.yml b/.github/workflows/test_cxx_backend.yml index 8f4cf816..04a254e7 100644 --- a/.github/workflows/test_cxx_backend.yml +++ b/.github/workflows/test_cxx_backend.yml @@ -43,6 +43,7 @@ jobs: test-cxx-backend-ilp64: runs-on: ubuntu-latest name: C++ backend with ILP64 BLAS + timeout-minutes: 60 # Allow up to 60 minutes for ILP64 tests steps: - uses: actions/checkout@v5 @@ -57,15 +58,19 @@ jobs: rm -rf build mkdir -p build && cd build cmake .. -DSPARSEIR_BUILD_TESTING=ON -DSPARSEIR_USE_BLAS_ILP64=ON - cmake --build . - ctest --output-on-failure + cmake --build . -j$(nproc) + ctest --output-on-failure -j$(nproc) - name: Build and test capi_test against ILP64 backend working-directory: capi_test + env: + SPARSEIR_USE_BLAS_ILP64: 1 run: ./test_with_cxx_backend.sh - name: Build and run capi_sample with ILP64 working-directory: capi_sample + env: + SPARSEIR_USE_BLAS_ILP64: 1 run: ./run_sample.sh test-cxx-backend-rollup: diff --git a/capi_sample/run_sample.sh b/capi_sample/run_sample.sh index 9d6e65d7..0aba5327 100755 --- a/capi_sample/run_sample.sh +++ b/capi_sample/run_sample.sh @@ -27,8 +27,8 @@ cd "$BUILD_DIR/build_backend" cmake ../../../backend/cxx \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="../../work_cxx/install" \ - -DSPARSEIR_USE_BLAS=ON \ - -DSPARSEIR_BUILD_TESTING=OFF + -DSPARSEIR_BUILD_TESTING=OFF \ + ${SPARSEIR_USE_BLAS_ILP64:+-DSPARSEIR_USE_BLAS_ILP64=ON} cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) cmake --install . diff --git a/capi_test/CMakeLists.txt b/capi_test/CMakeLists.txt index 59af36d3..42c6c604 100644 --- a/capi_test/CMakeLists.txt +++ b/capi_test/CMakeLists.txt @@ -63,19 +63,20 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") endif() # Optional BLAS linking +# Check if ILP64 BLAS is requested (via environment variable or CMake option) +if(DEFINED ENV{SPARSEIR_USE_BLAS_ILP64} AND "$ENV{SPARSEIR_USE_BLAS_ILP64}") + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "capi_test: Using ILP64 BLAS (64-bit integers)") +else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "capi_test: Using LP64 BLAS (32-bit integers, default)") +endif() + find_package(BLAS QUIET) if(BLAS_FOUND) target_link_libraries(cinterface_integration PRIVATE ${BLAS_LIBRARIES}) endif() -# macOS Accelerate framework -if(APPLE) - find_library(ACCELERATE_FRAMEWORK Accelerate) - if(ACCELERATE_FRAMEWORK) - target_link_libraries(cinterface_integration PRIVATE ${ACCELERATE_FRAMEWORK}) - endif() -endif() - # Enable testing enable_testing() add_test(NAME cinterface_integration COMMAND cinterface_integration -d yes) diff --git a/capi_test/test_with_cxx_backend.sh b/capi_test/test_with_cxx_backend.sh index 5488ba39..ebe30c09 100755 --- a/capi_test/test_with_cxx_backend.sh +++ b/capi_test/test_with_cxx_backend.sh @@ -30,7 +30,7 @@ cmake "$BACKEND_DIR" \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ -DSPARSEIR_BUILD_TESTING=OFF \ - -DSPARSEIR_USE_BLAS=ON + ${SPARSEIR_USE_BLAS_ILP64:+-DSPARSEIR_USE_BLAS_ILP64=ON} echo -e "${YELLOW}Building backend/cxx...${NC}" cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) From 57a9cf74d855bb490e3fa201690c6cb1791f8c9f Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 21:04:51 +0900 Subject: [PATCH 10/11] chore: add verbose flag to ILP64 ctest for better debugging --- .github/workflows/test_cxx_backend.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_cxx_backend.yml b/.github/workflows/test_cxx_backend.yml index 04a254e7..de51852e 100644 --- a/.github/workflows/test_cxx_backend.yml +++ b/.github/workflows/test_cxx_backend.yml @@ -59,7 +59,7 @@ jobs: mkdir -p build && cd build cmake .. -DSPARSEIR_BUILD_TESTING=ON -DSPARSEIR_USE_BLAS_ILP64=ON cmake --build . -j$(nproc) - ctest --output-on-failure -j$(nproc) + ctest --output-on-failure --verbose -j$(nproc) - name: Build and test capi_test against ILP64 backend working-directory: capi_test From e519a43deea9d5840324c01f892967217e331a9d Mon Sep 17 00:00:00 2001 From: Hiroshi Shinaoka Date: Wed, 29 Oct 2025 21:07:58 +0900 Subject: [PATCH 11/11] fix: set default CMAKE_BUILD_TYPE to Release - Set default build type to Release in CMakeLists.txt if not specified - Remove obsolete SPARSEIR_USE_BLAS option from build_capi_with_tests.sh - This ensures Release mode is used by default instead of Debug --- backend/cxx/CMakeLists.txt | 6 ++++++ backend/cxx/build_capi_with_tests.sh | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 39630f8e..e0df5674 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -19,6 +19,12 @@ project(SparseIR set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Set default build type to Release if not specified +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + message(STATUS "CMAKE_BUILD_TYPE not set, defaulting to Release") +endif() + # Options option(SPARSEIR_USE_EXTERN_FBLAS_PTR "Use external BLAS function pointers (for Python bindings)" OFF) option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) diff --git a/backend/cxx/build_capi_with_tests.sh b/backend/cxx/build_capi_with_tests.sh index 81860d87..20b97ab8 100755 --- a/backend/cxx/build_capi_with_tests.sh +++ b/backend/cxx/build_capi_with_tests.sh @@ -22,8 +22,7 @@ echo -e "${YELLOW}Configuring CMake...${NC}" cd "$BUILD_DIR" cmake .. \ -DCMAKE_BUILD_TYPE=Release \ - -DSPARSEIR_BUILD_TESTING=ON \ - -DSPARSEIR_USE_BLAS=ON + -DSPARSEIR_BUILD_TESTING=ON # Build echo -e "${YELLOW}Building...${NC}"