diff --git a/.github/workflows/CI_PublishTestPyPI.yml b/.github/workflows/CI_PublishTestPyPI.yml index 059a7cd2..b3e6ba17 100644 --- a/.github/workflows/CI_PublishTestPyPI.yml +++ b/.github/workflows/CI_PublishTestPyPI.yml @@ -46,16 +46,13 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28" CIBW_BUILD: "${{ matrix.py_tag }}-*" - CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }} SPARSEIR_USE_BLAS=1" + CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }}" CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_SKIP: "*-manylinux_i686 *-musllinux_i686" # Install OpenBLAS using micromamba (conda-forge) - separate for manylinux and musllinux CIBW_BEFORE_ALL_MANYLINUX: "curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MUSLLINUX: "apk add --no-cache bzip2 && curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MACOS: "brew install openblas" - # Set environment variables to help CMake find OpenBLAS - CIBW_ENVIRONMENT_MANYLINUX: "SPARSEIR_USE_BLAS=1" - CIBW_ENVIRONMENT_MUSLLINUX: "SPARSEIR_USE_BLAS=1" with: package-dir: ./python output-dir: dist diff --git a/.github/workflows/PublishPyPI.yml b/.github/workflows/PublishPyPI.yml index 6f4b2bf6..9c386635 100644 --- a/.github/workflows/PublishPyPI.yml +++ b/.github/workflows/PublishPyPI.yml @@ -91,16 +91,13 @@ jobs: CIBW_BUILD_VERBOSITY: 1 CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28" CIBW_BUILD: "${{ matrix.py_tag }}-*" - CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }} SPARSEIR_USE_BLAS=1" + CIBW_ENVIRONMENT_MACOS: "MACOSX_DEPLOYMENT_TARGET=${{ matrix.deployment_target || '11.0' }}" CIBW_ARCHS: ${{ matrix.cibw_archs }} CIBW_SKIP: "*-manylinux_i686 *-musllinux_i686" # Install OpenBLAS using micromamba (conda-forge) - separate for manylinux and musllinux CIBW_BEFORE_ALL_MANYLINUX: "curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MUSLLINUX: "apk add --no-cache bzip2 && curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba && ./bin/micromamba create -y -p /opt/openblas && ./bin/micromamba install -y -c conda-forge openblas -p /opt/openblas --no-deps" CIBW_BEFORE_ALL_MACOS: "brew install openblas" - # Set environment variables to help CMake find OpenBLAS - CIBW_ENVIRONMENT_MANYLINUX: "SPARSEIR_USE_BLAS=1" - CIBW_ENVIRONMENT_MUSLLINUX: "SPARSEIR_USE_BLAS=1" with: package-dir: ./python output-dir: dist diff --git a/.github/workflows/test_cxx_backend.yml b/.github/workflows/test_cxx_backend.yml index 87d22afd..de51852e 100644 --- a/.github/workflows/test_cxx_backend.yml +++ b/.github/workflows/test_cxx_backend.yml @@ -2,7 +2,7 @@ name: Test C++ Backend on: push: - branches: [ "main", "459-restructure-directories" ] + branches: [ "main" ] pull_request: branches: [ "main" ] workflow_dispatch: @@ -40,9 +40,44 @@ jobs: working-directory: capi_sample run: ./run_sample.sh + test-cxx-backend-ilp64: + runs-on: ubuntu-latest + name: C++ backend with ILP64 BLAS + timeout-minutes: 60 # Allow up to 60 minutes for ILP64 tests + + steps: + - uses: actions/checkout@v5 + + - name: Install dependencies with ILP64 BLAS + run: | + sudo apt update && sudo apt install -y libeigen3-dev libopenblas64-0 libopenblas64-dev + + - name: Build and test C++ backend with ILP64 + working-directory: backend/cxx + run: | + rm -rf build + mkdir -p build && cd build + cmake .. -DSPARSEIR_BUILD_TESTING=ON -DSPARSEIR_USE_BLAS_ILP64=ON + cmake --build . -j$(nproc) + ctest --output-on-failure --verbose -j$(nproc) + + - name: Build and test capi_test against ILP64 backend + working-directory: capi_test + env: + SPARSEIR_USE_BLAS_ILP64: 1 + run: ./test_with_cxx_backend.sh + + - name: Build and run capi_sample with ILP64 + working-directory: capi_sample + env: + SPARSEIR_USE_BLAS_ILP64: 1 + run: ./run_sample.sh + test-cxx-backend-rollup: runs-on: ubuntu-latest - needs: test-cxx-backend + needs: + - test-cxx-backend + - test-cxx-backend-ilp64 if: always() steps: - name: All tests passed diff --git a/.github/workflows/test_fortran.yml b/.github/workflows/test_fortran.yml index e69fd05b..33ce5a18 100644 --- a/.github/workflows/test_fortran.yml +++ b/.github/workflows/test_fortran.yml @@ -2,7 +2,7 @@ name: Test Fortran Bindings on: push: - branches: [ "main", "459-restructure-directories" ] + branches: [ "main" ] pull_request: branches: [ "main" ] workflow_dispatch: diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 7e398fe5..41c24c1c 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - 459-restructure-directories pull_request: branches: - main @@ -50,21 +49,11 @@ jobs: with: enable-cache: true - - name: Setup build environment + - name: Setup and run tests working-directory: python - run: python3 setup_build.py - - - name: Run uv sync - working-directory: python - run: uv sync - env: - SPARSEIR_USE_BLAS: 1 - - - name: Run tests - working-directory: python - run: uv run pytest tests/ -v - env: - SPARSEIR_USE_BLAS: 1 + run: | + chmod +x run_tests.sh + ./run_tests.sh test-python-rollup: runs-on: ubuntu-latest diff --git a/CMakeLists.txt b/CMakeLists.txt index e89ec56d..bab5a8ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,65 +49,32 @@ endif() # Eigen3 # BLAS/LAPACK options -option(SPARSEIR_USE_BLAS "Enable BLAS support" OFF) -option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface (requires SPARSEIR_USE_BLAS=ON)" OFF) - -# BLAS/LAPACK search and configuration -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - message(STATUS "BLAS support enabled") - - # ILP64 support - if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - message(STATUS "ILP64 BLAS interface enabled") - else() - message(STATUS "LP64 BLAS interface enabled (default)") - endif() +option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) + +# BLAS search and configuration (always enabled) +message(STATUS "BLAS support enabled") + +# Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 +if(SPARSEIR_USE_BLAS_ILP64) + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") +else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") +endif() + +# Find BLAS package using standard CMake FindBLAS +find_package(BLAS REQUIRED) - # Find BLAS package - if(SPARSEIR_USE_ILP64) - # For ILP64, check if BLAS_LIBRARIES is already set by user - if(BLAS_LIBRARIES) - message(STATUS "ILP64 BLAS library specified: ${BLAS_LIBRARIES}") - # Verify the library exists - if(NOT EXISTS "${BLAS_LIBRARIES}") - message(FATAL_ERROR "Specified ILP64 BLAS library not found: ${BLAS_LIBRARIES}") - endif() - set(BLAS_FOUND TRUE) - else() - # Try to find ILP64 BLAS automatically - find_library(BLAS_LIBRARIES - NAMES openblas64 - PATHS /usr/lib/x86_64-linux-gnu /usr/lib /usr/local/lib - NO_DEFAULT_PATH - ) - if(BLAS_LIBRARIES) - message(STATUS "ILP64 BLAS found automatically: ${BLAS_LIBRARIES}") - set(BLAS_FOUND TRUE) - else() - message(FATAL_ERROR "ILP64 BLAS (libopenblas64) not found. You can specify it with -DBLAS_LIBRARIES=/path/to/libopenblas64.so") - endif() - endif() +if(BLAS_FOUND) + message(STATUS "BLAS found: ${BLAS_LIBRARIES}") + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") else() - # For LP64, use standard CMake BLAS finder - find_package(BLAS REQUIRED) - if(BLAS_FOUND) - message(STATUS "BLAS found: ${BLAS_LIBRARIES}") - else() - message(FATAL_ERROR "BLAS not found") - endif() + message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") endif() - - # CMake's find_package(BLAS) should provide include directories - #if(BLAS_INCLUDE_DIRS) - #include_directories(${BLAS_INCLUDE_DIRS}) - #message(STATUS "BLAS include directories: ${BLAS_INCLUDE_DIRS}") - #else() - #message(FATAL_ERROR "BLAS include directories not found") - #endif() -elseif(SPARSEIR_USE_ILP64) - message(FATAL_ERROR "SPARSEIR_USE_ILP64 requires SPARSEIR_USE_BLAS=ON") +else() + message(FATAL_ERROR "BLAS not found") endif() @@ -187,23 +154,22 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) -# Add BLAS/LAPACK linking -if(SPARSEIR_USE_BLAS) - if(SPARSEIR_USE_ILP64 OR BLAS_FOUND) - target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) - message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") - else() - message(FATAL_ERROR "BLAS libraries not found") +# Add BLAS linking (always enabled) +# Link BLAS privately, but ensure it's not dropped by linker +# This allows dlopen/dlsym to find BLAS symbols at runtime +if(SPARSEIR_USE_BLAS_ILP64 OR BLAS_FOUND) + target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) + message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") + + # Ensure BLAS dependency is retained even if symbols are not directly used + # This is necessary for dynamic symbol resolution at runtime + if(NOT APPLE) + target_link_options(sparseir PRIVATE "LINKER:--no-as-needed") endif() else() - message(STATUS "BLAS linking disabled") + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() -if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(sparseir PRIVATE ${LAPACK_LIBRARIES}) -endif() - - # macOS-specific configuration if(APPLE AND ACCELERATE_FRAMEWORK) target_link_libraries(sparseir PRIVATE ${ACCELERATE_FRAMEWORK}) diff --git a/README.md b/README.md index 3db1c9e8..c459f7f4 100644 --- a/README.md +++ b/README.md @@ -99,71 +99,134 @@ cd build ### BLAS Support -By default, the library uses Eigen's internal implementations for matrix-matrix multiplication for fitting and sampling. -This does not require any additional system libraries. -However, the performance is not as good as the BLAS implementation. +BLAS support is **mandatory and always enabled** in this library. BLAS routines are used for performance-critical operations in fitting (`fit_tau`, `fit_matsubara`) and evaluation (`evaluate_tau`, `evaluate_matsubara`). -For performance critical applications, we recommend using BLAS. -For this, you need to set the `SPARSEIR_USE_BLAS` CMake option to `ON`. +#### Two Modes for BLAS Provision -```bash -cmake .. -DSPARSEIR_USE_BLAS=OFF -cmake .. -DSPARSEIR_USE_BLAS=ON -``` +The library supports two modes for providing BLAS functions: -Alternatively, you can set the `SPARSEIR_USE_BLAS` CMake option to `ON` in the `CMakeLists.txt` file of your project: +1. **Link-time BLAS (default)**: BLAS library is linked at build time +2. **Runtime BLAS registration**: BLAS function pointers are provided at runtime via C-API (used when `SPARSEIR_USE_EXTERN_FBLAS_PTR` is defined) -```cmake -cmake_minimum_required(VERSION 3.10) -project(MyProject) +The choice between these modes is determined at compile time based on the `SPARSEIR_USE_EXTERN_FBLAS_PTR` CMake option. -set(SPARSEIR_USE_BLAS ON) -``` +--- + +#### Mode 1: Link-time BLAS (Default) + +In this mode, a BLAS library (OpenBLAS, Intel MKL, Apple Accelerate, etc.) is linked at build time. The Fortran BLAS functions (`dgemm_`, `zgemm_`) are called directly. -Note: When enabling BLAS, ensure that the appropriate libraries (such as OpenBLAS, Intel MKL, or Apple Accelerate) can be found by CMake. +**ILP64 vs LP64 Interface:** -#### ILP64 BLAS Support +The library uses compile-time selection to choose between ILP64 (64-bit integers) and LP64 (32-bit integers) BLAS interfaces based on the `SPARSEIR_USE_BLAS_ILP64` CMake option: -For applications requiring large matrix operations (matrices larger than 2^31 elements), you can enable ILP64 BLAS support by setting both `SPARSEIR_USE_BLAS` and `SPARSEIR_USE_ILP64` to `ON`: +- **LP64 (default)**: Uses 32-bit integers (`int`) for matrix dimensions. Suitable for matrices up to 2^31-1 elements. +- **ILP64**: Uses 64-bit integers (`long long`) for matrix dimensions. Required for matrices larger than 2^31-1 elements. + +The interface selection is determined at compile time, so you must match the BLAS library interface with the compile-time setting. + +**Building with Link-time BLAS:** ```bash -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +# Standard build with LP64 BLAS (default) +mkdir -p build && cd build +cmake .. +cmake --build . + +# On Ubuntu with OpenBLAS (LP64) +sudo apt install libopenblas-dev +cmake .. + +# On macOS (uses Accelerate framework automatically, LP64) +cmake .. ``` -This option: -- Uses 64-bit integers for matrix dimensions and leading dimensions -- Directly calls Fortran BLAS interfaces instead of CBLAS -- Requires ILP64-compatible BLAS libraries (e.g., `libopenblas64-0` on Ubuntu) +**For ILP64 BLAS:** + +If you need ILP64 support for large matrix operations (matrices larger than 2^31 elements), install ILP64-compatible BLAS libraries and enable the ILP64 option: -Example with ILP64 OpenBLAS on Ubuntu: ```bash +# Ubuntu with ILP64 OpenBLAS sudo apt install libopenblas64-0 libopenblas64-dev -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +cmake .. -DSPARSEIR_USE_BLAS_ILP64=ON ``` -#### Manual BLAS Library Specification +**Important**: +- The `SPARSEIR_USE_BLAS_ILP64` CMake option affects both: + 1. Which BLAS library CMake searches for during configuration (sets `BLA_SIZEOF_INTEGER=8` for ILP64 or `BLA_SIZEOF_INTEGER=4` for LP64) + 2. The compiled interface types in the library code (64-bit vs 32-bit integers) +- You must ensure the linked BLAS library matches the compile-time interface selection (ILP64 or LP64) + +**Manual BLAS Library Specification:** -If CMake cannot automatically find the ILP64 BLAS library, you can specify it manually: +If CMake cannot automatically find the BLAS library, you can specify it manually: ```bash -# Specify the exact library path -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON \ +# For standard LP64 BLAS +cmake .. -DBLAS_LIBRARIES=/usr/lib/x86_64-linux-gnu/libopenblas.so + +# For ILP64 BLAS +cmake .. -DSPARSEIR_USE_BLAS_ILP64=ON \ -DBLAS_LIBRARIES=/usr/lib/x86_64-linux-gnu/libopenblas64.so.0 # Or use environment variables to help CMake find it export BLA_VENDOR=OpenBLAS -cmake .. -DSPARSEIR_USE_BLAS=ON -DSPARSEIR_USE_ILP64=ON +cmake .. ``` -#### Troubleshooting ILP64 BLAS +--- + +#### Mode 2: Runtime BLAS Registration + +In this mode (enabled with `-DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON`), the library does not link BLAS at build time. Instead, BLAS function pointers must be registered at runtime before using the library. This mode is primarily used for language bindings (e.g., Python) where BLAS functions are provided by the host environment. -If you encounter linking errors like "undefined reference to `dgemm_`", ensure: +**Building with Runtime BLAS Registration:** + +```bash +mkdir -p build && cd build +cmake .. -DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON +cmake --build . +``` -1. **ILP64 BLAS is installed**: Check with `find /usr/lib -name "*openblas64*"` -2. **Library contains ILP64 symbols**: Verify with `nm -D /path/to/libopenblas64.so | grep dgemm_` -3. **CMake finds the library**: Check the CMake output for "ILP64 BLAS found" messages +**Registering BLAS Functions:** -**Note**: ILP64 support requires `SPARSEIR_USE_BLAS=ON`. +You must call the appropriate registration function before using any BLAS functionality. The function to call depends on the compile-time setting of `SPARSEIR_USE_BLAS_ILP64`: + +- **If built with LP64** (`SPARSEIR_USE_BLAS_ILP64` not set): Call `spir_register_dgemm_zgemm_lp64` +- **If built with ILP64** (`SPARSEIR_USE_BLAS_ILP64=ON`): Call `spir_register_dgemm_zgemm_ilp64` + +**C API declarations** (only available when `SPARSEIR_USE_EXTERN_FBLAS_PTR` is defined): + +For LP64 interface (32-bit integers): +```c +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); +``` + +For ILP64 interface (64-bit integers): +```c +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +``` + +**Important**: +- The registration function must match the compile-time interface selection (ILP64 or LP64) +- Only one registration function should be called (and only the one matching the build configuration) +- The library will throw a runtime error if BLAS functions are used without prior registration +- Function pointers must match the Fortran BLAS signature with the correct integer type (32-bit or 64-bit) + +**Example (Python with ctypes for LP64 build):** + +```python +import ctypes +import scipy.linalg.cython_blas as blas + +lib = ctypes.CDLL("libsparseir.so") +dgemm_ptr = ctypes.cast(blas.dgemm, ctypes.c_void_p).value +zgemm_ptr = ctypes.cast(blas.zgemm, ctypes.c_void_p).value +# For LP64 build: +lib.spir_register_dgemm_zgemm_lp64(dgemm_ptr, zgemm_ptr) +# For ILP64 build, use: +# lib.spir_register_dgemm_zgemm_ilp64(dgemm_ptr, zgemm_ptr) +``` ### Debug Logging at runtime @@ -188,6 +251,27 @@ This will create the `docs/html` directory. Open `docs/html/index.html` with you Please refer [`./sample_c/README.md`](./sample_c/README.md) to learn more. +## Python Bindings + +Python bindings are located in the `python/` directory. The bindings use `SPARSEIR_USE_EXTERN_FBLAS_PTR` to register BLAS function pointers from SciPy/Numpy at runtime. + +### Testing Python Bindings + +To test the Python bindings, use the provided `run_tests.sh` script: + +```bash +cd python +./run_tests.sh +``` + +This script: +1. Cleans up previous build artifacts (copied source files, `.venv`, build cache) +2. Sets up the build environment using `setup_build.py` +3. Installs dependencies and rebuilds the package using `uv sync --refresh` +4. Runs the test suite using `uv run pytest tests/ -v` + +The script ensures a clean build environment and automatically handles dependency management with `uv`. + ## For developers diff --git a/backend/cxx/CMakeLists.txt b/backend/cxx/CMakeLists.txt index 02d8eae8..e0df5674 100644 --- a/backend/cxx/CMakeLists.txt +++ b/backend/cxx/CMakeLists.txt @@ -19,10 +19,15 @@ project(SparseIR set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) +# Set default build type to Release if not specified +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) + message(STATUS "CMAKE_BUILD_TYPE not set, defaulting to Release") +endif() + # Options -option(SPARSEIR_USE_BLAS "Enable BLAS support" OFF) -option(SPARSEIR_USE_ILP64 "Enable ILP64 BLAS interface" OFF) -option(SPARSEIR_USE_LAPACKE "Enable LAPACKE support" OFF) +option(SPARSEIR_USE_EXTERN_FBLAS_PTR "Use external BLAS function pointers (for Python bindings)" OFF) +option(SPARSEIR_USE_BLAS_ILP64 "Enable ILP64 BLAS interface (64-bit integers)" OFF) option(SPARSEIR_BUILD_TESTING "Enable testing" OFF) option(SPARSEIR_DEBUG "Enable debug logging" OFF) @@ -57,17 +62,43 @@ if(NOT xprec_POPULATED) FetchContent_Populate(XPrec) endif() -# BLAS/LAPACK configuration -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - if(SPARSEIR_USE_ILP64) - add_compile_definitions(SPARSEIR_USE_ILP64) - # Try to find ILP64 BLAS - if(NOT BLAS_LIBRARIES) - find_library(BLAS_LIBRARIES NAMES openblas64) +# BLAS configuration +if(SPARSEIR_USE_EXTERN_FBLAS_PTR) + # External function pointer mode: BLAS is provided at runtime, no linking needed + message(STATUS "BLAS will be provided via external function pointers at runtime") + add_compile_definitions(SPARSEIR_USE_EXTERN_FBLAS_PTR) + + # Set compile-time ILP64/LP64 selection + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "Using ILP64 interface (64-bit integers) for external BLAS") + add_compile_definitions(SPARSEIR_USE_BLAS_ILP64) + else() + message(STATUS "Using LP64 interface (32-bit integers, default) for external BLAS") + endif() +else() + # Link-time BLAS mode: find and link BLAS library + # Set BLAS integer size based on SPARSEIR_USE_BLAS_ILP64 + if(SPARSEIR_USE_BLAS_ILP64) + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "Searching for ILP64 BLAS interface (64-bit integers)") + else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "Searching for LP64 BLAS interface (32-bit integers, default)") + endif() + + # Find BLAS package using standard CMake FindBLAS + find_package(BLAS REQUIRED) + + if(BLAS_FOUND) + message(STATUS "BLAS libraries found: ${BLAS_LIBRARIES}") + if(SPARSEIR_USE_BLAS_ILP64) + message(STATUS "BLAS with ILP64 interface (BLA_SIZEOF_INTEGER=8)") + add_compile_definitions(SPARSEIR_USE_BLAS_ILP64) + else() + message(STATUS "BLAS with LP64 interface (BLA_SIZEOF_INTEGER=4)") endif() else() - find_package(BLAS REQUIRED) + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() endif() @@ -111,28 +142,16 @@ set_target_properties(sparseir PROPERTIES target_link_libraries(sparseir PRIVATE Eigen3::Eigen Threads::Threads) -# BLAS/LAPACK linking -if(SPARSEIR_USE_BLAS) +# BLAS/LAPACK linking (only when not using external function pointers) +if(NOT SPARSEIR_USE_EXTERN_FBLAS_PTR) if(BLAS_LIBRARIES OR BLAS_FOUND) target_link_libraries(sparseir PRIVATE ${BLAS_LIBRARIES}) message(STATUS "Linked BLAS libraries: ${BLAS_LIBRARIES}") else() - message(FATAL_ERROR "BLAS libraries not found") + message(FATAL_ERROR "BLAS libraries not found - BLAS is required") endif() else() - message(STATUS "BLAS linking disabled") -endif() - -if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(sparseir PRIVATE ${LAPACK_LIBRARIES}) -endif() - -# macOS Accelerate framework -if(APPLE) - find_library(ACCELERATE_FRAMEWORK Accelerate) - if(ACCELERATE_FRAMEWORK) - target_link_libraries(sparseir PRIVATE ${ACCELERATE_FRAMEWORK}) - endif() + message(STATUS "BLAS linking skipped (using external function pointers)") endif() # Library convention alias diff --git a/backend/cxx/build_capi_with_tests.sh b/backend/cxx/build_capi_with_tests.sh index 81860d87..20b97ab8 100755 --- a/backend/cxx/build_capi_with_tests.sh +++ b/backend/cxx/build_capi_with_tests.sh @@ -22,8 +22,7 @@ echo -e "${YELLOW}Configuring CMake...${NC}" cd "$BUILD_DIR" cmake .. \ -DCMAKE_BUILD_TYPE=Release \ - -DSPARSEIR_BUILD_TESTING=ON \ - -DSPARSEIR_USE_BLAS=ON + -DSPARSEIR_BUILD_TESTING=ON # Build echo -e "${YELLOW}Building...${NC}" diff --git a/backend/cxx/include/sparseir/gemm.hpp b/backend/cxx/include/sparseir/gemm.hpp index 8c4a0f7b..7291658b 100644 --- a/backend/cxx/include/sparseir/gemm.hpp +++ b/backend/cxx/include/sparseir/gemm.hpp @@ -13,64 +13,38 @@ #include "sparseir/jacobi_svd.hpp" -#ifdef SPARSEIR_USE_BLAS -extern "C" { - -// Fortran BLAS interface declarations -#ifdef SPARSEIR_USE_ILP64 -// ILP64 Fortran BLAS interface (64-bit integers) -void dgemm_(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const double* alpha, const double* a, const long long* lda, - const double* b, const long long* ldb, const double* beta, - double* c, const long long* ldc); - -void zgemm_(const char* transa, const char* transb, - const long long* m, const long long* n, const long long* k, - const void* alpha, const void* a, const long long* lda, - const void* b, const long long* ldb, const void* beta, - void* c, const long long* ldc); -#else -// LP64 Fortran BLAS interface (32-bit integers) -void dgemm_(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const double* alpha, const double* a, const int* lda, - const double* b, const int* ldb, const double* beta, - double* c, const int* ldc); - -void zgemm_(const char* transa, const char* transb, - const int* m, const int* n, const int* k, - const void* alpha, const void* a, const int* lda, - const void* b, const int* ldb, const void* beta, - void* c, const int* ldc); -#endif - - -enum CBLAS_TRANSPOSE { - CblasNoTrans = 111, - CblasTrans = 112, - CblasConjTrans = 113 -}; - -enum CBLAS_ORDER { CblasRowMajor = 101, CblasColMajor = 102 }; -} - -#endif - namespace sparseir { -#ifdef SPARSEIR_USE_BLAS - -// Wrapper functions that call Fortran BLAS -void my_dgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const double alpha, - const double *A, const int64_t lda, const double *B, const int64_t ldb, - const double beta, double *C, const int64_t ldc); +// Fortran BLAS interface wrapper functions +// Integer type is selected at compile time based on SPARSEIR_USE_BLAS_ILP64 +#ifdef SPARSEIR_USE_BLAS_ILP64 +// ILP64 interface: 64-bit integers +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc); + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc); +#else +// LP64 interface: 32-bit integers (default) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc); + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc); +#endif -void my_zgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const void *alpha, - const void *A, const int64_t lda, const void *B, const int64_t ldb, - const void *beta, void *C, const int64_t ldc); // Type-specific CBLAS GEMM implementations template void _gemm_blas_impl(const U *A, const V *B, ResultType *C, int64_t M, int64_t N, @@ -92,8 +66,16 @@ inline void _gemm_blas_impl(const double *A, const double *B, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, 1.0, A, M, - B, K, 0.0, C, M); + const char transa = 'N', transb = 'N'; + const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -104,12 +86,20 @@ inline void _gemm_blas_impl, std::complex, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex @@ -124,12 +114,20 @@ inline void _gemm_blas_impl, std::complex>( A_complex[i] = std::complex(A[i], 0.0); } + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A_complex.data()), M, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A_complex.data()), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for complex * double -> complex @@ -144,12 +142,20 @@ inline void _gemm_blas_impl, double, std::complex>( B_complex[i] = std::complex(B[i], 0.0); } + const char transa = 'N', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B_complex.data()), K, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * double -> double (transpose B) @@ -159,8 +165,16 @@ inline void _gemm_blas_impl_transpose(const double *A, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, 1.0, A, M, B, - N, 0.0, C, M); + const char transa = 'N', transb = 'T'; + const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#endif + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -173,12 +187,20 @@ _gemm_blas_impl_transpose, std::complex, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'N', transb = 'T'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasNoTrans, CblasTrans, M, N, K, &alpha, - reinterpret_cast(A), M, - reinterpret_cast(B), N, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(M), ldb = static_cast(N), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex (transpose B) @@ -228,8 +250,16 @@ inline void _gemm_blas_impl_conj(const double *A, const double *B, double *C, int64_t M, int64_t N, int64_t K) { - my_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, 1.0, A, K, B, - K, 0.0, C, M); + const char transa = 'T', transb = 'N'; + const double alpha = 1.0, beta = 0.0; +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); } // Specialization for complex * complex -> complex @@ -240,12 +270,20 @@ inline void _gemm_blas_impl_conj, std::complex, const std::complex *A, const std::complex *B, std::complex *C, int64_t M, int64_t N, int64_t K) { + const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), K, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for complex * double -> complex (conjugate A) @@ -261,12 +299,20 @@ _gemm_blas_impl_conj, double, std::complex>( B_complex[i] = std::complex(B[i], 0.0); } + const char transa = 'C', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A), K, - reinterpret_cast(B_complex.data()), K, &beta, - reinterpret_cast(C), M); +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A), &lda, + reinterpret_cast(B_complex.data()), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); } // Specialization for double * complex -> complex (conjugate A, @@ -283,15 +329,21 @@ _gemm_blas_impl_conj, std::complex>( A_complex[i] = std::complex(A[i], 0.0); } + const char transa = 'T', transb = 'N'; const std::complex alpha(1.0); const std::complex beta(0.0); - my_zgemm(CblasColMajor, CblasTrans, CblasNoTrans, M, N, K, &alpha, - reinterpret_cast(A_complex.data()), K, - reinterpret_cast(B), K, &beta, - reinterpret_cast(C), M); -} - +#ifdef SPARSEIR_USE_BLAS_ILP64 + const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); + const long long lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); +#else + const int m = static_cast(M), n = static_cast(N), k = static_cast(K); + const int lda = static_cast(K), ldb = static_cast(K), ldc = static_cast(M); #endif + my_zgemm(&transa, &transb, &m, &n, &k, reinterpret_cast(&alpha), + reinterpret_cast(A_complex.data()), &lda, + reinterpret_cast(B), &ldb, reinterpret_cast(&beta), + reinterpret_cast(C), &ldc); +} template void _gemm_inplace(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int64_t K) @@ -303,12 +355,8 @@ void _gemm_inplace(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int64_ B, K, N); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A, B, C, M, N, K); -#else - C_map.noalias() = A_map * B_map; -#endif } // second matrix is transposed @@ -321,12 +369,8 @@ void _gemm_inplace_t(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, int6 B, N, K); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl_transpose(A, B, C, M, N, K); -#else - C_map.noalias() = A_map * B_map.transpose(); -#endif } // first matrix is complex conjugated @@ -339,12 +383,8 @@ void _gemm_inplace_conj(const T1 *A, const T2 *B, T3 *C, int64_t M, int64_t N, i B, K, N); Eigen::Map> C_map(C, M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl_conj(A, B, C, M, N, K); -#else - C_map.noalias() = A_map.adjoint() * B_map; -#endif } template @@ -360,14 +400,9 @@ _gemm(const Eigen::Matrix &A, Eigen::Matrix result(M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A.data(), B.data(), result.data(), M, N, K); -#else - (void)K; // Suppress unused variable warning - result.noalias() = A * B; -#endif return result; } @@ -386,14 +421,9 @@ _gemm( Eigen::Matrix result(M, N); -#ifdef SPARSEIR_USE_BLAS // Call appropriate CBLAS function based on input types _gemm_blas_impl(A.data(), B.data(), result.data(), M, N, K); -#else - (void)K; // Suppress unused variable warning - result.noalias() = A * B; -#endif return result; } diff --git a/backend/cxx/include/sparseir/sparseir.h b/backend/cxx/include/sparseir/sparseir.h index de4f5aa8..77a61d27 100644 --- a/backend/cxx/include/sparseir/sparseir.h +++ b/backend/cxx/include/sparseir/sparseir.h @@ -1257,6 +1257,46 @@ int spir_sampling_fit_zd(const spir_sampling *s, int order, int ndim, const int *input_dims, int target_dim, const c_complex *input, double *out); +#ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR + +#ifdef SPARSEIR_USE_BLAS_ILP64 +/** + * @brief Register BLAS function pointers for ILP64 interface (64-bit integers) + * + * This function registers both dgemm and zgemm function pointers from an external + * BLAS library with ILP64 interface (64-bit integers). This is used when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. + * + * @param dgemm_fn Function pointer to Fortran dgemm (ILP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (ILP64 interface) + * + * @note The function pointers must be valid Fortran BLAS functions with ILP64 + * interface signature + * @note This function must be called before using any BLAS functionality when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled + */ +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn); +#else +/** + * @brief Register BLAS function pointers for LP64 interface (32-bit integers) + * + * This function registers both dgemm and zgemm function pointers from an external + * BLAS library with LP64 interface (32-bit integers). This is used when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is defined at compile time. + * + * @param dgemm_fn Function pointer to Fortran dgemm (LP64 interface) + * @param zgemm_fn Function pointer to Fortran zgemm (LP64 interface) + * + * @note The function pointers must be valid Fortran BLAS functions with LP64 + * interface signature + * @note This function must be called before using any BLAS functionality when + * SPARSEIR_USE_EXTERN_FBLAS_PTR is enabled + */ +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn); +#endif // SPARSEIR_USE_BLAS_ILP64 + +#endif // SPARSEIR_USE_EXTERN_FBLAS_PTR + #ifdef __cplusplus } #endif diff --git a/backend/cxx/src/gemm.cpp b/backend/cxx/src/gemm.cpp index b3c28509..cb82312f 100644 --- a/backend/cxx/src/gemm.cpp +++ b/backend/cxx/src/gemm.cpp @@ -1,111 +1,11 @@ #include "sparseir/gemm.hpp" -#ifdef SPARSEIR_USE_BLAS namespace sparseir { - -#include -#include - -// Fortran dgemm function pointer type -#ifdef SPARSEIR_USE_ILP64 -using dgemm_fptr = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const double*, - const double*, const long long*, const double*, - const long long*, const double*, double*, const long long*); - -using zgemm_fptr = void(*)(const char*, const char*, const long long*, - const long long*, const long long*, const void*, - const void*, const long long*, const void*, - const long long*, const void*, void*, const long long*); -#else -using dgemm_fptr = void(*)(const char*, const char*, const int*, - const int*, const int*, const double*, - const double*, const int*, const double*, - const int*, const double*, double*, const int*); - -using zgemm_fptr = void(*)(const char*, const char*, const int*, - const int*, const int*, const void*, - const void*, const int*, const void*, - const int*, const void*, void*, const int*); -#endif - - #ifdef SPARSEIR_USE_EXTERN_FBLAS_PTR -// Storage for registered function pointers (initially not registered) -static dgemm_fptr registered_dgemm = nullptr; -static zgemm_fptr registered_zgemm = nullptr; - -extern "C" { - -// Register function pointers from outside -void spir_register_dgemm(void* fn) { - registered_dgemm = reinterpret_cast(fn); -} - -void spir_register_zgemm(void* fn) { - registered_zgemm = reinterpret_cast(fn); -} - -} // extern "C" - -#else - -static dgemm_fptr registered_dgemm = dgemm_; -static zgemm_fptr registered_zgemm = zgemm_; - -#endif // SPARSEIR_USE_EXTERN_FBLAS_PTR - -// Implementation of my_dgemm - converts CBLAS interface to Fortran BLAS -void my_dgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const double alpha, - const double *A, const int64_t lda, const double *B, const int64_t ldb, - const double beta, double *C, const int64_t ldc) -{ - // Convert CBLAS transpose flags to Fortran character codes - char transa = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; - char transb = (TransB == CblasNoTrans) ? 'N' : (TransB == CblasTrans) ? 'T' : 'C'; - -#ifdef SPARSEIR_USE_ILP64 - // ILP64: Convert to long long for Fortran interface - const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); - const long long lda_ll = static_cast(lda), ldb_ll = static_cast(ldb), ldc_ll = static_cast(ldc); - registered_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda_ll, B, &ldb_ll, &beta, C, &ldc_ll); + #include "gemm_external.impl" #else - // LP64: Convert to int for Fortran interface - const int m = static_cast(M), n = static_cast(N), k = static_cast(K); - const int lda_int = static_cast(lda), ldb_int = static_cast(ldb), ldc_int = static_cast(ldc); - registered_dgemm(&transa, &transb, &m, &n, &k, &alpha, A, &lda_int, B, &ldb_int, &beta, C, &ldc_int); + #include "gemm_link.impl" #endif -} - -// Implementation of my_zgemm - converts CBLAS interface to Fortran BLAS -void my_zgemm(const int Order, const int TransA, const int TransB, - const int64_t M, const int64_t N, const int64_t K, const void *alpha, - const void *A, const int64_t lda, const void *B, const int64_t ldb, - const void *beta, void *C, const int64_t ldc) -{ - // Convert CBLAS transpose flags to Fortran character codes - char transa = (TransA == CblasNoTrans) ? 'N' : (TransA == CblasTrans) ? 'T' : 'C'; - char transb = (TransB == CblasNoTrans) ? 'N' : (TransB == CblasTrans) ? 'T' : 'C'; - - if (!registered_zgemm) { - throw std::runtime_error("zgemm not registered"); - } -#ifdef SPARSEIR_USE_ILP64 - // ILP64: Convert to long long for Fortran interface - const long long m = static_cast(M), n = static_cast(N), k = static_cast(K); - const long long lda_ll = static_cast(lda), ldb_ll = static_cast(ldb), ldc_ll = static_cast(ldc); - registered_zgemm(&transa, &transb, &m, &n, &k, alpha, A, &lda_ll, B, &ldb_ll, beta, C, &ldc_ll); -#else - // LP64: Convert to int for Fortran interface - const int m = static_cast(M), n = static_cast(N), k = static_cast(K); - const int lda_int = static_cast(lda), ldb_int = static_cast(ldb), ldc_int = static_cast(ldc); - registered_zgemm(&transa, &transb, &m, &n, &k, alpha, A, &lda_int, B, &ldb_int, beta, C, &ldc_int); -#endif -} } // namespace sparseir - - -#endif // SPARSEIR_USE_BLAS diff --git a/backend/cxx/src/gemm_external.impl b/backend/cxx/src/gemm_external.impl new file mode 100644 index 00000000..c95e5572 --- /dev/null +++ b/backend/cxx/src/gemm_external.impl @@ -0,0 +1,109 @@ +// External function pointer registration implementation +// This file is included when SPARSEIR_USE_EXTERN_FBLAS_PTR is defined +// Note: This file is included inside namespace sparseir {} in gemm.cpp +// +// IMPORTANT: In this mode, BLAS libraries are NOT directly linked. +// Therefore, dgemm_ and zgemm_ symbols do NOT exist. +// All BLAS calls go through externally registered function pointers. + +#include +#include + +// Function pointer types for external registration +// (No extern "C" dgemm_/zgemm_ declarations here - they don't exist when using external pointers) +#ifdef SPARSEIR_USE_BLAS_ILP64 +using dgemm_fptr = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const double*, + const double*, const long long*, const double*, + const long long*, const double*, double*, const long long*); + +using zgemm_fptr = void(*)(const char*, const char*, const long long*, + const long long*, const long long*, const void*, + const void*, const long long*, const void*, + const long long*, const void*, void*, const long long*); +#else +using dgemm_fptr = void(*)(const char*, const char*, const int*, + const int*, const int*, const double*, + const double*, const int*, const double*, + const int*, const double*, double*, const int*); + +using zgemm_fptr = void(*)(const char*, const char*, const int*, + const int*, const int*, const void*, + const void*, const int*, const void*, + const int*, const void*, void*, const int*); +#endif + +// Storage for externally registered function pointers +static dgemm_fptr registered_dgemm = nullptr; +static zgemm_fptr registered_zgemm = nullptr; + +extern "C" { + +// Register function pointers from outside (for Python bindings) +#ifdef SPARSEIR_USE_BLAS_ILP64 +void spir_register_dgemm_zgemm_ilp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm = reinterpret_cast(dgemm_fn); + registered_zgemm = reinterpret_cast(zgemm_fn); +} +#else +void spir_register_dgemm_zgemm_lp64(void* dgemm_fn, void* zgemm_fn) { + registered_dgemm = reinterpret_cast(dgemm_fn); + registered_zgemm = reinterpret_cast(zgemm_fn); +} +#endif + +} // extern "C" + +#ifdef SPARSEIR_USE_BLAS_ILP64 +// ILP64 interface +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc) +{ + if (!registered_dgemm) { + throw std::runtime_error("dgemm function pointer not registered - call spir_register_dgemm_zgemm_ilp64() first"); + } + registered_dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc) +{ + if (!registered_zgemm) { + throw std::runtime_error("zgemm function pointer not registered - call spir_register_dgemm_zgemm_ilp64() first"); + } + registered_zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +#else +// LP64 interface (default) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc) +{ + if (!registered_dgemm) { + throw std::runtime_error("dgemm function pointer not registered - call spir_register_dgemm_zgemm_lp64() first"); + } + registered_dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc) +{ + if (!registered_zgemm) { + throw std::runtime_error("zgemm function pointer not registered - call spir_register_dgemm_zgemm_lp64() first"); + } + registered_zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} +#endif + diff --git a/backend/cxx/src/gemm_link.impl b/backend/cxx/src/gemm_link.impl new file mode 100644 index 00000000..489ccb69 --- /dev/null +++ b/backend/cxx/src/gemm_link.impl @@ -0,0 +1,75 @@ +// Direct BLAS linking implementation +// This file is included when SPARSEIR_USE_EXTERN_FBLAS_PTR is NOT defined +// Note: This file is included inside namespace sparseir {} in gemm.cpp + +#include +#include + +// Declare Fortran BLAS functions based on compile-time ILP64/LP64 selection +extern "C" { +#ifdef SPARSEIR_USE_BLAS_ILP64 + // ILP64 interface: 64-bit integers + void dgemm_(const char* transa, const char* transb, const long long* m, + const long long* n, const long long* k, const double* alpha, + const double* a, const long long* lda, const double* b, + const long long* ldb, const double* beta, double* c, const long long* ldc); + + void zgemm_(const char* transa, const char* transb, const long long* m, + const long long* n, const long long* k, const void* alpha, + const void* a, const long long* lda, const void* b, + const long long* ldb, const void* beta, void* c, const long long* ldc); +#else + // LP64 interface: 32-bit integers (default) + void dgemm_(const char* transa, const char* transb, const int* m, + const int* n, const int* k, const double* alpha, + const double* a, const int* lda, const double* b, + const int* ldb, const double* beta, double* c, const int* ldc); + + void zgemm_(const char* transa, const char* transb, const int* m, + const int* n, const int* k, const void* alpha, + const void* a, const int* lda, const void* b, + const int* ldb, const void* beta, void* c, const int* ldc); +#endif +} + +#ifdef SPARSEIR_USE_BLAS_ILP64 +// ILP64 interface +void my_dgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const double* alpha, const double* a, const long long* lda, + const double* b, const long long* ldb, const double* beta, + double* c, const long long* ldc) +{ + dgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const long long* m, const long long* n, const long long* k, + const void* alpha, const void* a, const long long* lda, + const void* b, const long long* ldb, const void* beta, + void* c, const long long* ldc) +{ + zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +#else +// LP64 interface (default) +void my_dgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, + double* c, const int* ldc) +{ + dgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} + +void my_zgemm(const char* transa, const char* transb, + const int* m, const int* n, const int* k, + const void* alpha, const void* a, const int* lda, + const void* b, const int* ldb, const void* beta, + void* c, const int* ldc) +{ + zgemm_(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); +} +#endif + diff --git a/backend/cxx/test/CMakeLists.txt b/backend/cxx/test/CMakeLists.txt index 35263eb1..37a56dbb 100644 --- a/backend/cxx/test/CMakeLists.txt +++ b/backend/cxx/test/CMakeLists.txt @@ -23,15 +23,11 @@ foreach(TEST_FILE ${TEST_FILES}) target_link_libraries(${TEST_NAME} PRIVATE SparseIR::sparseir) target_link_libraries(${TEST_NAME} PRIVATE Eigen3::Eigen Threads::Threads) - # Add BLAS/LAPACK linking for tests if enabled - if(SPARSEIR_USE_BLAS AND BLAS_FOUND) + # Add BLAS linking for tests (always enabled) + if(BLAS_FOUND) target_link_libraries(${TEST_NAME} PRIVATE ${BLAS_LIBRARIES}) endif() - if(SPARSEIR_USE_LAPACKE AND LAPACK_FOUND) - target_link_libraries(${TEST_NAME} PRIVATE ${LAPACK_LIBRARIES}) - endif() - # macOS-specific configuration for tests if(APPLE AND ACCELERATE_FRAMEWORK) target_link_libraries(${TEST_NAME} PRIVATE ${ACCELERATE_FRAMEWORK}) diff --git a/capi_sample/run_sample.sh b/capi_sample/run_sample.sh index 9d6e65d7..0aba5327 100755 --- a/capi_sample/run_sample.sh +++ b/capi_sample/run_sample.sh @@ -27,8 +27,8 @@ cd "$BUILD_DIR/build_backend" cmake ../../../backend/cxx \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="../../work_cxx/install" \ - -DSPARSEIR_USE_BLAS=ON \ - -DSPARSEIR_BUILD_TESTING=OFF + -DSPARSEIR_BUILD_TESTING=OFF \ + ${SPARSEIR_USE_BLAS_ILP64:+-DSPARSEIR_USE_BLAS_ILP64=ON} cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) cmake --install . diff --git a/capi_test/CMakeLists.txt b/capi_test/CMakeLists.txt index 59af36d3..42c6c604 100644 --- a/capi_test/CMakeLists.txt +++ b/capi_test/CMakeLists.txt @@ -63,19 +63,20 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") endif() # Optional BLAS linking +# Check if ILP64 BLAS is requested (via environment variable or CMake option) +if(DEFINED ENV{SPARSEIR_USE_BLAS_ILP64} AND "$ENV{SPARSEIR_USE_BLAS_ILP64}") + set(BLA_SIZEOF_INTEGER 8) + message(STATUS "capi_test: Using ILP64 BLAS (64-bit integers)") +else() + set(BLA_SIZEOF_INTEGER 4) + message(STATUS "capi_test: Using LP64 BLAS (32-bit integers, default)") +endif() + find_package(BLAS QUIET) if(BLAS_FOUND) target_link_libraries(cinterface_integration PRIVATE ${BLAS_LIBRARIES}) endif() -# macOS Accelerate framework -if(APPLE) - find_library(ACCELERATE_FRAMEWORK Accelerate) - if(ACCELERATE_FRAMEWORK) - target_link_libraries(cinterface_integration PRIVATE ${ACCELERATE_FRAMEWORK}) - endif() -endif() - # Enable testing enable_testing() add_test(NAME cinterface_integration COMMAND cinterface_integration -d yes) diff --git a/capi_test/test_with_cxx_backend.sh b/capi_test/test_with_cxx_backend.sh index 5488ba39..ebe30c09 100755 --- a/capi_test/test_with_cxx_backend.sh +++ b/capi_test/test_with_cxx_backend.sh @@ -30,7 +30,7 @@ cmake "$BACKEND_DIR" \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ -DSPARSEIR_BUILD_TESTING=OFF \ - -DSPARSEIR_USE_BLAS=ON + ${SPARSEIR_USE_BLAS_ILP64:+-DSPARSEIR_USE_BLAS_ILP64=ON} echo -e "${YELLOW}Building backend/cxx...${NC}" cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) diff --git a/fortran/test_with_cxx_backend.sh b/fortran/test_with_cxx_backend.sh index 2a6bff64..20773bdf 100755 --- a/fortran/test_with_cxx_backend.sh +++ b/fortran/test_with_cxx_backend.sh @@ -30,8 +30,7 @@ cd "$WORK_DIR/build_backend" cmake "$BACKEND_DIR" \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" \ - -DSPARSEIR_BUILD_TESTING=OFF \ - -DSPARSEIR_USE_BLAS=ON + -DSPARSEIR_BUILD_TESTING=OFF echo -e "${YELLOW}Building backend/cxx...${NC}" cmake --build . -j$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 1f74db2a..bb294e3c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -39,15 +39,8 @@ if(SPARSEIR_BUILD_FORTRAN) message(STATUS "Fortran support enabled for building Fortran bindings") endif() -# Eigen3 -# BLAS/LAPACK options -if(SPARSEIR_USE_BLAS) - add_compile_definitions(SPARSEIR_USE_BLAS) - message(STATUS "BLAS support enabled") - - # Do not resolve BLAS library symbols (assume sharing with NumPy) - # Do not call find_package(BLAS) or, if called, do not pass to target_link_libraries -endif() +# Do not resolve BLAS library symbols (assume sharing with NumPy) +# Do not call find_package(BLAS) or, if called, do not pass to target_link_libraries # External FBLAS pointer option if(SPARSEIR_USE_EXTERN_FBLAS_PTR) diff --git a/python/pylibsparseir/core.py b/python/pylibsparseir/core.py index e8e9df62..7b70457e 100644 --- a/python/pylibsparseir/core.py +++ b/python/pylibsparseir/core.py @@ -71,10 +71,11 @@ def _find_library(): ptr_z = ctypes.pythonapi.PyCapsule_GetPointer(capsule_z, name_z) _lib = ctypes.CDLL(_find_library()) - _lib.spir_register_dgemm.argtypes = [ctypes.c_void_p] - _lib.spir_register_dgemm(ptr) - _lib.spir_register_zgemm.argtypes = [ctypes.c_void_p] - _lib.spir_register_zgemm(ptr_z) + # Register both dgemm and zgemm at once using LP64 interface + # Note: SciPy BLAS typically uses LP64 interface (32-bit integers) + _lib.spir_register_dgemm_zgemm_lp64.argtypes = [ctypes.c_void_p, ctypes.c_void_p] + _lib.spir_register_dgemm_zgemm_lp64.restype = None + _lib.spir_register_dgemm_zgemm_lp64(ptr, ptr_z) if os.environ.get("SPARSEIR_DEBUG", "").lower() in ("1", "true", "yes", "on"): print(f"[core.py] Registered SciPy BLAS dgemm @ {hex(ptr)}") print(f"[core.py] Registered SciPy BLAS zgemm @ {hex(ptr_z)}") diff --git a/python/pyproject.toml b/python/pyproject.toml index b60467b4..547334df 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -42,7 +42,6 @@ cmake.build-type = "Release" # CMake arguments cmake.args = [ - "-DSPARSEIR_USE_BLAS=ON", "-DSPARSEIR_USE_EXTERN_FBLAS_PTR=ON", "-DBUILD_SHARED_LIBS=ON", "-DCMAKE_CXX_STANDARD=11", @@ -67,7 +66,6 @@ sdist.include = [ [tool.scikit-build.cmake.define] -SPARSEIR_USE_BLAS = "ON" BUILD_SHARED_LIBS = "ON" CMAKE_CXX_STANDARD = "11" CMAKE_CXX_STANDARD_REQUIRED = "ON" diff --git a/python/run_tests.sh b/python/run_tests.sh new file mode 100755 index 00000000..e3e6e1f4 --- /dev/null +++ b/python/run_tests.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Script to setup build environment and run tests with uv + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "======================================" +echo "Cleaning up previous build artifacts..." +echo "======================================" +# Remove copied source files and directories +[ -d "include" ] && rm -rf "include" +[ -d "src" ] && rm -rf "src" +[ -d "cmake" ] && rm -rf "cmake" +[ -f "LICENSE" ] && rm -f "LICENSE" + +# Remove .venv directory if it exists +[ -d ".venv" ] && rm -rf ".venv" + +# Remove build cache directories +[ -d "_skbuild" ] && rm -rf "_skbuild" +[ -d "dist" ] && rm -rf "dist" +[ -d "*.egg-info" ] && rm -rf *.egg-info 2>/dev/null || true + +echo "Cleanup completed." + +echo "" +echo "======================================" +echo "Setting up build environment..." +echo "======================================" +python3 setup_build.py + +echo "" +echo "======================================" +echo "Running uv sync (with rebuild)..." +echo "======================================" +# Remove any cached build artifacts and force rebuild +uv sync --refresh + +echo "" +echo "======================================" +echo "Running tests..." +echo "======================================" +uv run pytest tests/ -v + +echo "" +echo "======================================" +echo "All tests completed successfully!" +echo "======================================" +