From 337e5aab281c1b4e5fb41a1cfb270115bcf6f891 Mon Sep 17 00:00:00 2001 From: SaltyChiang Date: Tue, 5 Nov 2024 00:54:10 +0800 Subject: [PATCH 1/3] Enable Windows build. --- CMakeLists.txt | 12 +- include/color_spinor_field_order.h | 4 +- include/communicator_quda.h | 2 +- include/complex_quda.h | 5 + include/gauge_field_order.h | 16 +- include/instantiate.h | 99 ++++----- include/instantiate_dslash.h | 6 +- include/kernel_helper.h | 3 + include/kernels/dslash_mdw_fused.cuh | 2 +- include/kernels/evec_project.cuh | 4 +- include/kernels/gauge_random.cuh | 6 +- include/multi_blas_helper.cuh | 4 +- include/polynomial.h | 5 + include/quda.h | 197 ++++++++++-------- include/quda_matrix.h | 4 +- include/reliable_updates.h | 14 +- include/targets/cuda/block_reduction_kernel.h | 16 +- include/targets/cuda/fast_intdiv.h | 4 + include/targets/cuda/kernel.h | 30 +-- include/targets/cuda/math_helper.cuh | 8 +- include/targets/cuda/reduction_kernel.h | 20 +- include/timer.h | 28 +++ include/util_quda.h | 4 + lib/CMakeLists.txt | 4 + lib/comm_common.cpp | 6 + lib/copy_color_spinor.cuh | 8 +- lib/copy_gauge.in.cpp | 2 +- lib/copy_gauge_inc.cu | 2 +- lib/covariant_derivative.cu | 2 +- lib/dirac_coarse.cpp | 2 +- lib/dslash5_domain_wall.cu | 2 +- lib/dslash5_mobius_eofa.cu | 2 +- lib/dslash_clover_helper.cu | 4 +- lib/dslash_domain_wall_4d.cu | 2 +- lib/dslash_domain_wall_4d_m5inv.cu | 2 +- lib/dslash_domain_wall_4d_m5inv_m5inv.cu | 2 +- lib/dslash_domain_wall_4d_m5inv_m5pre.cu | 2 +- lib/dslash_domain_wall_4d_m5mob.cu | 2 +- lib/dslash_domain_wall_4d_m5pre.cu | 2 +- lib/dslash_domain_wall_4d_m5pre_m5inv.cu | 2 +- lib/dslash_domain_wall_4d_m5pre_m5mob.cu | 2 +- lib/dslash_domain_wall_5d.cu | 2 +- lib/dslash_gamma_helper.cu | 4 +- lib/dslash_improved_staggered.cu | 2 +- lib/dslash_ndeg_twisted_clover.cu | 2 +- ...lash_ndeg_twisted_clover_preconditioned.cu | 2 +- lib/dslash_ndeg_twisted_mass.cu | 2 +- ...dslash_ndeg_twisted_mass_preconditioned.cu | 2 +- lib/dslash_policy.hpp | 5 +- lib/dslash_staggered.cu | 6 +- lib/dslash_twisted_clover.cu | 2 +- lib/dslash_twisted_clover_preconditioned.cu | 2 +- lib/dslash_twisted_mass.cu | 2 +- lib/dslash_twisted_mass_preconditioned.cu | 2 +- lib/dslash_wilson.cu | 2 +- lib/dslash_wilson_clover.cu | 2 +- lib/dslash_wilson_clover_distance.cu | 2 +- lib/dslash_wilson_clover_hasenbusch_twist.cu | 2 +- ..._clover_hasenbusch_twist_preconditioned.cu | 4 +- lib/dslash_wilson_clover_preconditioned.cu | 2 +- ...h_wilson_clover_preconditioned_distance.cu | 2 +- lib/dslash_wilson_distance.cu | 2 +- lib/eig_block_trlm.cpp | 4 +- lib/extract_gauge_ghost.in.cu | 22 +- lib/extract_gauge_ghost_extended.cu | 20 +- lib/extract_gauge_ghost_mg.in.cu | 2 +- lib/hisq_paths_force_quda.cu | 6 +- lib/interface_quda.cpp | 20 +- lib/inv_ca_cg.cpp | 2 +- lib/inv_ca_gcr.cpp | 2 +- lib/inv_cg3_quda.cpp | 8 +- lib/inv_cg_quda.cpp | 2 +- lib/inv_eigcg_quda.cpp | 2 +- lib/inv_gcr_quda.cpp | 8 +- lib/inv_multi_cg_quda.cpp | 6 +- lib/laplace.cu | 2 +- lib/llfat_quda.cu | 4 +- lib/milc_interface.cpp | 5 +- lib/solve.cpp | 2 +- lib/spin_taste.cu | 2 +- lib/staggered_kd_apply_xinv.cu | 2 +- lib/staggered_oprod.cu | 2 +- lib/staggered_quark_smearing.cu | 2 +- lib/staggered_two_link_quda.cu | 2 +- lib/targets/cuda/device.cpp | 2 +- lib/targets/cuda/malloc.cpp | 41 ++++ lib/targets/cuda/target_cuda.cmake | 5 +- lib/transform_reduce.cu | 36 ++-- lib/tune.cpp | 38 +++- lib/unitarize_force_quda.cu | 4 +- lib/util_quda.cpp | 2 +- 91 files changed, 510 insertions(+), 341 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 803f5dba41..0146130cee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,10 @@ include(cmake/CPM.cmake) find_package(Git) +if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() + # By default we will build DEVEL. The different build types will pass different # flags to the compiler which may be strict or permissive on warnings, or # very verbose at run time and/or compile time. @@ -337,7 +341,7 @@ set(CMAKE_CXX_FLAGS_STRICT "-Os" CACHE STRING "Flags used by the C++ compiler during strict jenkins builds.") set(CMAKE_CXX_FLAGS_RELEASE - "-O3 ${CXX_OPT}" + "-Zi -O2 ${CXX_OPT}" CACHE STRING "Flags used by the C++ compiler during release builds.") set(CMAKE_CXX_FLAGS_HOSTDEBUG "-g" @@ -359,7 +363,7 @@ set(CMAKE_C_FLAGS_STRICT "-Os" CACHE STRING "Flags used by the C compiler during strict jenkins builds.") set(CMAKE_C_FLAGS_RELEASE - "-O3" + "-Zi -O2" CACHE STRING "Flags used by the C compiler during release builds.") set(CMAKE_C_FLAGS_HOSTDEBUG "-g" @@ -709,5 +713,7 @@ include(CTest) # add tests, utils, reference, and quda library add_subdirectory(lib) -add_subdirectory(tests) +if (NOT WIN32) + add_subdirectory(tests) +endif() add_subdirectory(doc) diff --git a/include/color_spinor_field_order.h b/include/color_spinor_field_order.h index 25f5234390..0664a40c47 100644 --- a/include/color_spinor_field_order.h +++ b/include/color_spinor_field_order.h @@ -1196,7 +1196,7 @@ namespace quda using Accessor = FloatNOrder; using GhostNOrder = GhostNOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; using Vector = typename VectorType::type; using AllocInt = typename AllocType::type; using norm_type = float; @@ -1432,7 +1432,7 @@ namespace quda using Accessor = FloatNOrder; using GhostNOrder = GhostNOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; using Vector = int4; // 128-bit packed type using AllocInt = typename AllocType::type; using norm_type = float; diff --git a/include/communicator_quda.h b/include/communicator_quda.h index aec02b8c2a..c4b0718904 100644 --- a/include/communicator_quda.h +++ b/include/communicator_quda.h @@ -1,6 +1,6 @@ #pragma once -#include // for gethostname() +// #include // for gethostname() #include #include #include diff --git a/include/complex_quda.h b/include/complex_quda.h index 18da63def5..9dbc6c95a1 100644 --- a/include/complex_quda.h +++ b/include/complex_quda.h @@ -20,6 +20,11 @@ #pragma once +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#undef _USE_MATH_DEFINES +#endif #include #include #include diff --git a/include/gauge_field_order.h b/include/gauge_field_order.h index 38079c3cbb..2b8c067c99 100644 --- a/include/gauge_field_order.h +++ b/include/gauge_field_order.h @@ -1852,7 +1852,7 @@ namespace quda { template struct QDPOrder : public LegacyOrder { using Accessor = QDPOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge[QUDA_MAX_DIM]; const unsigned int volumeCB; QDPOrder(const GaugeField &u, Float *gauge_ = 0, Float **ghost_ = 0) : @@ -1898,7 +1898,7 @@ namespace quda { template struct QDPJITOrder : public LegacyOrder { using Accessor = QDPJITOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge[QUDA_MAX_DIM]; const unsigned int volumeCB; QDPJITOrder(const GaugeField &u, Float *gauge_ = 0, Float **ghost_ = 0) : @@ -1948,7 +1948,7 @@ namespace quda { template struct MILCOrder : public LegacyOrder { using Accessor = MILCOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const int geometry; @@ -2009,7 +2009,7 @@ namespace quda { template struct MILCSiteOrder : public LegacyOrder { using Accessor = MILCSiteOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const int geometry; @@ -2068,7 +2068,7 @@ namespace quda { template struct CPSOrder : LegacyOrder { using Accessor = CPSOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const real anisotropy; @@ -2137,7 +2137,7 @@ namespace quda { template struct BQCDOrder : LegacyOrder { using Accessor = BQCDOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; unsigned int exVolumeCB; // extended checkerboard volume @@ -2199,7 +2199,7 @@ namespace quda { template struct TIFROrder : LegacyOrder { using Accessor = TIFROrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; static constexpr int Nc = 3; @@ -2263,7 +2263,7 @@ namespace quda { template struct TIFRPaddedOrder : LegacyOrder { using Accessor = TIFRPaddedOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; int exVolumeCB; diff --git a/include/instantiate.h b/include/instantiate.h index 8eee6ad269..6597fb3156 100644 --- a/include/instantiate.h +++ b/include/instantiate.h @@ -33,41 +33,32 @@ namespace quda @brief Helper function for returning if a given gauge field order is enabled @tparam order The order requested */ - template constexpr bool is_enabled(); + constexpr bool is_enabled(QudaGaugeFieldOrder order) + { + switch (order) { #ifdef BUILD_QDP_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_QDP_GAUGE_ORDER: return true; #endif #ifdef BUILD_QDPJIT_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_QDPJIT_GAUGE_ORDER: return true; #endif #ifdef BUILD_CPS_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_CPS_WILSON_GAUGE_ORDER: return true; #endif #ifdef BUILD_MILC_INTERFACE - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } - template <> constexpr bool is_enabled() { return false; } + case QUDA_MILC_GAUGE_ORDER: return true; + case QUDA_MILC_SITE_GAUGE_ORDER: return true; #endif #ifdef BUILD_BQCD_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_BQCD_GAUGE_ORDER: return true; #endif #ifdef BUILD_TIFR_INTERFACE - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } - template <> constexpr bool is_enabled() { return false; } + case QUDA_TIFR_GAUGE_ORDER: return true; + case QUDA_TIFR_PADDED_GAUGE_ORDER: return true; #endif + default: return false; + } + } /** @brief Helper function for returning if a given precision is enabled @@ -89,13 +80,18 @@ namespace quda @tparam reconstruct The reconstruct requested @return True if enabled, false if not */ - template constexpr bool is_enabled(); - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 4) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 2) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 2) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 1) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 1) ? true : false; } - template <> constexpr bool is_enabled() { return true; } + constexpr bool is_enabled(QudaReconstructType reconstruct) + { + switch (reconstruct) { + case QUDA_RECONSTRUCT_NO: return (QUDA_RECONSTRUCT & 4) ? true : false; + case QUDA_RECONSTRUCT_13: return (QUDA_RECONSTRUCT & 2) ? true : false; + case QUDA_RECONSTRUCT_12: return (QUDA_RECONSTRUCT & 2) ? true : false; + case QUDA_RECONSTRUCT_9: return (QUDA_RECONSTRUCT & 1) ? true : false; + case QUDA_RECONSTRUCT_8: return (QUDA_RECONSTRUCT & 1) ? true : false; + case QUDA_RECONSTRUCT_10: return true; + default: return false; + } + } struct ReconstructFull { static constexpr std::array recon @@ -142,8 +138,8 @@ namespace quda void instantiateReconstruct(G &U, Args &&...args) { if (U.Reconstruct() == Recon::recon[i]) { - if constexpr (is_enabled()) - Apply(U, args...); + if constexpr (is_enabled(Recon::recon[i])) + Apply apply(U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, Recon::recon[i]); } else if constexpr (i > 0) { @@ -471,13 +467,13 @@ namespace quda constexpr void instantiateGaugeStaggered(G &U, Args &&...args) { if (U.Reconstruct() == QUDA_RECONSTRUCT_NO) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_NO)) // actual phase type doesn't matter because the phase is baked into the links Apply(U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_NO); } else if (U.Reconstruct() == QUDA_RECONSTRUCT_13) { - if constexpr (is_enabled()) { + if constexpr (is_enabled(QUDA_RECONSTRUCT_13)) { if (U.StaggeredPhase() == QUDA_STAGGERED_PHASE_NO) Apply(U, args...); else if (U.StaggeredPhase() == QUDA_STAGGERED_PHASE_MILC) @@ -488,7 +484,7 @@ namespace quda errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_13); } } else if (U.Reconstruct() == QUDA_RECONSTRUCT_12) { - if constexpr (is_enabled()) { + if constexpr (is_enabled(QUDA_RECONSTRUCT_12)) { errorQuda("QUDA_RECONSTRUCT=%d has not been implemented for HISQ gauge routines yet.", QUDA_RECONSTRUCT_12); } else { errorQuda("QUDA_RECONSTRUCT=%d does not enable %d\n", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_12); @@ -545,38 +541,43 @@ namespace quda @tparam dslash_type The dslash_type requested @return True if enabled, false if not */ - template constexpr bool is_enabled() { return false; } + constexpr bool is_enabled(QudaDslashType dslash_type) + { + switch (dslash_type) { #ifdef GPU_WILSON_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_WILSON_DSLASH: return true; #endif #ifdef GPU_CLOVER_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_CLOVER_WILSON_DSLASH: return true; #endif #ifdef GPU_CLOVER_HASENBUSCH_TWIST - template <> constexpr bool is_enabled() { return true; } + case QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH: return true; #endif #ifdef GPU_DOMAIN_WALL_DIRAC - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } + case QUDA_DOMAIN_WALL_DSLASH: return true; + case QUDA_DOMAIN_WALL_4D_DSLASH: return true; + case QUDA_MOBIUS_DWF_DSLASH: return true; + case QUDA_MOBIUS_DWF_EOFA_DSLASH: return true; #endif #ifdef GPU_STAGGERED_DIRAC - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } + case QUDA_STAGGERED_DSLASH: return true; + case QUDA_ASQTAD_DSLASH: return true; #endif #ifdef GPU_TWISTED_MASS_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_TWISTED_MASS_DSLASH: return true; #endif #ifdef GPU_TWISTED_CLOVER_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_TWISTED_CLOVER_DSLASH: return true; #endif #ifdef GPU_LAPLACE - template <> constexpr bool is_enabled() { return true; } + case QUDA_LAPLACE_DSLASH: return true; #endif #ifdef GPU_COVDEV - template <> constexpr bool is_enabled() { return true; } + case QUDA_COVDEV_DSLASH: return true; #endif + default: return false; + } + } #ifdef GPU_DISTANCE_PRECONDITIONING constexpr bool is_enabled_distance_precondition() { return true; } diff --git a/include/instantiate_dslash.h b/include/instantiate_dslash.h index eab0ead243..d577c097af 100644 --- a/include/instantiate_dslash.h +++ b/include/instantiate_dslash.h @@ -22,17 +22,17 @@ namespace quda cvector_ref &x, const GaugeField &U, Args &&...args) { if (U.Reconstruct() == Recon::recon[0]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_NO)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-18", QUDA_RECONSTRUCT); } else if (U.Reconstruct() == Recon::recon[1]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_12)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-12/13", QUDA_RECONSTRUCT); } else if (U.Reconstruct() == Recon::recon[2]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_8)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-8/9", QUDA_RECONSTRUCT); diff --git a/include/kernel_helper.h b/include/kernel_helper.h index dcb33baba0..336c9fe297 100644 --- a/include/kernel_helper.h +++ b/include/kernel_helper.h @@ -2,6 +2,9 @@ #include "comm_quda.h" +#undef TRUE +#undef FALSE + namespace quda { diff --git a/include/kernels/dslash_mdw_fused.cuh b/include/kernels/dslash_mdw_fused.cuh index 67f98b30cc..a28180605d 100644 --- a/include/kernels/dslash_mdw_fused.cuh +++ b/include/kernels/dslash_mdw_fused.cuh @@ -165,7 +165,7 @@ namespace quda { bool ret = false; #pragma unroll for (int d = 0; d < 4; d++) { - ret = ret or (coordinate[d] >= dim[d] - halo_shift[d] or coordinate[d] < halo_shift[d]); + ret = ret || (coordinate[d] >= dim[d] - halo_shift[d] || coordinate[d] < halo_shift[d]); } return ret; } diff --git a/include/kernels/evec_project.cuh b/include/kernels/evec_project.cuh index 7bea55d908..cf8d7bdbfe 100644 --- a/include/kernels/evec_project.cuh +++ b/include/kernels/evec_project.cuh @@ -11,8 +11,8 @@ namespace quda { using spinor_array = array; - constexpr unsigned long max_nx = 4; - constexpr unsigned long max_ny = 4; + constexpr unsigned long long max_nx = 4; + constexpr unsigned long long max_ny = 4; template struct EvecProjectionArg : public ReduceArg diff --git a/include/kernels/gauge_random.cuh b/include/kernels/gauge_random.cuh index 92c2b1995e..82d4ffea40 100644 --- a/include/kernels/gauge_random.cuh +++ b/include/kernels/gauge_random.cuh @@ -52,6 +52,8 @@ namespace quda { temp2[i] *= radius[i]; } + printf("%f %f\n", phi[0], radius[0]); + // construct Anti-Hermitian matrix const real rsqrt_3 = quda::rsqrt(3.0); ret(0, 0) = complex(0.0, temp1[2] + rsqrt_3 * temp2[3]); @@ -82,12 +84,12 @@ namespace quda { getCoords(x, x_cb, arg.X, parity); for (int dr = 0; dr < 4; ++dr) x[dr] += arg.border[dr]; // extended grid coordinates - if (arg.group and arg.sigma == 0.0) { + if (arg.group && arg.sigma == 0.0) { // if sigma = 0 then we just set the output matrix to the identity and finish Link I; setIdentity(&I); for (int mu = 0; mu < 4; mu++) arg.U(mu, linkIndex(x, arg.E), parity) = I; - } else if (not arg.group and arg.sigma == 0.0) { + } else if (! arg.group && arg.sigma == 0.0) { // if sigma = 0 then we just set the output matrix to the zero and finish Link O = {}; for (int mu = 0; mu < 4; mu++) arg.U(mu, linkIndex(x, arg.E), parity) = O; diff --git a/include/multi_blas_helper.cuh b/include/multi_blas_helper.cuh index 93dd15249c..58e105f956 100644 --- a/include/multi_blas_helper.cuh +++ b/include/multi_blas_helper.cuh @@ -235,7 +235,7 @@ namespace quda static_assert(coeff_nyw != 0, "coeff_nyw is zero"); // additional limit since there's diminished benefit past a certain point - constexpr auto max_nyw = 128lu; + constexpr auto max_nyw = 128llu; return std::min(arg_nyw, std::min(coeff_nyw, max_nyw)); } @@ -290,7 +290,7 @@ namespace quda const auto coeff_nyw = Functor::coeff_mul ? max_array_size() / (NXZ * sizeof(typename Functor::coeff_t)) : arg_nyw; // additional limit since there's diminished benefit past a certain point - constexpr auto max_nyw = 128lu; + constexpr auto max_nyw = 128llu; return std::min(arg_nyw, std::min(coeff_nyw, max_nyw)); } diff --git a/include/polynomial.h b/include/polynomial.h index aa51d372ed..69eeef551d 100644 --- a/include/polynomial.h +++ b/include/polynomial.h @@ -1,5 +1,10 @@ #pragma once +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#undef _USE_MATH_DEFINES +#endif #include #include #include diff --git a/include/quda.h b/include/quda.h index 155791b60a..87666f96f6 100644 --- a/include/quda.h +++ b/include/quda.h @@ -15,8 +15,27 @@ #include #ifndef __CUDACC_RTC__ +#ifdef _MSC_VER +#ifdef __cplusplus +#include +extern "C" typedef struct { + double real, imag; + operator std::complex() const { return std::complex(real, imag); } + operator std::complex() const { return std::complex(real, imag); } +} _Dcomplex; +#else +typedef struct { + double real, imag; +} _Dcomplex; +#endif +#define API __declspec(dllexport) +#define double_complex _Dcomplex +#else +#define API #define double_complex double _Complex +#endif #else // keep NVRTC happy since it can't handle C types +#define API #define double_complex double2 #endif @@ -930,7 +949,7 @@ extern "C" { * returned by fopen()) where messages should be * printed. The default is stdout. */ - void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], + API void setVerbosityQuda(QudaVerbosity verbosity, const char prefix[], FILE *outfile); /** @@ -949,7 +968,7 @@ extern "C" { * @param mycomm User provided MPI communicator in place of MPI_COMM_WORLD */ - void qudaSetCommHandle(void *mycomm); + API void qudaSetCommHandle(void *mycomm); /** * Declare the grid mapping ("logical topology" in QMP parlance) @@ -978,7 +997,7 @@ extern "C" { * @see QudaCommsMap */ - void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata); + API void initCommsGridQuda(int nDim, const int *dims, QudaCommsMap func, void *fdata); /** * Initialize the library. This is a low-level interface that is @@ -990,7 +1009,7 @@ extern "C" { * per-process basis or set to -1 to enable a default * allocation of devices to processes. */ - void initQudaDevice(int device); + API void initQudaDevice(int device); /** * Initialize the library persistant memory allocations (both host @@ -998,7 +1017,7 @@ extern "C" { * initQuda. Calling initQudaMemory requires that the user has * previously called initQudaDevice. */ - void initQudaMemory(); + API void initQudaMemory(); /** * Initialize the library. This function is actually a wrapper @@ -1009,19 +1028,19 @@ extern "C" { * per-process basis or set to -1 to enable a default * allocation of devices to processes. */ - void initQuda(int device); + API void initQuda(int device); /** * Finalize the library. */ - void endQuda(void); + API void endQuda(void); /** * @brief update the radius for halos. * @details This should only be needed for automated testing when * different partitioning is applied within a single run. */ - void updateR(); + API void updateR(); /** * A new QudaGaugeParam should always be initialized immediately @@ -1030,7 +1049,7 @@ extern "C" { * * QudaGaugeParam gauge_param = newQudaGaugeParam(); */ - QudaGaugeParam newQudaGaugeParam(void); + API QudaGaugeParam newQudaGaugeParam(void); /** * A new QudaInvertParam should always be initialized immediately @@ -1039,7 +1058,7 @@ extern "C" { * * QudaInvertParam invert_param = newQudaInvertParam(); */ - QudaInvertParam newQudaInvertParam(void); + API QudaInvertParam newQudaInvertParam(void); /** * A new QudaMultigridParam should always be initialized immediately @@ -1048,7 +1067,7 @@ extern "C" { * * QudaMultigridParam mg_param = newQudaMultigridParam(); */ - QudaMultigridParam newQudaMultigridParam(void); + API QudaMultigridParam newQudaMultigridParam(void); /** * A new QudaEigParam should always be initialized immediately @@ -1057,7 +1076,7 @@ extern "C" { * * QudaEigParam eig_param = newQudaEigParam(); */ - QudaEigParam newQudaEigParam(void); + API QudaEigParam newQudaEigParam(void); /** * A new QudaGaugeObservableParam should always be initialized @@ -1066,7 +1085,7 @@ extern "C" { * * QudaGaugeObservalbeParam obs_param = newQudaGaugeObservableParam(); */ - QudaGaugeObservableParam newQudaGaugeObservableParam(void); + API QudaGaugeObservableParam newQudaGaugeObservableParam(void); /** * A new QudaGaugeSmearParam should always be initialized @@ -1075,7 +1094,7 @@ extern "C" { * * QudaGaugeSmearParam smear_param = newQudaGaugeSmearParam(); */ - QudaGaugeSmearParam newQudaGaugeSmearParam(void); + API QudaGaugeSmearParam newQudaGaugeSmearParam(void); /** * A new QudaBLASParam should always be initialized immediately @@ -1084,78 +1103,78 @@ extern "C" { * * QudaBLASParam blas_param = newQudaBLASParam(); */ - QudaBLASParam newQudaBLASParam(void); + API QudaBLASParam newQudaBLASParam(void); /** * Print the members of QudaGaugeParam. * @param param The QudaGaugeParam whose elements we are to print. */ - void printQudaGaugeParam(QudaGaugeParam *param); + API void printQudaGaugeParam(QudaGaugeParam *param); /** * Print the members of QudaInvertParam. * @param param The QudaInvertParam whose elements we are to print. */ - void printQudaInvertParam(QudaInvertParam *param); + API void printQudaInvertParam(QudaInvertParam *param); /** * Print the members of QudaMultigridParam. * @param param The QudaMultigridParam whose elements we are to print. */ - void printQudaMultigridParam(QudaMultigridParam *param); + API void printQudaMultigridParam(QudaMultigridParam *param); /** * Print the members of QudaEigParam. * @param param The QudaEigParam whose elements we are to print. */ - void printQudaEigParam(QudaEigParam *param); + API void printQudaEigParam(QudaEigParam *param); /** * Print the members of QudaGaugeObservableParam. * @param param The QudaGaugeObservableParam whose elements we are to print. */ - void printQudaGaugeObservableParam(QudaGaugeObservableParam *param); + API void printQudaGaugeObservableParam(QudaGaugeObservableParam *param); /** * Print the members of QudaBLASParam. * @param param The QudaBLASParam whose elements we are to print. */ - void printQudaBLASParam(QudaBLASParam *param); + API void printQudaBLASParam(QudaBLASParam *param); /** * Load the gauge field from the host. * @param h_gauge Base pointer to host gauge field (regardless of dimensionality) * @param param Contains all metadata regarding host and device storage */ - void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param); + API void loadGaugeQuda(void *h_gauge, QudaGaugeParam *param); /** * Free QUDA's internal copy of the gauge field. */ - void freeGaugeQuda(void); + API void freeGaugeQuda(void); /** * Free a unique type (Wilson, HISQ fat, HISQ long, smeared) of internal gauge field. * @param link_type[in] Type of link type to free up */ - void freeUniqueGaugeQuda(QudaLinkType link_type); + API void freeUniqueGaugeQuda(QudaLinkType link_type); /** * Free QUDA's internal smeared gauge field. */ - void freeGaugeSmearedQuda(void); + API void freeGaugeSmearedQuda(void); /** * Free QUDA's internal two-link gauge field. */ - void freeGaugeTwoLinkQuda(void); + API void freeGaugeTwoLinkQuda(void); /** * Save the gauge field to the host. * @param h_gauge Base pointer to host gauge field (regardless of dimensionality) * @param param Contains all metadata regarding host and device storage */ - void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param); + API void saveGaugeQuda(void *h_gauge, QudaGaugeParam *param); /** * Load the clover term and/or the clover inverse from the host. @@ -1164,13 +1183,13 @@ extern "C" { * @param h_cloverinv Base pointer to host clover inverse field * @param inv_param Contains all metadata regarding host and device storage */ - void loadCloverQuda(void *h_clover, void *h_clovinv, + API void loadCloverQuda(void *h_clover, void *h_clovinv, QudaInvertParam *inv_param); /** * Free QUDA's internal copy of the clover term and/or clover inverse. */ - void freeCloverQuda(void); + API void freeCloverQuda(void); /** * Perform the solve, according to the parameters set in param. It @@ -1181,7 +1200,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage and solver parameters */ - void lanczosQuda(int k0, int m, void *hp_Apsi, void *hp_r, void *hp_V, void *hp_alpha, void *hp_beta, + API void lanczosQuda(int k0, int m, void *hp_Apsi, void *hp_r, void *hp_V, void *hp_alpha, void *hp_beta, QudaEigParam *eig_param); /** @@ -1192,7 +1211,7 @@ extern "C" { * @param h_evals Host side eigenvalues * @param param Contains all metadata regarding the type of solve. */ - void eigensolveQuda(void **h_evecs, double_complex *h_evals, QudaEigParam *param); + API void eigensolveQuda(void **h_evecs, double_complex *h_evals, QudaEigParam *param); /** * Perform the solve, according to the parameters set in param. It @@ -1203,7 +1222,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage and solver parameters */ - void invertQuda(void *h_x, void *h_b, QudaInvertParam *param); + API void invertQuda(void *h_x, void *h_b, QudaInvertParam *param); /** * @brief Perform the solve like @invertQuda but for multiple rhs by spliting the comm grid into @@ -1218,7 +1237,7 @@ extern "C" { * @param _hp_b Array of source spinor fields * @param param Contains all metadata regarding host and device storage and solver parameters */ - void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param); + API void invertMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param); /** * Solve for multiple shifts (e.g., masses). @@ -1227,7 +1246,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage and solver parameters */ - void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param); + API void invertMultiShiftQuda(void **_hp_x, void *_hp_b, QudaInvertParam *param); /** * Setup the multigrid solver, according to the parameters set in param. It @@ -1236,7 +1255,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage and solver parameters */ - void* newMultigridQuda(QudaMultigridParam *param); + API void* newMultigridQuda(QudaMultigridParam *param); /** * @brief Free resources allocated by the multigrid solver @@ -1244,7 +1263,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage and solver parameters */ - void destroyMultigridQuda(void *mg_instance); + API void destroyMultigridQuda(void *mg_instance); /** * @brief Updates the multigrid preconditioner for the new gauge / clover field @@ -1253,7 +1272,7 @@ extern "C" { * storage and solver parameters, of note contains a flag specifying whether * to do a full update or a thin update. */ - void updateMultigridQuda(void *mg_instance, QudaMultigridParam *param); + API void updateMultigridQuda(void *mg_instance, QudaMultigridParam *param); /** * @brief Dump the null-space vectors to disk @@ -1262,7 +1281,7 @@ extern "C" { * storage and solver parameters (QudaMultigridParam::vec_outfile * sets the output filename prefix). */ - void dumpMultigridQuda(void *mg_instance, QudaMultigridParam *param); + API void dumpMultigridQuda(void *mg_instance, QudaMultigridParam *param); /** * Apply the Dslash operator (D_{eo} or D_{oe}). @@ -1272,7 +1291,7 @@ extern "C" { * storage * @param[in] parity The destination parity of the field */ - void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity); + API void dslashQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity); /** * Apply the covariant derivative. @@ -1281,7 +1300,7 @@ extern "C" { * @param[in] dir Direction of application * @param[in] param Metadata for host and device storage */ - void covDevQuda(void *h_out, void *h_in, int dir, QudaInvertParam *param); + API void covDevQuda(void *h_out, void *h_in, int dir, QudaInvertParam *param); /** * Apply the covariant derivative. @@ -1291,7 +1310,7 @@ extern "C" { * @param[in] sym Apply forward=2, backward=2 or symmetric=3 shift * @param[in] param Metadata for host and device storage */ - void shiftQuda(void *h_out, void *h_in, int dir, int sym, QudaInvertParam *param); + API void shiftQuda(void *h_out, void *h_in, int dir, int sym, QudaInvertParam *param); /** * Apply the spin-taste operator. @@ -1301,7 +1320,7 @@ extern "C" { * @param[in] taste Taste gamma structure * @param[in] param Metadata for host and device storage */ - void spinTasteQuda(void *h_out, void *h_in, int spin, int taste, QudaInvertParam *param); + API void spinTasteQuda(void *h_out, void *h_in, int spin, int taste, QudaInvertParam *param); /** * @brief Perform the solve like @dslashQuda but for multiple rhs by spliting the comm grid into @@ -1314,7 +1333,7 @@ extern "C" { * @param param Contains all metadata regarding host and device storage and solver parameters * @param parity Parity to apply dslash on */ - void dslashMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, QudaParity parity); + API void dslashMultiSrcQuda(void **_hp_x, void **_hp_b, QudaInvertParam *param, QudaParity parity); /** * Apply the clover operator or its inverse. @@ -1325,7 +1344,7 @@ extern "C" { * @param parity The source and destination parity of the field * @param inverse Whether to apply the inverse of the clover term */ - void cloverQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int inverse); + API void cloverQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaParity parity, int inverse); /** * Apply the full Dslash matrix, possibly even/odd preconditioned. @@ -1334,7 +1353,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage */ - void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param); + API void MatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param); /** * Apply M^{\dag}M, possibly even/odd preconditioned. @@ -1343,7 +1362,7 @@ extern "C" { * @param param Contains all metadata regarding host and device * storage */ - void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param); + API void MatDagMatQuda(void *h_out, void *h_in, QudaInvertParam *inv_param); /* @@ -1355,7 +1374,7 @@ extern "C" { void pack_ghost(void **cpuLink, void **cpuGhost, int nFace, QudaPrecision precision); - void computeKSLinkQuda(void* fatlink, void* longlink, void* ulink, void* inlink, + API void computeKSLinkQuda(void* fatlink, void* longlink, void* ulink, void* inlink, double *path_coeff, QudaGaugeParam *param); /** @@ -1366,7 +1385,7 @@ extern "C" { * @param[in] param Contains all metadata regarding host and device * storage */ - void computeTwoLinkQuda(void *twolink, void *inlink, QudaGaugeParam *param); + API void computeTwoLinkQuda(void *twolink, void *inlink, QudaGaugeParam *param); /** * Either downloads and sets the resident momentum field, or uploads @@ -1375,7 +1394,7 @@ extern "C" { * @param[in,out] mom The external momentum field * @param[in] param The parameters of the external field */ - void momResidentQuda(void *mom, QudaGaugeParam *param); + API void momResidentQuda(void *mom, QudaGaugeParam *param); /** * Compute the gauge force and update the momentum field @@ -1390,7 +1409,7 @@ extern "C" { * @param[in] dt The integration step size (for MILC this is dt*beta/3) * @param[in] param The parameters of the external fields and the computation settings */ - int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, + API int computeGaugeForceQuda(void *mom, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam); /** @@ -1406,7 +1425,7 @@ extern "C" { * @param[in] dt The integration step size (for MILC this is dt*beta/3) * @param[in] param The parameters of the external fields and the computation settings */ - int computeGaugePathQuda(void *out, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, + API int computeGaugePathQuda(void *out, void *sitelink, int ***input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double dt, QudaGaugeParam *qudaGaugeParam); /** @@ -1420,7 +1439,7 @@ extern "C" { * @param[in] max_length The maximum number of non-zero of links in any path in the action * @param[in] factor An overall normalization factor */ - void computeGaugeLoopTraceQuda(double_complex *traces, int **input_path_buf, int *path_length, double *loop_coeff, + API void computeGaugeLoopTraceQuda(double_complex *traces, int **input_path_buf, int *path_length, double *loop_coeff, int num_paths, int max_length, double factor); /** @@ -1434,7 +1453,7 @@ extern "C" { * @param exact Whether to use an exact exponential or Taylor expand * @param param The parameters of the external fields and the computation settings */ - void updateGaugeFieldQuda(void* gauge, void* momentum, double dt, + API void updateGaugeFieldQuda(void* gauge, void* momentum, double dt, int conj_mom, int exact, QudaGaugeParam* param); /** @@ -1446,7 +1465,7 @@ extern "C" { * @param gauge_h The gauge field * @param param The parameters of the gauge field */ - void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param); + API void staggeredPhaseQuda(void *gauge_h, QudaGaugeParam *param); /** * Project the input field on the SU(3) group. If the target @@ -1456,7 +1475,7 @@ extern "C" { * @param tol The tolerance to which we iterate * @param param The parameters of the gauge field */ - void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param); + API void projectSU3Quda(void *gauge_h, double tol, QudaGaugeParam *param); /** * Evaluate the momentum contribution to the Hybrid Monte Carlo @@ -1466,7 +1485,7 @@ extern "C" { * @param param The parameters of the external fields and the computation settings * @return momentum action */ - double momActionQuda(void* momentum, QudaGaugeParam* param); + API double momActionQuda(void* momentum, QudaGaugeParam* param); /** * Allocate a gauge (matrix) field on the device and optionally download a host gauge field. @@ -1476,7 +1495,7 @@ extern "C" { * @param param The parameters of the external field and the field to be created * @return Pointer to the gauge field (cast as a void*) */ - void* createGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param); + API void* createGaugeFieldQuda(void* gauge, int geometry, QudaGaugeParam* param); /** * Copy the QUDA gauge (matrix) field on the device to the CPU @@ -1485,21 +1504,21 @@ extern "C" { * @param inGauge Pointer to the device gauge field (QUDA device field) * @param param The parameters of the host and device fields */ - void saveGaugeFieldQuda(void* outGauge, void* inGauge, QudaGaugeParam* param); + API void saveGaugeFieldQuda(void* outGauge, void* inGauge, QudaGaugeParam* param); /** * Reinterpret gauge as a pointer to a GaugeField and call destructor. * * @param gauge Gauge field to be freed */ - void destroyGaugeFieldQuda(void* gauge); + API void destroyGaugeFieldQuda(void* gauge); /** * Compute the clover field and its inverse from the resident gauge field. * * @param param The parameters of the clover field to create */ - void createCloverQuda(QudaInvertParam* param); + API void createCloverQuda(QudaInvertParam* param); /** * Compute the clover force contributions from a set of partial @@ -1519,7 +1538,7 @@ extern "C" { * @param gauge_param Gauge field meta data * @param inv_param Dirac and solver meta data */ - void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, + API void computeCloverForceQuda(void *mom, double dt, void **x, void **p, double *coeff, double kappa2, double ck, int nvector, double multiplicity, void *gauge, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param); @@ -1537,7 +1556,7 @@ extern "C" { * @param inv_param Dirac and solver meta data * @param detratio if 0 compute the force of a determinant otherwise compute the force from a ratio of determinants */ - void computeTMCloverForceQuda(void *h_mom, void **h_x, void **h_x0, double *coeff, int nvector, + API void computeTMCloverForceQuda(void *h_mom, void **h_x, void **h_x0, double *coeff, int nvector, QudaGaugeParam *gauge_param, QudaInvertParam *inv_param, int detratio); /** @@ -1551,7 +1570,7 @@ extern "C" { * @param gauge_param Gauge field meta data * @param invert_param Dirac and solver meta data */ - void computeStaggeredForceQuda(void *mom, double dt, double delta, void *gauge, void **x, QudaGaugeParam *gauge_param, + API void computeStaggeredForceQuda(void *mom, double dt, double delta, void *gauge, void **x, QudaGaugeParam *gauge_param, QudaInvertParam *invert_param); /** @@ -1569,7 +1588,7 @@ extern "C" { * @param coeff The coefficient multiplying the fermion fields in the outer product * @param param. The field parameters. */ - void computeHISQForceQuda(void* momentum, + API void computeHISQForceQuda(void* momentum, double dt, const double level2_coeff[6], const double fat7_coeff[6], @@ -1593,7 +1612,7 @@ extern "C" { @param seed The seed used for the RNG @param sigma Width of Gaussian distrubution */ - void gaussGaugeQuda(unsigned long long seed, double sigma); + API void gaussGaugeQuda(unsigned long long seed, double sigma); /** * @brief Generate Gaussian distributed fields and store in the @@ -1605,13 +1624,13 @@ extern "C" { * @param seed The seed used for the RNG * @param sigma Width of Gaussian distrubution */ - void gaussMomQuda(unsigned long long seed, double sigma); + API void gaussMomQuda(unsigned long long seed, double sigma); /** * Computes the total, spatial and temporal plaquette averages of the loaded gauge configuration. * @param[out] Array for storing the averages (total, spatial, temporal) */ - void plaqQuda(double plaq[3]); + API void plaqQuda(double plaq[3]); /** @brief Computes the trace of the Polyakov loop of the current resident field @@ -1620,13 +1639,13 @@ extern "C" { @param[out] ploop Trace of the Polyakov loop in direction dir @param[in] dir Direction of Polyakov loop */ - void polyakovLoopQuda(double ploop[2], int dir); + API void polyakovLoopQuda(double ploop[2], int dir); /** * Performs a deep copy from the internal extendedGaugeResident field. * @param Pointer to externally allocated GaugeField */ - void copyExtendedResidentGaugeQuda(void *resident_gauge); + API void copyExtendedResidentGaugeQuda(void *resident_gauge); /** * Performs gaussian/Wuppertal smearing on a given spinor using the gauge field @@ -1639,7 +1658,7 @@ extern "C" { * @param coeff Width of the Gaussian distribution * @param smear_type Gaussian/Wuppertal smearing */ - void performFermionSmearQuda(void *h_out, void *h_in, QudaInvertParam *param, const int n_steps, const double coeff, + API void performFermionSmearQuda(void *h_out, void *h_in, QudaInvertParam *param, const int n_steps, const double coeff, const QudaFermionSmearType smear_type); /** @@ -1653,7 +1672,7 @@ extern "C" { * @param n_steps Number of steps to apply. * @param alpha Alpha coefficient for Wuppertal smearing. */ - void performWuppertalnStep(void *h_out, void *h_in, QudaInvertParam *param, unsigned int n_steps, double alpha); + API void performWuppertalnStep(void *h_out, void *h_in, QudaInvertParam *param, unsigned int n_steps, double alpha); /** * LEGACY @@ -1666,7 +1685,7 @@ extern "C" { * @param n_steps Number of steps to apply. * @param omega Width of the Gaussian distribution */ - void performGaussianSmearNStep(void *h_out, void *h_in, QudaInvertParam *param, const int n_steps, const double omega); + API void performGaussianSmearNStep(void *h_out, void *h_in, QudaInvertParam *param, const int n_steps, const double omega); /** * Performs APE, Stout, or Over Imroved STOUT smearing on gaugePrecise and stores it in gaugeSmeared @@ -1674,7 +1693,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performGaugeSmearQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); + API void performGaugeSmearQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); /** * Performs Wilson Flow on gaugePrecise and stores it in gaugeSmeared @@ -1682,7 +1701,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performWFlowQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); + API void performWFlowQuda(QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); /** * Performs Gradient Flow (gauge + fermion) on gaugePrecise and stores it in gaugeSmeared @@ -1692,7 +1711,7 @@ extern "C" { * @param[in,out] obs_param Parameter struct that defines which * observables we are making and the resulting observables. */ - void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, + API void performGFlowQuda(void *h_out, void *h_in, QudaInvertParam *inv_param, QudaGaugeSmearParam *smear_param, QudaGaugeObservableParam *obs_param); /** @@ -1703,7 +1722,7 @@ extern "C" { * @param[in,out] param Parameter struct that defines which * observables we are making and the resulting observables. */ - void gaugeObservablesQuda(QudaGaugeObservableParam *param); + API void gaugeObservablesQuda(QudaGaugeObservableParam *param); /** * Public function to perform color contractions of the host spinors x and y. @@ -1714,7 +1733,7 @@ extern "C" { * @param[in] param meta data for construction of ColorSpinorFields. * @param[in] X spacetime data for construction of ColorSpinorFields. */ - void contractQuda(const void *x, const void *y, void *result, const QudaContractType cType, QudaInvertParam *param, + API void contractQuda(const void *x, const void *y, void *result, const QudaContractType cType, QudaInvertParam *param, const int *X); /** @@ -1730,7 +1749,7 @@ extern "C" { * @param[in] mom_modes momentum modes * @param[in] fft_type Fourier phase factor type (cos, sin or exp{ikx}) */ - void contractFTQuda(void **x, void **y, void **result, const QudaContractType cType, void *cs_param_ptr, + API void contractFTQuda(void **x, void **y, void **result, const QudaContractType cType, void *cs_param_ptr, const int src_colors, const int *X, const int *const source_position, const int n_mom, const int *const mom_modes, const QudaFFTSymmType *const fft_type); @@ -1747,7 +1766,7 @@ extern "C" { * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings */ - int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, + API int computeGaugeFixingOVRQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double relax_boost, const double tolerance, const unsigned int reunit_interval, const unsigned int stopWtheta, QudaGaugeParam *param); @@ -1764,7 +1783,7 @@ extern "C" { * @param[in] stopWtheta, 0 for MILC criterion and 1 to use the theta value * @param[in] param The parameters of the external fields and the computation settings */ - int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, + API int computeGaugeFixingFFTQuda(void *gauge, const unsigned int gauge_dir, const unsigned int Nsteps, const unsigned int verbose_interval, const double alpha, const unsigned int autotune, const double tolerance, const unsigned int stopWtheta, QudaGaugeParam *param); @@ -1776,7 +1795,7 @@ extern "C" { * @param[in] native Boolean to use either the native or generic version * @param[in] param The data defining the problem execution. */ - void blasGEMMQuda(void *arrayA, void *arrayB, void *arrayC, QudaBoolean native, QudaBLASParam *param); + API void blasGEMMQuda(void *arrayA, void *arrayB, void *arrayC, QudaBoolean native, QudaBLASParam *param); /** * @brief Strided Batched in-place matrix inversion via LU @@ -1785,27 +1804,27 @@ extern "C" { * @param[in] use_native Boolean to use either the native or generic version * @param[in] param The data defining the problem execution. */ - void blasLUInvQuda(void *Ainv, void *A, QudaBoolean use_native, QudaBLASParam *param); + API void blasLUInvQuda(void *Ainv, void *A, QudaBoolean use_native, QudaBLASParam *param); /** * @brief Flush the chronological history for the given index * @param[in] index Index for which we are flushing */ - void flushChronoQuda(int index); + API void flushChronoQuda(int index); /** * Create deflation solver resources. * **/ - void* newDeflationQuda(QudaEigParam *param); + API void* newDeflationQuda(QudaEigParam *param); /** * Free resources allocated by the deflated solver */ - void destroyDeflationQuda(void *df_instance); + API void destroyDeflationQuda(void *df_instance); - void setMPICommHandleQuda(void *mycomm); + API void setMPICommHandleQuda(void *mycomm); // Parameter set for quark smearing operations typedef struct QudaQuarkSmearParam_s { @@ -1839,7 +1858,7 @@ extern "C" { * @param[in,out] h_in Input spinor field to smear * @param[in] smear_param Contains all metadata the operator which will be applied to the spinor */ - void performTwoLinkGaussianSmearNStep(void *h_in, QudaQuarkSmearParam *smear_param); + API void performTwoLinkGaussianSmearNStep(void *h_in, QudaQuarkSmearParam *smear_param); /** * @brief Performs contractions between a set of quark fields and @@ -1856,15 +1875,17 @@ extern "C" { * @param[in] inv_param Meta-data structure * @param[in] X Lattice dimensions */ - void laphSinkProject(double _Complex *host_sinks, void **host_quark, int n_quark, int tile_quark, + API void laphSinkProject(double_complex *host_sinks, void **host_quark, int n_quark, int tile_quark, void **host_evec, int nevec, int tile_evec, QudaInvertParam *inv_param, const int X[4]); #ifdef __cplusplus } #endif +#ifndef _INTERFACE_ // remove NVRTC WAR #undef double_complex +#endif /* #include */ diff --git a/include/quda_matrix.h b/include/quda_matrix.h index d3a306f68e..8290d09762 100644 --- a/include/quda_matrix.h +++ b/include/quda_matrix.h @@ -995,11 +995,11 @@ namespace quda { // if its argument (tmp) is zero and then return unity. Complex tmp = a3 + sg2h3; - if (tmp.real() == 0 and tmp.imag() == 0) { + if (tmp.real() == 0 && tmp.imag() == 0) { // Making sure q is a zero matrix bool iszero = true; for (int i = 0; i < 9; i++) { - if (q(i).real() != 0 or q(i).imag() != 0) { + if (q(i).real() != 0 || q(i).imag() != 0) { iszero = false; break; } diff --git a/include/reliable_updates.h b/include/reliable_updates.h index ec70fb4f3c..b87e8ce393 100644 --- a/include/reliable_updates.h +++ b/include/reliable_updates.h @@ -130,8 +130,8 @@ namespace quda { if (params.alternative_reliable) { // alternative reliable updates - updateX = ((d <= deps * sqrt(r2_old)) or (dfac * dinit > deps * r0Norm)) and (d_new > deps * rNorm) - and (d_new > dfac * dinit); + updateX = ((d <= deps * sqrt(r2_old)) || (dfac * dinit > deps * r0Norm)) && (d_new > deps * rNorm) + && (d_new > dfac * dinit); updateR = 0; } else { if (rNorm > maxrx) maxrx = rNorm; @@ -205,19 +205,19 @@ namespace quda bool reliable_break(double r2, double stop, bool &L2breakdown, double L2breakdown_eps) { // break-out check if we have reached the limit of the precision - if (sqrt(r2) > r0Norm && updateX and not L2breakdown) { // reuse r0Norm for this + if (sqrt(r2) > r0Norm && updateX && ! L2breakdown) { // reuse r0Norm for this resIncrease++; resIncreaseTotal++; warningQuda("new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)", sqrt(r2), r0Norm, resIncreaseTotal); - if ((params.use_heavy_quark_res and sqrt(r2) < L2breakdown_eps) or resIncrease > params.maxResIncrease - or resIncreaseTotal > params.maxResIncreaseTotal or r2 < stop) { + if ((params.use_heavy_quark_res && sqrt(r2) < L2breakdown_eps) || resIncrease > params.maxResIncrease + || resIncreaseTotal > params.maxResIncreaseTotal || r2 < stop) { if (params.use_heavy_quark_res) { L2breakdown = true; warningQuda("L2 breakdown %e, %e", sqrt(r2), L2breakdown_eps); } else { - if (resIncrease > params.maxResIncrease or resIncreaseTotal > params.maxResIncreaseTotal or r2 < stop) { + if (resIncrease > params.maxResIncrease || resIncreaseTotal > params.maxResIncreaseTotal || r2 < stop) { warningQuda("solver exiting due to too many true residual norm increases"); return true; } @@ -239,7 +239,7 @@ namespace quda bool reliable_heavy_quark_break(bool L2breakdown, double heavy_quark_res, double heavy_quark_res_old, bool &heavy_quark_restart) { - if (params.use_heavy_quark_res and L2breakdown) { + if (params.use_heavy_quark_res && L2breakdown) { hqresRestartTotal++; // count the number of heavy quark restarts we've done delta = 0; warningQuda("CG: Restarting without reliable updates for heavy-quark residual (total #inc %i)", diff --git a/include/targets/cuda/block_reduction_kernel.h b/include/targets/cuda/block_reduction_kernel.h index 27551430c8..2bb75affaa 100644 --- a/include/targets/cuda/block_reduction_kernel.h +++ b/include/targets/cuda/block_reduction_kernel.h @@ -129,13 +129,13 @@ namespace quda per thread (in the x dimension). Not supported at present. @param[in] arg Kernel argument */ - template