diff --git a/CMakeLists.txt b/CMakeLists.txt index 803f5dba41..0146130cee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,10 @@ include(cmake/CPM.cmake) find_package(Git) +if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) +endif() + # By default we will build DEVEL. The different build types will pass different # flags to the compiler which may be strict or permissive on warnings, or # very verbose at run time and/or compile time. @@ -337,7 +341,7 @@ set(CMAKE_CXX_FLAGS_STRICT "-Os" CACHE STRING "Flags used by the C++ compiler during strict jenkins builds.") set(CMAKE_CXX_FLAGS_RELEASE - "-O3 ${CXX_OPT}" + "-Zi -O2 ${CXX_OPT}" CACHE STRING "Flags used by the C++ compiler during release builds.") set(CMAKE_CXX_FLAGS_HOSTDEBUG "-g" @@ -359,7 +363,7 @@ set(CMAKE_C_FLAGS_STRICT "-Os" CACHE STRING "Flags used by the C compiler during strict jenkins builds.") set(CMAKE_C_FLAGS_RELEASE - "-O3" + "-Zi -O2" CACHE STRING "Flags used by the C compiler during release builds.") set(CMAKE_C_FLAGS_HOSTDEBUG "-g" @@ -709,5 +713,7 @@ include(CTest) # add tests, utils, reference, and quda library add_subdirectory(lib) -add_subdirectory(tests) +if (NOT WIN32) + add_subdirectory(tests) +endif() add_subdirectory(doc) diff --git a/include/color_spinor_field_order.h b/include/color_spinor_field_order.h index 25f5234390..0664a40c47 100644 --- a/include/color_spinor_field_order.h +++ b/include/color_spinor_field_order.h @@ -1196,7 +1196,7 @@ namespace quda using Accessor = FloatNOrder; using GhostNOrder = GhostNOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; using Vector = typename VectorType::type; using AllocInt = typename AllocType::type; using norm_type = float; @@ -1432,7 +1432,7 @@ namespace quda using Accessor = FloatNOrder; using GhostNOrder = GhostNOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; using Vector = int4; // 128-bit packed type using AllocInt = typename AllocType::type; using norm_type = float; diff --git a/include/communicator_quda.h b/include/communicator_quda.h index aec02b8c2a..c4b0718904 100644 --- a/include/communicator_quda.h +++ b/include/communicator_quda.h @@ -1,6 +1,6 @@ #pragma once -#include // for gethostname() +// #include // for gethostname() #include #include #include diff --git a/include/complex_quda.h b/include/complex_quda.h index 18da63def5..9dbc6c95a1 100644 --- a/include/complex_quda.h +++ b/include/complex_quda.h @@ -20,6 +20,11 @@ #pragma once +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#undef _USE_MATH_DEFINES +#endif #include #include #include diff --git a/include/gauge_field_order.h b/include/gauge_field_order.h index 38079c3cbb..2b8c067c99 100644 --- a/include/gauge_field_order.h +++ b/include/gauge_field_order.h @@ -1852,7 +1852,7 @@ namespace quda { template struct QDPOrder : public LegacyOrder { using Accessor = QDPOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge[QUDA_MAX_DIM]; const unsigned int volumeCB; QDPOrder(const GaugeField &u, Float *gauge_ = 0, Float **ghost_ = 0) : @@ -1898,7 +1898,7 @@ namespace quda { template struct QDPJITOrder : public LegacyOrder { using Accessor = QDPJITOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge[QUDA_MAX_DIM]; const unsigned int volumeCB; QDPJITOrder(const GaugeField &u, Float *gauge_ = 0, Float **ghost_ = 0) : @@ -1948,7 +1948,7 @@ namespace quda { template struct MILCOrder : public LegacyOrder { using Accessor = MILCOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const int geometry; @@ -2009,7 +2009,7 @@ namespace quda { template struct MILCSiteOrder : public LegacyOrder { using Accessor = MILCSiteOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const int geometry; @@ -2068,7 +2068,7 @@ namespace quda { template struct CPSOrder : LegacyOrder { using Accessor = CPSOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; const real anisotropy; @@ -2137,7 +2137,7 @@ namespace quda { template struct BQCDOrder : LegacyOrder { using Accessor = BQCDOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; unsigned int exVolumeCB; // extended checkerboard volume @@ -2199,7 +2199,7 @@ namespace quda { template struct TIFROrder : LegacyOrder { using Accessor = TIFROrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; static constexpr int Nc = 3; @@ -2263,7 +2263,7 @@ namespace quda { template struct TIFRPaddedOrder : LegacyOrder { using Accessor = TIFRPaddedOrder; using real = typename mapper::type; - using complex = complex; + using complex = quda::complex; Float *gauge; const unsigned int volumeCB; int exVolumeCB; diff --git a/include/instantiate.h b/include/instantiate.h index 8eee6ad269..6597fb3156 100644 --- a/include/instantiate.h +++ b/include/instantiate.h @@ -33,41 +33,32 @@ namespace quda @brief Helper function for returning if a given gauge field order is enabled @tparam order The order requested */ - template constexpr bool is_enabled(); + constexpr bool is_enabled(QudaGaugeFieldOrder order) + { + switch (order) { #ifdef BUILD_QDP_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_QDP_GAUGE_ORDER: return true; #endif #ifdef BUILD_QDPJIT_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_QDPJIT_GAUGE_ORDER: return true; #endif #ifdef BUILD_CPS_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_CPS_WILSON_GAUGE_ORDER: return true; #endif #ifdef BUILD_MILC_INTERFACE - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } - template <> constexpr bool is_enabled() { return false; } + case QUDA_MILC_GAUGE_ORDER: return true; + case QUDA_MILC_SITE_GAUGE_ORDER: return true; #endif #ifdef BUILD_BQCD_INTERFACE - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } + case QUDA_BQCD_GAUGE_ORDER: return true; #endif #ifdef BUILD_TIFR_INTERFACE - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } -#else - template <> constexpr bool is_enabled() { return false; } - template <> constexpr bool is_enabled() { return false; } + case QUDA_TIFR_GAUGE_ORDER: return true; + case QUDA_TIFR_PADDED_GAUGE_ORDER: return true; #endif + default: return false; + } + } /** @brief Helper function for returning if a given precision is enabled @@ -89,13 +80,18 @@ namespace quda @tparam reconstruct The reconstruct requested @return True if enabled, false if not */ - template constexpr bool is_enabled(); - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 4) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 2) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 2) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 1) ? true : false; } - template <> constexpr bool is_enabled() { return (QUDA_RECONSTRUCT & 1) ? true : false; } - template <> constexpr bool is_enabled() { return true; } + constexpr bool is_enabled(QudaReconstructType reconstruct) + { + switch (reconstruct) { + case QUDA_RECONSTRUCT_NO: return (QUDA_RECONSTRUCT & 4) ? true : false; + case QUDA_RECONSTRUCT_13: return (QUDA_RECONSTRUCT & 2) ? true : false; + case QUDA_RECONSTRUCT_12: return (QUDA_RECONSTRUCT & 2) ? true : false; + case QUDA_RECONSTRUCT_9: return (QUDA_RECONSTRUCT & 1) ? true : false; + case QUDA_RECONSTRUCT_8: return (QUDA_RECONSTRUCT & 1) ? true : false; + case QUDA_RECONSTRUCT_10: return true; + default: return false; + } + } struct ReconstructFull { static constexpr std::array recon @@ -142,8 +138,8 @@ namespace quda void instantiateReconstruct(G &U, Args &&...args) { if (U.Reconstruct() == Recon::recon[i]) { - if constexpr (is_enabled()) - Apply(U, args...); + if constexpr (is_enabled(Recon::recon[i])) + Apply apply(U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, Recon::recon[i]); } else if constexpr (i > 0) { @@ -471,13 +467,13 @@ namespace quda constexpr void instantiateGaugeStaggered(G &U, Args &&...args) { if (U.Reconstruct() == QUDA_RECONSTRUCT_NO) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_NO)) // actual phase type doesn't matter because the phase is baked into the links Apply(U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_NO); } else if (U.Reconstruct() == QUDA_RECONSTRUCT_13) { - if constexpr (is_enabled()) { + if constexpr (is_enabled(QUDA_RECONSTRUCT_13)) { if (U.StaggeredPhase() == QUDA_STAGGERED_PHASE_NO) Apply(U, args...); else if (U.StaggeredPhase() == QUDA_STAGGERED_PHASE_MILC) @@ -488,7 +484,7 @@ namespace quda errorQuda("QUDA_RECONSTRUCT=%d does not enable %d", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_13); } } else if (U.Reconstruct() == QUDA_RECONSTRUCT_12) { - if constexpr (is_enabled()) { + if constexpr (is_enabled(QUDA_RECONSTRUCT_12)) { errorQuda("QUDA_RECONSTRUCT=%d has not been implemented for HISQ gauge routines yet.", QUDA_RECONSTRUCT_12); } else { errorQuda("QUDA_RECONSTRUCT=%d does not enable %d\n", QUDA_RECONSTRUCT, QUDA_RECONSTRUCT_12); @@ -545,38 +541,43 @@ namespace quda @tparam dslash_type The dslash_type requested @return True if enabled, false if not */ - template constexpr bool is_enabled() { return false; } + constexpr bool is_enabled(QudaDslashType dslash_type) + { + switch (dslash_type) { #ifdef GPU_WILSON_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_WILSON_DSLASH: return true; #endif #ifdef GPU_CLOVER_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_CLOVER_WILSON_DSLASH: return true; #endif #ifdef GPU_CLOVER_HASENBUSCH_TWIST - template <> constexpr bool is_enabled() { return true; } + case QUDA_CLOVER_HASENBUSCH_TWIST_DSLASH: return true; #endif #ifdef GPU_DOMAIN_WALL_DIRAC - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } + case QUDA_DOMAIN_WALL_DSLASH: return true; + case QUDA_DOMAIN_WALL_4D_DSLASH: return true; + case QUDA_MOBIUS_DWF_DSLASH: return true; + case QUDA_MOBIUS_DWF_EOFA_DSLASH: return true; #endif #ifdef GPU_STAGGERED_DIRAC - template <> constexpr bool is_enabled() { return true; } - template <> constexpr bool is_enabled() { return true; } + case QUDA_STAGGERED_DSLASH: return true; + case QUDA_ASQTAD_DSLASH: return true; #endif #ifdef GPU_TWISTED_MASS_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_TWISTED_MASS_DSLASH: return true; #endif #ifdef GPU_TWISTED_CLOVER_DIRAC - template <> constexpr bool is_enabled() { return true; } + case QUDA_TWISTED_CLOVER_DSLASH: return true; #endif #ifdef GPU_LAPLACE - template <> constexpr bool is_enabled() { return true; } + case QUDA_LAPLACE_DSLASH: return true; #endif #ifdef GPU_COVDEV - template <> constexpr bool is_enabled() { return true; } + case QUDA_COVDEV_DSLASH: return true; #endif + default: return false; + } + } #ifdef GPU_DISTANCE_PRECONDITIONING constexpr bool is_enabled_distance_precondition() { return true; } diff --git a/include/instantiate_dslash.h b/include/instantiate_dslash.h index eab0ead243..d577c097af 100644 --- a/include/instantiate_dslash.h +++ b/include/instantiate_dslash.h @@ -22,17 +22,17 @@ namespace quda cvector_ref &x, const GaugeField &U, Args &&...args) { if (U.Reconstruct() == Recon::recon[0]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_NO)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-18", QUDA_RECONSTRUCT); } else if (U.Reconstruct() == Recon::recon[1]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_12)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-12/13", QUDA_RECONSTRUCT); } else if (U.Reconstruct() == Recon::recon[2]) { - if constexpr (is_enabled()) + if constexpr (is_enabled(QUDA_RECONSTRUCT_8)) Apply(out, in, x, U, args...); else errorQuda("QUDA_RECONSTRUCT=%d does not enable reconstruct-8/9", QUDA_RECONSTRUCT); diff --git a/include/kernel_helper.h b/include/kernel_helper.h index dcb33baba0..336c9fe297 100644 --- a/include/kernel_helper.h +++ b/include/kernel_helper.h @@ -2,6 +2,9 @@ #include "comm_quda.h" +#undef TRUE +#undef FALSE + namespace quda { diff --git a/include/kernels/dslash_mdw_fused.cuh b/include/kernels/dslash_mdw_fused.cuh index 67f98b30cc..a28180605d 100644 --- a/include/kernels/dslash_mdw_fused.cuh +++ b/include/kernels/dslash_mdw_fused.cuh @@ -165,7 +165,7 @@ namespace quda { bool ret = false; #pragma unroll for (int d = 0; d < 4; d++) { - ret = ret or (coordinate[d] >= dim[d] - halo_shift[d] or coordinate[d] < halo_shift[d]); + ret = ret || (coordinate[d] >= dim[d] - halo_shift[d] || coordinate[d] < halo_shift[d]); } return ret; } diff --git a/include/kernels/evec_project.cuh b/include/kernels/evec_project.cuh index 7bea55d908..cf8d7bdbfe 100644 --- a/include/kernels/evec_project.cuh +++ b/include/kernels/evec_project.cuh @@ -11,8 +11,8 @@ namespace quda { using spinor_array = array; - constexpr unsigned long max_nx = 4; - constexpr unsigned long max_ny = 4; + constexpr unsigned long long max_nx = 4; + constexpr unsigned long long max_ny = 4; template struct EvecProjectionArg : public ReduceArg diff --git a/include/kernels/gauge_random.cuh b/include/kernels/gauge_random.cuh index 92c2b1995e..82d4ffea40 100644 --- a/include/kernels/gauge_random.cuh +++ b/include/kernels/gauge_random.cuh @@ -52,6 +52,8 @@ namespace quda { temp2[i] *= radius[i]; } + printf("%f %f\n", phi[0], radius[0]); + // construct Anti-Hermitian matrix const real rsqrt_3 = quda::rsqrt(3.0); ret(0, 0) = complex(0.0, temp1[2] + rsqrt_3 * temp2[3]); @@ -82,12 +84,12 @@ namespace quda { getCoords(x, x_cb, arg.X, parity); for (int dr = 0; dr < 4; ++dr) x[dr] += arg.border[dr]; // extended grid coordinates - if (arg.group and arg.sigma == 0.0) { + if (arg.group && arg.sigma == 0.0) { // if sigma = 0 then we just set the output matrix to the identity and finish Link I; setIdentity(&I); for (int mu = 0; mu < 4; mu++) arg.U(mu, linkIndex(x, arg.E), parity) = I; - } else if (not arg.group and arg.sigma == 0.0) { + } else if (! arg.group && arg.sigma == 0.0) { // if sigma = 0 then we just set the output matrix to the zero and finish Link O = {}; for (int mu = 0; mu < 4; mu++) arg.U(mu, linkIndex(x, arg.E), parity) = O; diff --git a/include/multi_blas_helper.cuh b/include/multi_blas_helper.cuh index 93dd15249c..58e105f956 100644 --- a/include/multi_blas_helper.cuh +++ b/include/multi_blas_helper.cuh @@ -235,7 +235,7 @@ namespace quda static_assert(coeff_nyw != 0, "coeff_nyw is zero"); // additional limit since there's diminished benefit past a certain point - constexpr auto max_nyw = 128lu; + constexpr auto max_nyw = 128llu; return std::min(arg_nyw, std::min(coeff_nyw, max_nyw)); } @@ -290,7 +290,7 @@ namespace quda const auto coeff_nyw = Functor::coeff_mul ? max_array_size() / (NXZ * sizeof(typename Functor::coeff_t)) : arg_nyw; // additional limit since there's diminished benefit past a certain point - constexpr auto max_nyw = 128lu; + constexpr auto max_nyw = 128llu; return std::min(arg_nyw, std::min(coeff_nyw, max_nyw)); } diff --git a/include/polynomial.h b/include/polynomial.h index aa51d372ed..69eeef551d 100644 --- a/include/polynomial.h +++ b/include/polynomial.h @@ -1,5 +1,10 @@ #pragma once +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#undef _USE_MATH_DEFINES +#endif #include #include #include diff --git a/include/quda.h b/include/quda.h index 155791b60a..fc1580e12f 100644 --- a/include/quda.h +++ b/include/quda.h @@ -15,7 +15,23 @@ #include #ifndef __CUDACC_RTC__ +#ifdef _MSC_VER +#ifdef __cplusplus +#include +extern "C" typedef struct { + double real, imag; + operator std::complex() const { return std::complex(real, imag); } + operator std::complex() const { return std::complex(real, imag); } +} _Dcomplex; +#else +typedef struct { + double real, imag; +} _Dcomplex; +#endif +#define double_complex _Dcomplex +#else #define double_complex double _Complex +#endif #else // keep NVRTC happy since it can't handle C types #define double_complex double2 #endif @@ -1856,15 +1872,17 @@ extern "C" { * @param[in] inv_param Meta-data structure * @param[in] X Lattice dimensions */ - void laphSinkProject(double _Complex *host_sinks, void **host_quark, int n_quark, int tile_quark, + void laphSinkProject(double_complex *host_sinks, void **host_quark, int n_quark, int tile_quark, void **host_evec, int nevec, int tile_evec, QudaInvertParam *inv_param, const int X[4]); #ifdef __cplusplus } #endif +#ifndef _INTERFACE_ // remove NVRTC WAR #undef double_complex +#endif /* #include */ diff --git a/include/quda_matrix.h b/include/quda_matrix.h index d3a306f68e..8290d09762 100644 --- a/include/quda_matrix.h +++ b/include/quda_matrix.h @@ -995,11 +995,11 @@ namespace quda { // if its argument (tmp) is zero and then return unity. Complex tmp = a3 + sg2h3; - if (tmp.real() == 0 and tmp.imag() == 0) { + if (tmp.real() == 0 && tmp.imag() == 0) { // Making sure q is a zero matrix bool iszero = true; for (int i = 0; i < 9; i++) { - if (q(i).real() != 0 or q(i).imag() != 0) { + if (q(i).real() != 0 || q(i).imag() != 0) { iszero = false; break; } diff --git a/include/reliable_updates.h b/include/reliable_updates.h index ec70fb4f3c..b87e8ce393 100644 --- a/include/reliable_updates.h +++ b/include/reliable_updates.h @@ -130,8 +130,8 @@ namespace quda { if (params.alternative_reliable) { // alternative reliable updates - updateX = ((d <= deps * sqrt(r2_old)) or (dfac * dinit > deps * r0Norm)) and (d_new > deps * rNorm) - and (d_new > dfac * dinit); + updateX = ((d <= deps * sqrt(r2_old)) || (dfac * dinit > deps * r0Norm)) && (d_new > deps * rNorm) + && (d_new > dfac * dinit); updateR = 0; } else { if (rNorm > maxrx) maxrx = rNorm; @@ -205,19 +205,19 @@ namespace quda bool reliable_break(double r2, double stop, bool &L2breakdown, double L2breakdown_eps) { // break-out check if we have reached the limit of the precision - if (sqrt(r2) > r0Norm && updateX and not L2breakdown) { // reuse r0Norm for this + if (sqrt(r2) > r0Norm && updateX && ! L2breakdown) { // reuse r0Norm for this resIncrease++; resIncreaseTotal++; warningQuda("new reliable residual norm %e is greater than previous reliable residual norm %e (total #inc %i)", sqrt(r2), r0Norm, resIncreaseTotal); - if ((params.use_heavy_quark_res and sqrt(r2) < L2breakdown_eps) or resIncrease > params.maxResIncrease - or resIncreaseTotal > params.maxResIncreaseTotal or r2 < stop) { + if ((params.use_heavy_quark_res && sqrt(r2) < L2breakdown_eps) || resIncrease > params.maxResIncrease + || resIncreaseTotal > params.maxResIncreaseTotal || r2 < stop) { if (params.use_heavy_quark_res) { L2breakdown = true; warningQuda("L2 breakdown %e, %e", sqrt(r2), L2breakdown_eps); } else { - if (resIncrease > params.maxResIncrease or resIncreaseTotal > params.maxResIncreaseTotal or r2 < stop) { + if (resIncrease > params.maxResIncrease || resIncreaseTotal > params.maxResIncreaseTotal || r2 < stop) { warningQuda("solver exiting due to too many true residual norm increases"); return true; } @@ -239,7 +239,7 @@ namespace quda bool reliable_heavy_quark_break(bool L2breakdown, double heavy_quark_res, double heavy_quark_res_old, bool &heavy_quark_restart) { - if (params.use_heavy_quark_res and L2breakdown) { + if (params.use_heavy_quark_res && L2breakdown) { hqresRestartTotal++; // count the number of heavy quark restarts we've done delta = 0; warningQuda("CG: Restarting without reliable updates for heavy-quark residual (total #inc %i)", diff --git a/include/targets/cuda/block_reduction_kernel.h b/include/targets/cuda/block_reduction_kernel.h index 27551430c8..2bb75affaa 100644 --- a/include/targets/cuda/block_reduction_kernel.h +++ b/include/targets/cuda/block_reduction_kernel.h @@ -129,13 +129,13 @@ namespace quda per thread (in the x dimension). Not supported at present. @param[in] arg Kernel argument */ - template