diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp index e2521888..db12efd1 100644 --- a/src/ArbLattice.cpp +++ b/src/ArbLattice.cpp @@ -22,7 +22,6 @@ ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map< } void ArbLattice::initialize(size_t num_snaps_, const std::map& setting_zones, pugi::xml_node arb_node) { - const int rank = mpitools::MPI_Rank(comm); sizes.snaps = num_snaps_; #ifdef ADJOINT sizes.snaps += 2; // Adjoint snaps are appended to the total snap allocation @@ -896,6 +895,36 @@ void ArbLattice::resetAverage(){ } } +void ArbLattice::getSample(int quant, unsigned int lid, real_t scale, real_t *buf) { + setSnapIn(Snap); +#ifdef ADJOINT + setAdjSnapIn(aSnap); +#endif + launcher.sampleQuantity(quant, lid, buf, scale, data); +} + + +void ArbLattice::updateAllSamples(){ + const int rank = mpitools::MPI_Rank(comm); + if (sample->size != 0) { + for (size_t j = 0; j < sample->spoints.size(); j++) { + if (rank == sample->spoints[j].rank) { + for(const Model::Quantity& q : model->quantities) { + if (sample->quant->in(q.name.c_str())){ + double v = sample->units->alt(q.unit.c_str()); + getSample(q.id, sample->spoints[j].lid, 1/v, + &sample->gpu_buffer[sample->location[q.name.c_str()]+(data.iter - sample->startIter)*sample->size + sample->totalIter*j*sample->size]); + } + } + } + } + } +} + +unsigned int ArbLattice::getCartesianCoordinateLid(vector_t point) const { + return launcher.getCartesianCoordinateLid(point, data); +} + ArbLattice::~ArbLattice() { RFI.Close(); diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 5349db6c..a0b267c9 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -96,14 +96,18 @@ class ArbLattice : public LatticeBase { virtual void setFlags(const std::vector& x) override; virtual void setField(const Model::Field& f, const std::vector& x) override; virtual void setFieldAdjZero(const Model::Field& f) override; + virtual void updateAllSamples() override; const ArbVTUGeom& getVTUGeom() const { return vtu_geom; } Span getNodeTypes() const { return {node_types_host.data(), node_types_host.size()}; } /// Get host view of node types (permuted) const ArbLatticeConnectivity& getConnectivity() const { return connect; } const std::vector& getLocalPermutation() const { return local_permutation; } + unsigned int getCartesianCoordinateLid(vector_t point) const; + void getSample(int quant, unsigned int lid, real_t scale, real_t *buf); + void resetAverage(); - lbRegion getLocalBoundingBox() const override; /// Compute local bounding box, assuming the arbitrary lattice is a subset of a Cartesian lattice + lbRegion getLocalBoundingBox() const override; /// Compute local bounding box, assuming the arbitrary lattice is a subset of a Cartesian lattice protected: ArbLatticeLauncher launcher; /// Launcher responsible for running CUDA kernels on the lattice diff --git a/src/ArbLatticeLauncher.h.Rt b/src/ArbLatticeLauncher.h.Rt index 31c604a9..eb5fbbca 100644 --- a/src/ArbLatticeLauncher.h.Rt +++ b/src/ArbLatticeLauncher.h.Rt @@ -22,6 +22,10 @@ struct ArbLatticeLauncher { void getQuantity(int quant, real_t* host_tab, real_t scale, const LatticeData& data) const; + unsigned int getCartesianCoordinateLid(const vector_t point, const LatticeData& data) const; + + void sampleQuantity(int quant, unsigned int lid, real_t* host_tab, real_t scale, const LatticeData &data) const; + private: void getQuantity(* tab, real_t scale, const LatticeData& data) const; + + void getSample(* tab, real_t scale, const LatticeData& data, unsigned int lid) const; }; diff --git a/src/ArbLatticeLauncher.hpp.Rt b/src/ArbLatticeLauncher.hpp.Rt index ecddb46d..41d78e62 100644 --- a/src/ArbLatticeLauncher.hpp.Rt +++ b/src/ArbLatticeLauncher.hpp.Rt @@ -34,7 +34,8 @@ struct ArbLatticeExecutor : public LinearExecutor { template void ArbLatticeLauncher::RunBorder(CudaStream_t stream, const LatticeData& data) const { - const ArbLatticeExecutor executor{{container.num_border_nodes}, container, data, 0}; + const ArbLatticeExecutor executor{{container.num_border_nodes}, + container, data, 0}; LaunchExecutorAsync(executor, stream); } @@ -99,6 +100,7 @@ struct GetQuantityArbExecutor : public LinearExecutor { LatticeData data; * buf; real_t scale; + unsigned int offset; // Starting offset for iteration space CudaDeviceFunction void Execute() const { using LA = ArbLatticeAccess; }?> const int i = threadID(CudaThread, CudaBlock, CudaNumberOfThreads); if (inRange(i)) { - LA acc(i, container); + const int index = i + offset; + LA acc(index, container); N now(acc, data); acc.pop(now); @@ -129,10 +132,68 @@ if (q$type == "vector_t") { }; void ArbLatticeLauncher::getQuantity(* tab, real_t scale, const LatticeData& data) const { - const GetQuantityArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale}; + const GetQuantityArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale, 0}; + LaunchExecutor(executor); +} + +void ArbLatticeLauncher::getSample(* tab, real_t scale, const LatticeData& data, unsigned int lid) const { + const GetQuantityArbExecutor executor{{1}, container, data, tab, scale, lid}; LaunchExecutor(executor); } + +struct CartesianCoordinateMapperArbExecutor: public LinearExecutor { + ArbLatticeContainer container; + LatticeData data; + vector_t point; // cartesian point coordinates + real_t epsilon2; // should be a small value (e.g. < 1e-10) to match with only one point + unsigned int *matched_lid; + + CudaDeviceFunction void Execute() const { + using LA = ArbLatticeAccess; + const int i = threadID(CudaThread, CudaBlock, CudaNumberOfThreads); + if (inRange(i)) { + LA acc(i, container); + real_t X = acc.getX(); + real_t Y = acc.getY(); + real_t Z = acc.getZ(); + real_t distance = (X - point.x)*(X - point.x) + (Y - point.y)*(Y - point.y) + (Z - point.z)*(Z - point.z); + if (distance < epsilon2) { + *matched_lid= i; + } + } + } +}; + + +unsigned int ArbLatticeLauncher::getCartesianCoordinateLid(const vector_t point, const LatticeData& data) const { + const auto gpu_lid = cudaMakeUnique(1); + const real_t tol2 = 1e-10; + const CartesianCoordinateMapperArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, point, tol2, + gpu_lid.get()}; + LaunchExecutor(executor); + + unsigned int matched_lid; + CudaMemcpy(&matched_lid, gpu_lid.get(), sizeof(unsigned int), CudaMemcpyDeviceToHost); + return matched_lid; +} + +void ArbLatticeLauncher::sampleQuantity(int quant, unsigned int lid, real_t* host_tab, real_t scale, const LatticeData &data) const { + switch(quant) { + case : { + const auto gpu_tab = cudaMakeUnique<>(1); + getSample(gpu_tab.get(), scale, data, lid); + CudaMemcpy(host_tab, gpu_tab.get(), sizeof(), CudaMemcpyDeviceToHost); + break; + } + } +} + #endif // ARBLATTICELAUNCHER_HPP diff --git a/src/CartLattice.cpp.Rt b/src/CartLattice.cpp.Rt index 02dcf472..8cf711d5 100644 --- a/src/CartLattice.cpp.Rt +++ b/src/CartLattice.cpp.Rt @@ -63,7 +63,6 @@ CartLattice::CartLattice(CartConnectivity connect, int ns, const UnitEnv& units_ { DEBUG_M; AllocContainer(launcher.container, getLocalRegion().nx, getLocalRegion().ny, getLocalRegion().nz); - sample = std::make_unique(model.get(), units, connectivity.mpi_rank); Snaps = std::make_unique(num_snaps); setPosition(0.0,0.0,0.0); DEBUG_M; @@ -721,8 +720,9 @@ void CartLattice::GetSample(const lbRegion& over, real_t scale,rea launcher.container.in = Snaps[Snap]; launcher.container.adjin = aSnaps[aSnap]; - lbRegion small = getLocalRegion().intersect(over); - launcher.SampleQuantity(small, (*)buf, scale, data); + lbRegion local_reg = getLocalRegion(); + lbRegion small_local = over.shift(local_reg.dx, local_reg.dy, local_reg.dz); + launcher.SampleQuantity(small_local, (*)buf, scale, data); } void CartLattice::updateAllSamples(){ diff --git a/src/CartLattice.h.Rt b/src/CartLattice.h.Rt index 5572a17e..1a45931e 100644 --- a/src/CartLattice.h.Rt +++ b/src/CartLattice.h.Rt @@ -8,7 +8,6 @@ #include "LatticeBase.hpp" #include "CartLatticeLauncher.h" #include "CartConnectivity.hpp" -#include "Sampler.h" #include "Geometry.h" #include @@ -73,7 +72,6 @@ protected: #endif public: - std::unique_ptr sample; //initializing sampler with zero size real_t px, py, pz; CartLattice (CartConnectivity connect, int ns, const UnitEnv& units_); @@ -148,7 +146,7 @@ public: int getPar(const ParStruct& par_struct, double * wb); int setPar(const ParStruct& par_struct, double * w); - void updateAllSamples(); + virtual void updateAllSamples() override; void resetAverage(); }; diff --git a/src/CartLatticeLauncher.hpp.Rt b/src/CartLatticeLauncher.hpp.Rt index e4e6f47a..97a28f80 100644 --- a/src/CartLatticeLauncher.hpp.Rt +++ b/src/CartLatticeLauncher.hpp.Rt @@ -307,7 +307,7 @@ public: GetQuantitySampleExecutor(Args&&... args) : GetQuantityExecutorBase(std::forward(args)...) {} CudaHostFunction LaunchParams ComputeLaunchParams(dim3) const { - return LaunchParams{dim3(small.nx, small.ny), dim3(1)}; + return LaunchParams{dim3(small.nx, small.ny, small.nz), dim3(1)}; } }; diff --git a/src/Handlers/cbSample.cpp b/src/Handlers/cbSample.cpp index 295488d8..0f3ccbe7 100644 --- a/src/Handlers/cbSample.cpp +++ b/src/Handlers/cbSample.cpp @@ -16,7 +16,6 @@ int cbSample::Init () { else { s.add_from_string("all",','); } - const auto lattice = solver->getCartLattice(); for (pugi::xml_node par = node.first_child(); par; par = par.next_sibling()) { if (strcmp(par.name(),"Point") == 0) { lbRegion loc; @@ -32,33 +31,44 @@ int cbSample::Init () { if (attr) { loc.dz = solver->units.alt(attr.value()); } - loc = lattice->getLocalRegion().intersect(loc); - if (loc.nx == 1) lattice->sample->addPoint(loc, solver->mpi_rank); + loc = solver->lattice->getLocalBoundingBox().intersect(loc); + + if (loc.nx == 1) { + unsigned int lid = 0; + auto variant = solver->getLatticeVariant(); + if (auto* lattice = std::get_if*>(&variant)) { + // cache lid for arbitrary lattice + const real_t offset = 0.5; + vector_t point{real_t(loc.dx) + offset, real_t(loc.dy) + offset, real_t(loc.dz) + offset}; + lid = (*lattice)->getCartesianCoordinateLid(point); + } + solver->lattice->sample->addPoint(loc, solver->mpi_rank, lid); + } + } else { error("Uknown element in Sampler\n"); return -1; } } filename = solver->outIterFile(nm, ".csv"); - lattice->sample->units = &solver->units; - lattice->sample->mpi_rank = solver->mpi_rank; - lattice->sample->Allocate(&s,startIter,everyIter); - lattice->sample->initCSV(filename.c_str()); + solver->lattice->sample->units = &solver->units; + solver->lattice->sample->mpi_rank = solver->mpi_rank; + solver->lattice->sample->Allocate(&s,startIter,everyIter); + solver->lattice->sample->initCSV(filename.c_str()); return 0; } int cbSample::DoIt () { Callback::DoIt(); - const auto lattice = solver->getCartLattice(); - lattice->sample->writeHistory(solver->iter); - lattice->sample->startIter = solver->iter; + solver->lattice->sample->writeHistory(solver->iter); + solver->lattice->sample->startIter = solver->iter; return 0; } int cbSample::Finish () { - solver->getCartLattice()->sample->Finish(); + solver->lattice->sample->Finish(); return Callback::Finish(); } diff --git a/src/Lattice.hpp.Rt b/src/Lattice.hpp.Rt index de3ac6bf..70cc4732 100644 --- a/src/Lattice.hpp.Rt +++ b/src/Lattice.hpp.Rt @@ -109,8 +109,7 @@ struct Lattice : public LatticeType { CudaDeviceSynchronize(); LatticeType::Snap = tab_out; LatticeType::MarkIteration(); - if constexpr(std::is_same_v) /// TODO - LatticeType::updateAllSamples(); + LatticeType::updateAllSamples(); DEBUG_PROF_POP(); } diff --git a/src/LatticeBase.cpp.Rt b/src/LatticeBase.cpp.Rt index 0460f290..3150a9f5 100644 --- a/src/LatticeBase.cpp.Rt +++ b/src/LatticeBase.cpp.Rt @@ -5,6 +5,7 @@ c_header(); #include "LatticeBase.hpp" #include "utils.h" +#include "Global.h" LatticeBase::LatticeBase(int zonesettings, int zones, int num_snaps_, const UnitEnv& units_) : model(std::make_unique()), zSet(zonesettings, zones), num_snaps(num_snaps_), units(&units_) { @@ -24,6 +25,8 @@ LatticeBase::LatticeBase(int zonesettings, int zones, int num_snaps_, const Unit data.ConstZoneSettings = zSet.gpuConst; std::fill_n(iSnaps.get(), maxSnaps, -1); + sample = std::make_unique(model.get(), units, D_MPI_RANK); + // Setting settings to default diff --git a/src/LatticeBase.hpp b/src/LatticeBase.hpp index 292952ef..dbbfde2f 100644 --- a/src/LatticeBase.hpp +++ b/src/LatticeBase.hpp @@ -10,6 +10,7 @@ #include "SolidContainer.h" #include "SyntheticTurbulence.h" #include "ZoneSettings.h" +#include "Sampler.h" #include "Region.h" #include "cross.h" #include "unit.h" @@ -68,6 +69,7 @@ class LatticeBase { bool RFI_omega, RFI_torque; SyntheticTurbulence ST; ///< std::string snapFileName; + std::unique_ptr sample; ///< sampler of quantities protected: static constexpr int maxSnaps = 33; @@ -122,6 +124,8 @@ class LatticeBase { virtual void setField(const Model::Field& f, const std::vector& x) = 0; virtual void setFieldAdjZero(const Model::Field& f) = 0; + virtual void updateAllSamples() = 0; + void CopyInParticles(); void CopyOutParticles(); diff --git a/src/Region.h b/src/Region.h index 9d2f197c..00e2d68a 100644 --- a/src/Region.h +++ b/src/Region.h @@ -28,6 +28,10 @@ struct lbRegion { if (ret.nx <= 0 || ret.ny <= 0 || ret.nz <= 0) { ret.nx = ret.ny = ret.nz = 0; }; return ret; }; + lbRegion shift(int dxs, int dys, int dzs) const { + lbRegion ret(dx - dxs, dy - dys, dz - dzs, nx, ny, nz); + return ret; + } int offset(int x,int y) const { return (x-dx) + (y-dy) * nx; }; diff --git a/src/Sampler.cpp b/src/Sampler.cpp index 755302b2..cf203053 100644 --- a/src/Sampler.cpp +++ b/src/Sampler.cpp @@ -80,10 +80,11 @@ int Sampler::Allocate(name_set* nquantities,int start,int iter) { return 0; } -int Sampler::addPoint(lbRegion loc,int rank){ +int Sampler::addPoint(lbRegion loc,int rank, unsigned int lid){ sreg temp; temp.location = loc; temp.rank = rank; + temp.lid = lid; spoints.push_back(temp); return 0; } diff --git a/src/Sampler.h b/src/Sampler.h index bfb7384a..7a43b757 100644 --- a/src/Sampler.h +++ b/src/Sampler.h @@ -23,6 +23,7 @@ Each point and corresponding output data is controlled by the mpi rank related t struct sreg { int rank; lbRegion location; + unsigned int lid; // location index cache for arbitrary grid }; class Sampler { typedef std::map< std::string , int > Location; @@ -42,7 +43,7 @@ class Sampler { int initCSV(const char* name); int writeHistory(int curr_iter); int Allocate(name_set* quantities,int total_iter,int iter); - int addPoint(lbRegion loc,int rank); + int addPoint(lbRegion loc,int rank, unsigned int lid = 0); const char *filename; int Finish(); };