From 9849fc25da3bc59659c874a9d7fa34b4e983c283 Mon Sep 17 00:00:00 2001 From: Dmytro Sashko Date: Tue, 3 Mar 2026 10:27:00 +1000 Subject: [PATCH 1/4] Implement sampler for the arbitrary grid --- src/ArbLattice.cpp | 31 ++++++++++- src/ArbLattice.hpp | 7 ++- src/ArbLatticeLauncher.h.Rt | 6 +++ src/ArbLatticeLauncher.hpp.Rt | 68 +++++++++++++++++++++++-- src/CartLattice.cpp.Rt | 1 - src/CartLattice.h.Rt | 11 ++-- src/Handlers/acRemoteForceInterface.cpp | 2 +- src/Handlers/cbSample.cpp | 32 ++++++++---- src/Lattice.hpp.Rt | 3 +- src/LatticeBase.cpp.Rt | 3 ++ src/LatticeBase.hpp | 8 +++ src/Sampler.cpp | 3 +- src/Sampler.h | 3 +- 13 files changed, 153 insertions(+), 25 deletions(-) diff --git a/src/ArbLattice.cpp b/src/ArbLattice.cpp index a24f1dda0..76bb89c2b 100644 --- a/src/ArbLattice.cpp +++ b/src/ArbLattice.cpp @@ -22,7 +22,6 @@ ArbLattice::ArbLattice(size_t num_snaps_, const UnitEnv& units_, const std::map< } void ArbLattice::initialize(size_t num_snaps_, const std::map& setting_zones, pugi::xml_node arb_node) { - const int rank = mpitools::MPI_Rank(comm); sizes.snaps = num_snaps_; #ifdef ADJOINT sizes.snaps += 2; // Adjoint snaps are appended to the total snap allocation @@ -895,3 +894,33 @@ void ArbLattice::resetAverage(){ } } } + +void ArbLattice::getSample(int quant, unsigned int lid, real_t scale, real_t *buf) { + setSnapIn(Snap); +#ifdef ADJOINT + setAdjSnapIn(aSnap); +#endif + launcher.sampleQuantity(quant, lid, buf, scale, data); +} + + +void ArbLattice::updateAllSamples(){ + const int rank = mpitools::MPI_Rank(comm); + if (sample->size != 0) { + for (size_t j = 0; j < sample->spoints.size(); j++) { + if (rank == sample->spoints[j].rank) { + for(const Model::Quantity& q : model->quantities) { + if (sample->quant->in(q.name.c_str())){ + double v = sample->units->alt(q.unit.c_str()); + getSample(q.id, sample->spoints[j].lid, 1/v, + &sample->gpu_buffer[sample->location[q.name.c_str()]+(data.iter - sample->startIter)*sample->size + sample->totalIter*j*sample->size]); + } + } + } + } + } +} + +unsigned int ArbLattice::getCartesianCoordinateLid(vector_t point) const { + return launcher.getCartesianCoordinateLid(point, data); +} diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index 9857dc23b..fbb3b8b29 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -96,13 +96,19 @@ class ArbLattice : public LatticeBase { virtual void setFlags(const std::vector& x); virtual void setField(const Model::Field& f, const std::vector& x); virtual void setFieldAdjZero(const Model::Field& f); + + virtual void updateAllSamples() override; const ArbVTUGeom& getVTUGeom() const { return vtu_geom; } Span getNodeTypes() const { return {node_types_host.data(), node_types_host.size()}; } /// Get host view of node types (permuted) const ArbLatticeConnectivity& getConnectivity() const { return connect; } const std::vector& getLocalPermutation() const { return local_permutation; } + unsigned int getCartesianCoordinateLid(vector_t point) const; + void getSample(int quant, unsigned int lid, real_t scale, real_t *buf); + void resetAverage(); + lbRegion getLocalBoundingBox() const override; /// Compute local bounding box, assuming the arbitrary lattice is a subset of a Cartesian lattice protected: ArbLatticeLauncher launcher; /// Launcher responsible for running CUDA kernels on the lattice @@ -152,7 +158,6 @@ class ArbLattice : public LatticeBase { void initCommManager(); /// Compute which fields need to be sent to/received from which neighbors void initContainer(); /// Initialize the data residing in launcher.container int fullLatticePos(double pos) const; /// Compute the position (in terms of lattice offsets) of a node, assuming the arbitrary lattice is a subset of a Cartesian lattice - lbRegion getLocalBoundingBox() const; /// Compute local bounding box, assuming the arbitrary lattice is a subset of a Cartesian lattice ArbVTUGeom makeVTUGeom() const; /// Compute VTU geometry void communicateBorder(); /// Send and receive border values in snap (overlapped with interior computation) unsigned lookupLocalGhostIndex(ArbLatticeConnectivity::Index gid) const; /// For a given ghost gid, look up its local id diff --git a/src/ArbLatticeLauncher.h.Rt b/src/ArbLatticeLauncher.h.Rt index 31c604a97..eb5fbbca0 100644 --- a/src/ArbLatticeLauncher.h.Rt +++ b/src/ArbLatticeLauncher.h.Rt @@ -22,6 +22,10 @@ struct ArbLatticeLauncher { void getQuantity(int quant, real_t* host_tab, real_t scale, const LatticeData& data) const; + unsigned int getCartesianCoordinateLid(const vector_t point, const LatticeData& data) const; + + void sampleQuantity(int quant, unsigned int lid, real_t* host_tab, real_t scale, const LatticeData &data) const; + private: void getQuantity(* tab, real_t scale, const LatticeData& data) const; + + void getSample(* tab, real_t scale, const LatticeData& data, unsigned int lid) const; }; diff --git a/src/ArbLatticeLauncher.hpp.Rt b/src/ArbLatticeLauncher.hpp.Rt index ecddb46d8..7d4f8ee96 100644 --- a/src/ArbLatticeLauncher.hpp.Rt +++ b/src/ArbLatticeLauncher.hpp.Rt @@ -34,7 +34,8 @@ struct ArbLatticeExecutor : public LinearExecutor { template void ArbLatticeLauncher::RunBorder(CudaStream_t stream, const LatticeData& data) const { - const ArbLatticeExecutor executor{{container.num_border_nodes}, container, data, 0}; + const ArbLatticeExecutor executor{{container.num_border_nodes}, + container, data, 0}; LaunchExecutorAsync(executor, stream); } @@ -99,6 +100,7 @@ struct GetQuantityArbExecutor : public LinearExecutor { LatticeData data; * buf; real_t scale; + unsigned int offset; // Starting offset for iteration space CudaDeviceFunction void Execute() const { using LA = ArbLatticeAccess; }?> const int i = threadID(CudaThread, CudaBlock, CudaNumberOfThreads); if (inRange(i)) { - LA acc(i, container); + const int index = i + offset; + LA acc(index, container); N now(acc, data); acc.pop(now); @@ -129,10 +132,69 @@ if (q$type == "vector_t") { }; void ArbLatticeLauncher::getQuantity(* tab, real_t scale, const LatticeData& data) const { - const GetQuantityArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale}; + const GetQuantityArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, tab, scale, 0}; + LaunchExecutor(executor); +} + +void ArbLatticeLauncher::getSample(* tab, real_t scale, const LatticeData& data, unsigned int lid) const { + const GetQuantityArbExecutor executor{{1}, container, data, tab, scale, lid}; LaunchExecutor(executor); } + +struct CartesianCoordinateMapperArbExecutor: public LinearExecutor { + ArbLatticeContainer container; + LatticeData data; + vector_t point; // cartesian point coordinates + real_t epsilon2; // should be a small value (e.g. < 1e-10) to match with only one point + unsigned int *matched_lid; + + CudaDeviceFunction void Execute() const { + using LA = ArbLatticeAccess; + using N = Node< LA, Primal, NoGlobals, Get >; + const int i = threadID(CudaThread, CudaBlock, CudaNumberOfThreads); + if (inRange(i)) { + LA acc(i, container); + real_t X = acc.getX(); + real_t Y = acc.getY(); + real_t Z = acc.getZ(); + real_t distance = (X - point.x)*(X - point.x) + (Y - point.y)*(Y - point.y) + (Z - point.z)*(Z - point.z); + if (distance < epsilon2) { + *matched_lid= i; + } + } + } +}; + + +unsigned int ArbLatticeLauncher::getCartesianCoordinateLid(const vector_t point, const LatticeData& data) const { + const auto gpu_lid = cudaMakeUnique(1); + const real_t tol2 = 1e-10; + const CartesianCoordinateMapperArbExecutor executor{{container.num_border_nodes + container.num_interior_nodes}, container, data, point, tol2, + gpu_lid.get()}; + LaunchExecutor(executor); + + unsigned int matched_lid; + CudaMemcpy(&matched_lid, gpu_lid.get(), sizeof(unsigned int), CudaMemcpyDeviceToHost); + return matched_lid; +} + +void ArbLatticeLauncher::sampleQuantity(int quant, unsigned int lid, real_t* host_tab, real_t scale, const LatticeData &data) const { + switch(quant) { + case : { + const auto gpu_tab = cudaMakeUnique<>(1); + getSample(gpu_tab.get(), scale, data, lid); + CudaMemcpy(host_tab, gpu_tab.get(), sizeof(), CudaMemcpyDeviceToHost); + break; + } + } +} + #endif // ARBLATTICELAUNCHER_HPP diff --git a/src/CartLattice.cpp.Rt b/src/CartLattice.cpp.Rt index 02dcf4722..b583db0b8 100644 --- a/src/CartLattice.cpp.Rt +++ b/src/CartLattice.cpp.Rt @@ -63,7 +63,6 @@ CartLattice::CartLattice(CartConnectivity connect, int ns, const UnitEnv& units_ { DEBUG_M; AllocContainer(launcher.container, getLocalRegion().nx, getLocalRegion().ny, getLocalRegion().nz); - sample = std::make_unique(model.get(), units, connectivity.mpi_rank); Snaps = std::make_unique(num_snaps); setPosition(0.0,0.0,0.0); DEBUG_M; diff --git a/src/CartLattice.h.Rt b/src/CartLattice.h.Rt index bebc6f05f..1a45931e0 100644 --- a/src/CartLattice.h.Rt +++ b/src/CartLattice.h.Rt @@ -8,7 +8,6 @@ #include "LatticeBase.hpp" #include "CartLatticeLauncher.h" #include "CartConnectivity.hpp" -#include "Sampler.h" #include "Geometry.h" #include @@ -73,7 +72,6 @@ protected: #endif public: - std::unique_ptr sample; //initializing sampler with zero size real_t px, py, pz; CartLattice (CartConnectivity connect, int ns, const UnitEnv& units_); @@ -85,6 +83,13 @@ public: const lbRegion& getLocalRegion() const { return connectivity.getLocalRegion(); } const lbRegion& getGlobalRegion() const { return connectivity.global_region; } + + lbRegion getLocalBoundingBox() const override { + lbRegion reg = getLocalRegion(); + return lbRegion(px + reg.dx, py + reg.dy, pz + reg.dz, + reg.nx, reg.ny, reg.nz); + } + size_t getLocalSize() const override { return getLocalRegion().sizeL(); } size_t getGlobalSize() const override { return getGlobalRegion().sizeL(); } @@ -141,7 +146,7 @@ public: int getPar(const ParStruct& par_struct, double * wb); int setPar(const ParStruct& par_struct, double * w); - void updateAllSamples(); + virtual void updateAllSamples() override; void resetAverage(); }; diff --git a/src/Handlers/acRemoteForceInterface.cpp b/src/Handlers/acRemoteForceInterface.cpp index ca0a0ce14..a08ef0f93 100644 --- a/src/Handlers/acRemoteForceInterface.cpp +++ b/src/Handlers/acRemoteForceInterface.cpp @@ -106,7 +106,7 @@ int acRemoteForceInterface::ConnectRemoteForceInterface(std::string integrator_) MPI_Barrier(MPMD.local); lattice->RFI.Connect(MPMD.work,inter.work); - + return 0; } diff --git a/src/Handlers/cbSample.cpp b/src/Handlers/cbSample.cpp index 295488d85..0f3ccbe75 100644 --- a/src/Handlers/cbSample.cpp +++ b/src/Handlers/cbSample.cpp @@ -16,7 +16,6 @@ int cbSample::Init () { else { s.add_from_string("all",','); } - const auto lattice = solver->getCartLattice(); for (pugi::xml_node par = node.first_child(); par; par = par.next_sibling()) { if (strcmp(par.name(),"Point") == 0) { lbRegion loc; @@ -32,33 +31,44 @@ int cbSample::Init () { if (attr) { loc.dz = solver->units.alt(attr.value()); } - loc = lattice->getLocalRegion().intersect(loc); - if (loc.nx == 1) lattice->sample->addPoint(loc, solver->mpi_rank); + loc = solver->lattice->getLocalBoundingBox().intersect(loc); + + if (loc.nx == 1) { + unsigned int lid = 0; + auto variant = solver->getLatticeVariant(); + if (auto* lattice = std::get_if*>(&variant)) { + // cache lid for arbitrary lattice + const real_t offset = 0.5; + vector_t point{real_t(loc.dx) + offset, real_t(loc.dy) + offset, real_t(loc.dz) + offset}; + lid = (*lattice)->getCartesianCoordinateLid(point); + } + solver->lattice->sample->addPoint(loc, solver->mpi_rank, lid); + } + } else { error("Uknown element in Sampler\n"); return -1; } } filename = solver->outIterFile(nm, ".csv"); - lattice->sample->units = &solver->units; - lattice->sample->mpi_rank = solver->mpi_rank; - lattice->sample->Allocate(&s,startIter,everyIter); - lattice->sample->initCSV(filename.c_str()); + solver->lattice->sample->units = &solver->units; + solver->lattice->sample->mpi_rank = solver->mpi_rank; + solver->lattice->sample->Allocate(&s,startIter,everyIter); + solver->lattice->sample->initCSV(filename.c_str()); return 0; } int cbSample::DoIt () { Callback::DoIt(); - const auto lattice = solver->getCartLattice(); - lattice->sample->writeHistory(solver->iter); - lattice->sample->startIter = solver->iter; + solver->lattice->sample->writeHistory(solver->iter); + solver->lattice->sample->startIter = solver->iter; return 0; } int cbSample::Finish () { - solver->getCartLattice()->sample->Finish(); + solver->lattice->sample->Finish(); return Callback::Finish(); } diff --git a/src/Lattice.hpp.Rt b/src/Lattice.hpp.Rt index de3ac6bf8..70cc47324 100644 --- a/src/Lattice.hpp.Rt +++ b/src/Lattice.hpp.Rt @@ -109,8 +109,7 @@ struct Lattice : public LatticeType { CudaDeviceSynchronize(); LatticeType::Snap = tab_out; LatticeType::MarkIteration(); - if constexpr(std::is_same_v) /// TODO - LatticeType::updateAllSamples(); + LatticeType::updateAllSamples(); DEBUG_PROF_POP(); } diff --git a/src/LatticeBase.cpp.Rt b/src/LatticeBase.cpp.Rt index 0460f2904..3150a9f52 100644 --- a/src/LatticeBase.cpp.Rt +++ b/src/LatticeBase.cpp.Rt @@ -5,6 +5,7 @@ c_header(); #include "LatticeBase.hpp" #include "utils.h" +#include "Global.h" LatticeBase::LatticeBase(int zonesettings, int zones, int num_snaps_, const UnitEnv& units_) : model(std::make_unique()), zSet(zonesettings, zones), num_snaps(num_snaps_), units(&units_) { @@ -24,6 +25,8 @@ LatticeBase::LatticeBase(int zonesettings, int zones, int num_snaps_, const Unit data.ConstZoneSettings = zSet.gpuConst; std::fill_n(iSnaps.get(), maxSnaps, -1); + sample = std::make_unique(model.get(), units, D_MPI_RANK); + // Setting settings to default diff --git a/src/LatticeBase.hpp b/src/LatticeBase.hpp index 275d3ec70..dbbfde2f7 100644 --- a/src/LatticeBase.hpp +++ b/src/LatticeBase.hpp @@ -10,6 +10,8 @@ #include "SolidContainer.h" #include "SyntheticTurbulence.h" #include "ZoneSettings.h" +#include "Sampler.h" +#include "Region.h" #include "cross.h" #include "unit.h" @@ -67,6 +69,7 @@ class LatticeBase { bool RFI_omega, RFI_torque; SyntheticTurbulence ST; ///< std::string snapFileName; + std::unique_ptr sample; ///< sampler of quantities protected: static constexpr int maxSnaps = 33; @@ -93,6 +96,9 @@ class LatticeBase { public: virtual size_t getLocalSize() const = 0; virtual size_t getGlobalSize() const = 0; + + virtual lbRegion getLocalBoundingBox() const = 0; + void initLattice(); /// Called by handlers template @@ -118,6 +124,8 @@ class LatticeBase { virtual void setField(const Model::Field& f, const std::vector& x) = 0; virtual void setFieldAdjZero(const Model::Field& f) = 0; + virtual void updateAllSamples() = 0; + void CopyInParticles(); void CopyOutParticles(); diff --git a/src/Sampler.cpp b/src/Sampler.cpp index 755302b25..cf2030534 100644 --- a/src/Sampler.cpp +++ b/src/Sampler.cpp @@ -80,10 +80,11 @@ int Sampler::Allocate(name_set* nquantities,int start,int iter) { return 0; } -int Sampler::addPoint(lbRegion loc,int rank){ +int Sampler::addPoint(lbRegion loc,int rank, unsigned int lid){ sreg temp; temp.location = loc; temp.rank = rank; + temp.lid = lid; spoints.push_back(temp); return 0; } diff --git a/src/Sampler.h b/src/Sampler.h index bfb7384a3..7a43b7579 100644 --- a/src/Sampler.h +++ b/src/Sampler.h @@ -23,6 +23,7 @@ Each point and corresponding output data is controlled by the mpi rank related t struct sreg { int rank; lbRegion location; + unsigned int lid; // location index cache for arbitrary grid }; class Sampler { typedef std::map< std::string , int > Location; @@ -42,7 +43,7 @@ class Sampler { int initCSV(const char* name); int writeHistory(int curr_iter); int Allocate(name_set* quantities,int total_iter,int iter); - int addPoint(lbRegion loc,int rank); + int addPoint(lbRegion loc,int rank, unsigned int lid = 0); const char *filename; int Finish(); }; From b9fbeda545cd139975f6e102d2249af43ddf25be Mon Sep 17 00:00:00 2001 From: Dmytro Sashko Date: Wed, 4 Mar 2026 15:55:23 +1000 Subject: [PATCH 2/4] Fix sampler on cartesian lattice The problem was that the "intersection" of regions does not convert to the local coordinates, so the "over" coordinates were incorrecte (still global) --- src/CartLattice.cpp.Rt | 5 +++-- src/CartLatticeLauncher.hpp.Rt | 2 +- src/Region.h | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/CartLattice.cpp.Rt b/src/CartLattice.cpp.Rt index b583db0b8..8cf711d52 100644 --- a/src/CartLattice.cpp.Rt +++ b/src/CartLattice.cpp.Rt @@ -720,8 +720,9 @@ void CartLattice::GetSample(const lbRegion& over, real_t scale,rea launcher.container.in = Snaps[Snap]; launcher.container.adjin = aSnaps[aSnap]; - lbRegion small = getLocalRegion().intersect(over); - launcher.SampleQuantity(small, (*)buf, scale, data); + lbRegion local_reg = getLocalRegion(); + lbRegion small_local = over.shift(local_reg.dx, local_reg.dy, local_reg.dz); + launcher.SampleQuantity(small_local, (*)buf, scale, data); } void CartLattice::updateAllSamples(){ diff --git a/src/CartLatticeLauncher.hpp.Rt b/src/CartLatticeLauncher.hpp.Rt index e4e6f47a7..97a28f803 100644 --- a/src/CartLatticeLauncher.hpp.Rt +++ b/src/CartLatticeLauncher.hpp.Rt @@ -307,7 +307,7 @@ public: GetQuantitySampleExecutor(Args&&... args) : GetQuantityExecutorBase(std::forward(args)...) {} CudaHostFunction LaunchParams ComputeLaunchParams(dim3) const { - return LaunchParams{dim3(small.nx, small.ny), dim3(1)}; + return LaunchParams{dim3(small.nx, small.ny, small.nz), dim3(1)}; } }; diff --git a/src/Region.h b/src/Region.h index 9d2f197ca..00e2d68a8 100644 --- a/src/Region.h +++ b/src/Region.h @@ -28,6 +28,10 @@ struct lbRegion { if (ret.nx <= 0 || ret.ny <= 0 || ret.nz <= 0) { ret.nx = ret.ny = ret.nz = 0; }; return ret; }; + lbRegion shift(int dxs, int dys, int dzs) const { + lbRegion ret(dx - dxs, dy - dys, dz - dzs, nx, ny, nz); + return ret; + } int offset(int x,int y) const { return (x-dx) + (y-dy) * nx; }; From c6798676c303aebd3cdb160d36fc55eef0613164 Mon Sep 17 00:00:00 2001 From: Dmytro Sashko Date: Wed, 11 Mar 2026 16:23:24 +1000 Subject: [PATCH 3/4] Remove unused --- src/ArbLatticeLauncher.hpp.Rt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/ArbLatticeLauncher.hpp.Rt b/src/ArbLatticeLauncher.hpp.Rt index 7d4f8ee96..41d78e628 100644 --- a/src/ArbLatticeLauncher.hpp.Rt +++ b/src/ArbLatticeLauncher.hpp.Rt @@ -153,7 +153,6 @@ struct CartesianCoordinateMapperArbExecutor: public LinearExecutor { CudaDeviceFunction void Execute() const { using LA = ArbLatticeAccess; - using N = Node< LA, Primal, NoGlobals, Get >; const int i = threadID(CudaThread, CudaBlock, CudaNumberOfThreads); if (inRange(i)) { LA acc(i, container); From e6e18e564ceb857d81370dd3f93119b4ddc15c68 Mon Sep 17 00:00:00 2001 From: Dmytro Sashko Date: Thu, 12 Mar 2026 16:03:03 +1000 Subject: [PATCH 4/4] Fixup overrides of virtual functions Were not triggered previously because destructor was virtual. Now more consistent with CartLattice --- src/ArbLattice.hpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/ArbLattice.hpp b/src/ArbLattice.hpp index fbb3b8b29..816ffd936 100644 --- a/src/ArbLattice.hpp +++ b/src/ArbLattice.hpp @@ -86,17 +86,16 @@ class ArbLattice : public LatticeBase { size_t getGlobalSize() const final { return connect.num_nodes_global; } - virtual std::vector shape() const { return {static_cast(getLocalSize())}; }; - virtual std::vector getQuantity(const Model::Quantity& q, real_t scale = 1) ; - virtual std::vector getFlags() const; - virtual std::vector getField(const Model::Field& f); - virtual std::vector getFieldAdj(const Model::Field& f); - virtual std::vector getCoord(const Model::Coord& q, real_t scale = 1); - - virtual void setFlags(const std::vector& x); - virtual void setField(const Model::Field& f, const std::vector& x); - virtual void setFieldAdjZero(const Model::Field& f); - + virtual std::vector shape() const override { return {static_cast(getLocalSize())}; }; + virtual std::vector getQuantity(const Model::Quantity& q, real_t scale = 1) override ; + virtual std::vector getFlags() const override; + virtual std::vector getField(const Model::Field& f) override; + virtual std::vector getFieldAdj(const Model::Field& f) override; + virtual std::vector getCoord(const Model::Coord& q, real_t scale = 1) override; + + virtual void setFlags(const std::vector& x) override; + virtual void setField(const Model::Field& f, const std::vector& x) override; + virtual void setFieldAdjZero(const Model::Field& f) override; virtual void updateAllSamples() override; const ArbVTUGeom& getVTUGeom() const { return vtu_geom; }