From 3c04d96add93897b55c3a3f210384a34994fb16c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 3 Apr 2025 19:16:33 -0600 Subject: [PATCH 01/66] COMP: missing variables --- .../Kinetic_Energy_Minimize_Shape_Opt.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h index a0585999e..ecc174d43 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h +++ b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h @@ -489,7 +489,9 @@ typedef MV::dual_view_type dual_vec_array; auto corners_in_node = FEM_SGH_->corners_in_node; auto num_corners_in_node = FEM_SGH_->num_corners_in_node; auto relative_element_densities = FEM_SGH_->relative_element_densities; + int max_nodes_per_element = FEM_SGH_->max_nodes_per_element; double volume_gradients_array[max_nodes_per_element * num_dim]; + auto elem_den = FEM_SGH_->elem_den; ViewCArrayKokkos volume_gradients(volume_gradients_array, max_nodes_per_element, num_dim); // view scope { @@ -531,6 +533,12 @@ typedef MV::dual_view_type dual_vec_array; } } } + + // cut out the node_gids for this element + ViewCArrayKokkos elem_node_gids(&nodes_in_elem(elem_id, 0), 8); + + // gradients of the element volume + FEM_SGH_->get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); for (int inode = 0; inode < num_nodes_in_elem; inode++) { for(int idim = 0; idim < num_dim; idim++){ @@ -561,10 +569,10 @@ typedef MV::dual_view_type dual_vec_array; } // cut out the node_gids for this element - ViewCArrayKokkos elem_node_gids(&nodes_in_elem(elem_gid, 0), 8); + ViewCArrayKokkos elem_node_gids(&nodes_in_elem(elem_id, 0), 8); // gradients of the element 
volume - get_vol_hex_ugradient(volume_gradients, elem_gid, node_coords, elem_node_gids, rk_level); + FEM_SGH_->get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); inner_product = 0; for (int ifill = 0; ifill < num_nodes_in_elem; ifill++) { From 55fb899fab4041a509c54e128b3e70f2983dfed0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 7 Apr 2025 18:21:12 -0600 Subject: [PATCH 02/66] WIP: make refactor MPI parallel --- .../scripts/trilinos-install.sh | 2 +- single-node-refactor/src/main.cpp | 54 +++++++++++++------ 2 files changed, 39 insertions(+), 17 deletions(-) diff --git a/single-node-refactor/scripts/trilinos-install.sh b/single-node-refactor/scripts/trilinos-install.sh index fb32c9005..2d5508ad0 100644 --- a/single-node-refactor/scripts/trilinos-install.sh +++ b/single-node-refactor/scripts/trilinos-install.sh @@ -13,7 +13,7 @@ echo "Trilinos Kokkos Build Type: $kokkos_build_type" if [ ! -d "${TRILINOS_SOURCE_DIR}" ] then echo "Directory Trilinos does not exist, downloading Trilinos...." 
- git clone --depth 1 https://github.com/trilinos/Trilinos.git ${TRILINOS_SOURCE_DIR} + git clone --depth 1 --branch trilinos-release-16-0-branch https://github.com/trilinos/Trilinos.git ${TRILINOS_SOURCE_DIR} fi #check if Trilinos build directory exists, create Trilinos/build if it doesn't diff --git a/single-node-refactor/src/main.cpp b/single-node-refactor/src/main.cpp index f3b1db91d..c6476a894 100644 --- a/single-node-refactor/src/main.cpp +++ b/single-node-refactor/src/main.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include "matar.h" #include "driver.h" @@ -51,21 +52,33 @@ /// ///////////////////////////////////////////////////////////////////////////// int main(int argc, char* argv[]) -{ +{ + // initialize MPI + MPI_Init(&argc, &argv); + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + bool will_run = false; + // check to see of an input file was supplied when running the code if (argc == 1) { - std::cout << "\n\n**********************************\n\n"; - std::cout << " ERROR:\n"; - std::cout << " Please supply a YAML input, \n"; - std::cout << " ./Fierro input.yaml \n\n"; - std::cout << "**********************************\n\n" << std::endl; + if(myrank == 0){ + std::cout << "\n\n**********************************\n\n"; + std::cout << " ERROR:\n"; + std::cout << " Please supply a YAML input, \n"; + std::cout << " mpirun -np n Fierro input.yaml \n\n"; + std::cout << "**********************************\n\n" << std::endl; + } + + MPI_Finalize(); return 0; } // end if if (std::string(argv[1]) == "--help"){ - - print_inputs(); - + if(myrank == 0){ + print_inputs(); + } + MPI_Finalize(); return 0; } @@ -75,7 +88,6 @@ int main(int argc, char* argv[]) // Create driver Driver* driver = new Driver(argv[1]); - // Timing data for each step auto time_start = std::chrono::high_resolution_clock::now(); auto time_init = std::chrono::high_resolution_clock::now(); @@ -84,28 +96,35 @@ int main(int argc, 
char* argv[]) auto time_now = std::chrono::high_resolution_clock::now(); auto calc_time = std::chrono::duration_cast(time_now - time_init).count(); - printf("\n**** Total time to initialize driver in seconds %f ****\n\n", calc_time * 1e-9); + if(myrank == 0){ + printf("\n**** Total time to initialize driver in seconds %f ****\n\n", calc_time * 1e-9); + } auto time_setup = std::chrono::high_resolution_clock::now(); driver->setup(); time_now = std::chrono::high_resolution_clock::now(); calc_time = std::chrono::duration_cast(time_now - time_setup).count(); - printf("\n**** Total time to setup driver in seconds %f ****\n\n", calc_time * 1e-9); + if(myrank == 0){ + printf("\n**** Total time to setup driver in seconds %f ****\n\n", calc_time * 1e-9); + } auto time_run = std::chrono::high_resolution_clock::now(); driver->execute(); time_now = std::chrono::high_resolution_clock::now(); calc_time = std::chrono::duration_cast(time_now - time_setup).count(); - printf("\n**** Total time to execute driver in seconds %f ****\n\n", calc_time * 1e-9); + if(myrank == 0){ + printf("\n**** Total time to execute driver in seconds %f ****\n\n", calc_time * 1e-9); + } driver->finalize(); time_now = std::chrono::high_resolution_clock::now(); calc_time = std::chrono::duration_cast(time_now - time_start).count(); - - printf("\n**** Total time to run simulation in seconds %f ****\n\n", calc_time * 1e-9); + if(myrank == 0){ + printf("\n**** Total time to run simulation in seconds %f ****\n\n", calc_time * 1e-9); + } // Delete driver delete driver; @@ -113,6 +132,9 @@ int main(int argc, char* argv[]) Kokkos::finalize(); - std::cout << "**** End of main **** " << std::endl; + if(myrank == 0){ + std::cout << "**** End of main **** " << std::endl; + } + MPI_Finalize(); return 0; } From 61dd3c936759613588dc826243ca9e8590a10f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Wed, 9 Apr 2025 10:09:35 -0600 Subject: [PATCH 03/66] STYLE: 
correct spelling --- .../SGH_solver_3D/include/sgh_solver_3D.h | 10 +- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 32 ++-- .../SGH_solver_3D/src/sgh_initialize.cpp | 16 +- .../Solvers/SGH_solver_3D/src/sgh_setup.cpp | 4 +- .../SGH_solver_rz/include/sgh_solver_rz.h | 10 +- .../SGH_solver_rz/src/sgh_execute_rz.cpp | 32 ++-- .../SGH_solver_rz/src/sgh_initialize_rz.cpp | 8 +- .../SGH_solver_rz/src/sgh_setup_rz.cpp | 4 +- .../SGTM_solver_3D/include/sgtm_solver_3D.h | 10 +- .../SGTM_solver_3D/src/sgtm_execute.cpp | 32 ++-- .../SGTM_solver_3D/src/sgtm_initialize.cpp | 16 +- .../Solvers/SGTM_solver_3D/src/sgtm_setup.cpp | 4 +- .../src/common/include/mesh_io.h | 144 +++++++++--------- .../src/common/include/region_fill.h | 6 +- .../src/common/src/region_fill.cpp | 22 +-- single-node-refactor/src/driver.cpp | 50 +++--- single-node-refactor/src/driver.h | 2 +- single-node-refactor/src/input/parse_yaml.cpp | 26 ++-- single-node-refactor/src/input/parse_yaml.hpp | 2 +- single-node-refactor/src/solver.h | 10 +- 20 files changed, 220 insertions(+), 220 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index cdf797acd..1f9a11cf9 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -153,14 +153,14 @@ class SGH3D : public Solver /// \brief Initializes data associated with the SGH3D solver /// ///////////////////////////////////////////////////////////////////////////// - void initialize(SimulationParameters_t& SimulationParamaters, + void initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const override; - void initialize_material_state(SimulationParameters_t& SimulationParamaters, + void initialize_material_state(SimulationParameters_t& SimulationParameters, 
Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -173,7 +173,7 @@ class SGH3D : public Solver /// \brief Calls setup_sgh, which initializes state and material data /// ///////////////////////////////////////////////////////////////////////////// - void setup(SimulationParameters_t& SimulationParamaters, + void setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -187,7 +187,7 @@ class SGH3D : public Solver /// /// ///////////////////////////////////////////////////////////////////////////// - void execute(SimulationParameters_t& SimulationParamaters, + void execute(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary, Mesh_t& mesh, @@ -209,7 +209,7 @@ class SGH3D : public Solver /// \return /// ///////////////////////////////////////////////////////////////////////////// - void finalize(SimulationParameters_t& SimulationParamaters, + void finalize(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary) const override { diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index e26ff9482..bc0519fdc 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -49,29 +49,29 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// Evolve the state according to the SGH method /// ///////////////////////////////////////////////////////////////////////////// -void SGH3D::execute(SimulationParameters_t& SimulationParamaters, +void SGH3D::execute(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& BoundaryConditions, Mesh_t& mesh, State_t& State) { - double fuzz = SimulationParamaters.dynamic_options.fuzz; - // double tiny = SimulationParamaters.dynamic_options.tiny; - double small = SimulationParamaters.dynamic_options.small; + double fuzz = SimulationParameters.dynamic_options.fuzz; + // double tiny = SimulationParameters.dynamic_options.tiny; + double small = SimulationParameters.dynamic_options.small; - double graphics_dt_ival = SimulationParamaters.output_options.graphics_time_step; - int graphics_cyc_ival = SimulationParamaters.output_options.graphics_iteration_step; + double graphics_dt_ival = SimulationParameters.output_options.graphics_time_step; + int graphics_cyc_ival = SimulationParameters.output_options.graphics_iteration_step; - // double time_initial = SimulationParamaters.dynamic_options.time_initial; - double time_final = this->time_end; //SimulationParamaters.dynamic_options.time_final; - double dt_min = SimulationParamaters.dynamic_options.dt_min; - double dt_max = SimulationParamaters.dynamic_options.dt_max; - double dt_start = SimulationParamaters.dynamic_options.dt_start; - double dt_cfl = SimulationParamaters.dynamic_options.dt_cfl; + // double time_initial = SimulationParameters.dynamic_options.time_initial; + double time_final = this->time_end; //SimulationParameters.dynamic_options.time_final; + double dt_min = SimulationParameters.dynamic_options.dt_min; + double dt_max = SimulationParameters.dynamic_options.dt_max; + double dt_start = SimulationParameters.dynamic_options.dt_start; + double dt_cfl = SimulationParameters.dynamic_options.dt_cfl; - int rk_num_stages = SimulationParamaters.dynamic_options.rk_num_stages; - int 
cycle_stop = SimulationParamaters.dynamic_options.cycle_stop; + int rk_num_stages = SimulationParameters.dynamic_options.rk_num_stages; + int cycle_stop = SimulationParameters.dynamic_options.cycle_stop; // initialize time, time_step, and cycles double time_value = this->time_start; // was 0.0 @@ -146,7 +146,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParamaters, mesh_writer.write_mesh( mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, @@ -420,7 +420,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParamaters, printf("Writing outputs to file at %f \n", graphics_time); mesh_writer.write_mesh(mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp index c8782b3ce..db92686d9 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp @@ -38,7 +38,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "simulation_parameters.h" -void SGH3D::initialize(SimulationParameters_t& SimulationParamaters, +void SGH3D::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -47,7 +47,7 @@ void SGH3D::initialize(SimulationParameters_t& SimulationParamaters, const size_t num_nodes = mesh.num_nodes; const size_t num_gauss_pts = mesh.num_elems; const size_t num_corners = mesh.num_corners; - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; const size_t num_dims = mesh.num_dims; @@ -59,7 +59,7 @@ void SGH3D::initialize(SimulationParameters_t& SimulationParamaters, // check that the fills specify the required nodal fields bool filled_nodal_state = check_fill_node_states(SGH3D_State::required_fill_node_state, - SimulationParamaters.region_setups.fill_node_states); + SimulationParameters.region_setups.fill_node_states); if (filled_nodal_state == false){ std::cout <<" Missing required nodal state in the fill instructions for the dynx_FE solver \n"; @@ -72,14 +72,14 @@ void SGH3D::initialize(SimulationParameters_t& SimulationParamaters, -void SGH3D::initialize_material_state(SimulationParameters_t& SimulationParamaters, +void SGH3D::initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const { const size_t num_nodes = mesh.num_nodes; - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; const size_t num_dims = 3; const size_t num_mats = Materials.num_mats; // the number of materials on the mesh @@ -111,15 +111,15 @@ void SGH3D::initialize_material_state(SimulationParameters_t& SimulationParamate // check that the fills specify the required material point state fields bool 
filled_material_state_A = check_fill_mat_states(SGH3D_State::required_optA_fill_material_pt_state, - SimulationParamaters.region_setups.fill_gauss_states); + SimulationParameters.region_setups.fill_gauss_states); bool filled_material_state_B = check_fill_mat_states(SGH3D_State::required_optB_fill_material_pt_state, - SimulationParamaters.region_setups.fill_gauss_states); + SimulationParameters.region_setups.fill_gauss_states); // --- full stress tensor is not yet supported in region_fill --- //bool filled_material_state_C = // check_fill_mat_states(SGH3D_State::required_optC_fill_material_pt_state, - // SimulationParamaters.region_setups.fill_gauss_states); + // SimulationParameters.region_setups.fill_gauss_states); if (filled_material_state_A == false && filled_material_state_B == false){ diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp index 1880ce8cb..837e7d26e 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp @@ -51,7 +51,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// \brief Allocate state, setup models, and fill mesh regions per the YAML input /// ///////////////////////////////////////////////////////////////////////////// -void SGH3D::setup(SimulationParameters_t& SimulationParamaters, +void SGH3D::setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -60,7 +60,7 @@ void SGH3D::setup(SimulationParameters_t& SimulationParamaters, // add a flag on whether SGH was set up, if(SGH_setup_already==false) const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; // calculate pressure, sound speed, and stress for each material for (int mat_id = 0; mat_id < num_mats; mat_id++) { diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h index 2ad0c39dd..bfc634f56 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h @@ -125,13 +125,13 @@ class SGHRZ : public Solver /// \brief Initializes data associated with the SGHRZ solver /// ///////////////////////////////////////////////////////////////////////////// - void initialize(SimulationParameters_t& SimulationParamaters, + void initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const override; - void initialize_material_state(SimulationParameters_t& SimulationParamaters, + void initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -144,7 +144,7 @@ class SGHRZ : public Solver /// \brief Calls setup_sgh_rz, which initializes state and material data /// 
///////////////////////////////////////////////////////////////////////////// - void setup(SimulationParameters_t& SimulationParamaters, + void setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -158,7 +158,7 @@ class SGHRZ : public Solver /// /// ///////////////////////////////////////////////////////////////////////////// - void execute(SimulationParameters_t& SimulationParamaters, + void execute(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary, Mesh_t& mesh, @@ -180,7 +180,7 @@ class SGHRZ : public Solver /// \return /// ///////////////////////////////////////////////////////////////////////////// - void finalize(SimulationParameters_t& SimulationParamaters, + void finalize(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary) const override { diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp index 780c7e3e2..ade4761a7 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp @@ -49,29 +49,29 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// Evolve the state according to the SGH method /// ///////////////////////////////////////////////////////////////////////////// -void SGHRZ::execute(SimulationParameters_t& SimulationParamaters, +void SGHRZ::execute(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& BoundaryConditions, Mesh_t& mesh, State_t& State) { - double fuzz = SimulationParamaters.dynamic_options.fuzz; - double tiny = SimulationParamaters.dynamic_options.tiny; - double small = SimulationParamaters.dynamic_options.small; + double fuzz = SimulationParameters.dynamic_options.fuzz; + double tiny = SimulationParameters.dynamic_options.tiny; + double small = SimulationParameters.dynamic_options.small; - double graphics_dt_ival = SimulationParamaters.output_options.graphics_time_step; - int graphics_cyc_ival = SimulationParamaters.output_options.graphics_iteration_step; + double graphics_dt_ival = SimulationParameters.output_options.graphics_time_step; + int graphics_cyc_ival = SimulationParameters.output_options.graphics_iteration_step; - // double time_initial = SimulationParamaters.dynamic_options.time_initial; - double time_final = this->time_end; //SimulationParamaters.dynamic_options.time_final; - double dt_min = SimulationParamaters.dynamic_options.dt_min; - double dt_max = SimulationParamaters.dynamic_options.dt_max; - double dt_start = SimulationParamaters.dynamic_options.dt_start; - double dt_cfl = SimulationParamaters.dynamic_options.dt_cfl; + // double time_initial = SimulationParameters.dynamic_options.time_initial; + double time_final = this->time_end; //SimulationParameters.dynamic_options.time_final; + double dt_min = SimulationParameters.dynamic_options.dt_min; + double dt_max = SimulationParameters.dynamic_options.dt_max; + double dt_start = SimulationParameters.dynamic_options.dt_start; + double dt_cfl = SimulationParameters.dynamic_options.dt_cfl; - int rk_num_stages = SimulationParamaters.dynamic_options.rk_num_stages; - int cycle_stop = 
SimulationParamaters.dynamic_options.cycle_stop; + int rk_num_stages = SimulationParameters.dynamic_options.rk_num_stages; + int cycle_stop = SimulationParameters.dynamic_options.cycle_stop; // initialize time, time_step, and cycles double time_value = this->time_start; // 0.0; @@ -158,7 +158,7 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParamaters, mesh_writer.write_mesh( mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, @@ -431,7 +431,7 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParamaters, printf("Writing outputs to file at %f \n", graphics_time); mesh_writer.write_mesh(mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp index 7a79538e7..755909184 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp @@ -37,7 +37,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "mesh.h" #include "simulation_parameters.h" -void SGHRZ::initialize(SimulationParameters_t& SimulationParamaters, +void SGHRZ::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -46,7 +46,7 @@ void SGHRZ::initialize(SimulationParameters_t& SimulationParamaters, size_t num_nodes = mesh.num_nodes; size_t num_gauss_pts = mesh.num_elems; size_t num_corners = mesh.num_corners; - size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; size_t num_dim = mesh.num_dims; // save the solver_id, which is a pravate class variable @@ -59,14 +59,14 @@ void SGHRZ::initialize(SimulationParameters_t& SimulationParamaters, // NOTE: Material points and material corners are initialize in sgh_setup after calculating the material->mesh maps } -void SGHRZ::initialize_material_state(SimulationParameters_t& SimulationParamaters, +void SGHRZ::initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const { const size_t num_nodes = mesh.num_nodes; - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; const size_t num_mats = Materials.num_mats; // the number of materials on the mesh diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp index 215624f5f..15e8997a9 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp @@ -78,7 +78,7 @@ void SGHRZ::init_corner_node_masses_zero_rz(const Mesh_t& mesh, /// \brief Allocate state, setup models, and fill mesh regions per the YAML input /// 
///////////////////////////////////////////////////////////////////////////// -void SGHRZ::setup(SimulationParameters_t& SimulationParamaters, +void SGHRZ::setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -88,7 +88,7 @@ void SGHRZ::setup(SimulationParameters_t& SimulationParamaters, // add a flag on whether SGHRZ was set up, if(SGHRZ_setup_already==false) const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; // calculate pressure, sound speed, and stress for each material for(int mat_id=0; mat_id /// ///////////////////////////////////////////////////////////////////////////// - void finalize(SimulationParameters_t& SimulationParamaters, + void finalize(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary) const override { diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_execute.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_execute.cpp index aa6c99e4d..31d7c59f9 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_execute.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_execute.cpp @@ -49,29 +49,29 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// Evolve the state according to the SGH method /// ///////////////////////////////////////////////////////////////////////////// -void SGTM3D::execute(SimulationParameters_t& SimulationParamaters, +void SGTM3D::execute(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& BoundaryConditions, Mesh_t& mesh, State_t& State) { - double fuzz = SimulationParamaters.dynamic_options.fuzz; - // double tiny = SimulationParamaters.dynamic_options.tiny; - double small = SimulationParamaters.dynamic_options.small; + double fuzz = SimulationParameters.dynamic_options.fuzz; + // double tiny = SimulationParameters.dynamic_options.tiny; + double small = SimulationParameters.dynamic_options.small; - double graphics_dt_ival = SimulationParamaters.output_options.graphics_time_step; - int graphics_cyc_ival = SimulationParamaters.output_options.graphics_iteration_step; + double graphics_dt_ival = SimulationParameters.output_options.graphics_time_step; + int graphics_cyc_ival = SimulationParameters.output_options.graphics_iteration_step; - // double time_initial = SimulationParamaters.dynamic_options.time_initial; - double time_final = this->time_end; // SimulationParamaters.dynamic_options.time_final; - double dt_min = SimulationParamaters.dynamic_options.dt_min; - double dt_max = SimulationParamaters.dynamic_options.dt_max; - double dt_start = SimulationParamaters.dynamic_options.dt_start; - double dt_cfl = SimulationParamaters.dynamic_options.dt_cfl; + // double time_initial = SimulationParameters.dynamic_options.time_initial; + double time_final = this->time_end; // SimulationParameters.dynamic_options.time_final; + double dt_min = SimulationParameters.dynamic_options.dt_min; + double dt_max = SimulationParameters.dynamic_options.dt_max; + double dt_start = SimulationParameters.dynamic_options.dt_start; + double dt_cfl = SimulationParameters.dynamic_options.dt_cfl; - int rk_num_stages = SimulationParamaters.dynamic_options.rk_num_stages; - int 
cycle_stop = SimulationParamaters.dynamic_options.cycle_stop; + int rk_num_stages = SimulationParameters.dynamic_options.rk_num_stages; + int cycle_stop = SimulationParameters.dynamic_options.cycle_stop; // initialize time, time_step, and cycles double time_value = this->time_start; // 0.0; @@ -119,7 +119,7 @@ void SGTM3D::execute(SimulationParameters_t& SimulationParamaters, mesh_writer.write_mesh( mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, @@ -367,7 +367,7 @@ std::cout << "update temperature \n"; printf("cycle = %lu, time = %f, time step = %f \n", cycle, time_value, dt); mesh_writer.write_mesh(mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp index f7dcff4b9..d3847b4d0 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp @@ -37,7 +37,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "mesh.h" #include "simulation_parameters.h" -void SGTM3D::initialize(SimulationParameters_t& SimulationParamaters, +void SGTM3D::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -46,7 +46,7 @@ void SGTM3D::initialize(SimulationParameters_t& SimulationParamaters, int num_nodes = mesh.num_nodes; int num_gauss_pts = mesh.num_elems; int num_corners = mesh.num_corners; - int rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + int rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; int num_dim = mesh.num_dims; @@ -59,7 +59,7 @@ void SGTM3D::initialize(SimulationParameters_t& SimulationParamaters, // check that the fills specify the required nodal fields bool filled_nodal_state = check_fill_node_states(SGTM3D_State::required_fill_node_state, - SimulationParamaters.region_setups.fill_node_states); + SimulationParameters.region_setups.fill_node_states); if (filled_nodal_state == false){ std::cout <<" Missing required nodal state in the fill instructions for the thrmex_FE solver \n"; @@ -71,7 +71,7 @@ void SGTM3D::initialize(SimulationParameters_t& SimulationParamaters, } // end solver initialization -void SGTM3D::initialize_material_state(SimulationParameters_t& SimulationParamaters, +void SGTM3D::initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -79,7 +79,7 @@ void SGTM3D::initialize_material_state(SimulationParameters_t& SimulationParamat { const size_t num_nodes = mesh.num_nodes; - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; const size_t num_dims = 3; const size_t num_mats = Materials.num_mats; // the number of materials on the mesh @@ -111,15 +111,15 @@ void SGTM3D::initialize_material_state(SimulationParameters_t& SimulationParamat // check that 
the fills specify the required material point state fields bool filled_material_state_A = check_fill_mat_states(SGTM3D_State::required_optA_fill_material_pt_state, - SimulationParamaters.region_setups.fill_gauss_states); + SimulationParameters.region_setups.fill_gauss_states); bool filled_material_state_B = check_fill_mat_states(SGTM3D_State::required_optB_fill_material_pt_state, - SimulationParamaters.region_setups.fill_gauss_states); + SimulationParameters.region_setups.fill_gauss_states); // --- full stress tensor is not yet supported in region_fill --- //bool filled_material_state_C = // check_fill_mat_states(SGTM3D_State::required_optC_fill_material_pt_state, - // SimulationParamaters.region_setups.fill_gauss_states); + // SimulationParameters.region_setups.fill_gauss_states); if (filled_material_state_A == false && filled_material_state_B == false){ diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_setup.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_setup.cpp index 3e04307d0..b7e2b6750 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_setup.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_setup.cpp @@ -50,7 +50,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// \brief Calls setup_sgtm to unpack SimulationParameters for GPU access /// ///////////////////////////////////////////////////////////////////////////// -void SGTM3D::setup(SimulationParameters_t& SimulationParamaters, +void SGTM3D::setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -58,7 +58,7 @@ void SGTM3D::setup(SimulationParameters_t& SimulationParamaters, { const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_stages; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_stages; std::cout << "Calculating pressure, sound speed, and stress" << std::endl; diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 08cd53e9e..0cad1075c 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1319,14 +1319,14 @@ class MeshBuilder GaussPoint_t& GaussPoints, node_t& node, corner_t& corner, - SimulationParameters_t& SimulationParamaters) + SimulationParameters_t& SimulationParameters) { - if (SimulationParamaters.mesh_input.num_dims == 2) { - if (SimulationParamaters.mesh_input.type == mesh_input::Polar) { - build_2d_polar(mesh, GaussPoints, node, corner, SimulationParamaters); + if (SimulationParameters.mesh_input.num_dims == 2) { + if (SimulationParameters.mesh_input.type == mesh_input::Polar) { + build_2d_polar(mesh, GaussPoints, node, corner, SimulationParameters); } - else if (SimulationParamaters.mesh_input.type == mesh_input::Box) { - build_2d_box(mesh, GaussPoints, node, corner, SimulationParamaters); + else if (SimulationParameters.mesh_input.type == mesh_input::Box) { + build_2d_box(mesh, GaussPoints, node, corner, SimulationParameters); } else{ std::cout << "**** 2D MESH TYPE NOT SUPPORTED **** " << std::endl; @@ -1338,8 +1338,8 @@ class 
MeshBuilder throw std::runtime_error("**** 2D MESH TYPE NOT SUPPORTED ****"); } } - else if (SimulationParamaters.mesh_input.num_dims == 3) { - build_3d_box(mesh, GaussPoints, node, corner, SimulationParamaters); + else if (SimulationParameters.mesh_input.num_dims == 3) { + build_3d_box(mesh, GaussPoints, node, corner, SimulationParameters); } else{ throw std::runtime_error("**** ONLY 2D RZ OR 3D MESHES ARE SUPPORTED ****"); @@ -1363,17 +1363,17 @@ class MeshBuilder GaussPoint_t& GaussPoints, node_t& node, corner_t& corner, - SimulationParameters_t& SimulationParamaters) const + SimulationParameters_t& SimulationParameters) const { printf("Creating a 2D box mesh \n"); const int num_dim = 2; - const double lx = SimulationParamaters.mesh_input.length[0]; - const double ly = SimulationParamaters.mesh_input.length[1]; + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; - const int num_elems_i = SimulationParamaters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParamaters.mesh_input.num_elems[1]; + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; const int num_points_i = num_elems_i + 1; // num points in x const int num_points_j = num_elems_j + 1; // num points in y @@ -1386,8 +1386,8 @@ class MeshBuilder const int num_elems = num_elems_i * num_elems_j; std::vector origin(num_dim); - // SimulationParamaters.mesh_input.origin.update_host(); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParamaters.mesh_input.origin[i]; } + // SimulationParameters.mesh_input.origin.update_host(); + for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } // --- 2D parameters --- // const int num_faces_in_elem = 4; // number of faces in elem @@ -1404,7 +1404,7 @@ class MeshBuilder convert_point_number_in_quad(2) = 3; convert_point_number_in_quad(3) = 2; - int 
rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + int rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; // intialize node variables mesh.initialize_nodes(num_nodes); @@ -1496,21 +1496,21 @@ class MeshBuilder GaussPoint_t& GaussPoints, node_t& node, corner_t& corner, - SimulationParameters_t& SimulationParamaters) const + SimulationParameters_t& SimulationParameters) const { printf("Creating a 2D polar mesh \n"); int num_dim = 2; - int rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + int rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; - const double inner_radius = SimulationParamaters.mesh_input.inner_radius; - const double outer_radius = SimulationParamaters.mesh_input.outer_radius; + const double inner_radius = SimulationParameters.mesh_input.inner_radius; + const double outer_radius = SimulationParameters.mesh_input.outer_radius; - const double start_angle = PI / 180.0 * SimulationParamaters.mesh_input.starting_angle; - const double end_angle = PI / 180.0 * SimulationParamaters.mesh_input.ending_angle; + const double start_angle = PI / 180.0 * SimulationParameters.mesh_input.starting_angle; + const double end_angle = PI / 180.0 * SimulationParameters.mesh_input.ending_angle; - const int num_elems_i = SimulationParamaters.mesh_input.num_radial_elems; - const int num_elems_j = SimulationParamaters.mesh_input.num_angular_elems; + const int num_elems_i = SimulationParameters.mesh_input.num_radial_elems; + const int num_elems_j = SimulationParameters.mesh_input.num_angular_elems; const int num_points_i = num_elems_i + 1; // num points in x const int num_points_j = num_elems_j + 1; // num points in y @@ -1524,7 +1524,7 @@ class MeshBuilder std::vector origin(num_dim); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParamaters.mesh_input.origin[i]; } + for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } // --- 2D parameters --- // const int 
num_faces_in_elem = 4; // number of faces in elem @@ -1637,21 +1637,21 @@ class MeshBuilder GaussPoint_t& GaussPoints, node_t& node, corner_t& corner, - SimulationParameters_t& SimulationParamaters) const + SimulationParameters_t& SimulationParameters) const { printf("Creating a 3D box mesh \n"); const int num_dim = 3; - // SimulationParamaters.mesh_input.length.update_host(); - const double lx = SimulationParamaters.mesh_input.length[0]; - const double ly = SimulationParamaters.mesh_input.length[1]; - const double lz = SimulationParamaters.mesh_input.length[2]; + // SimulationParameters.mesh_input.length.update_host(); + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; + const double lz = SimulationParameters.mesh_input.length[2]; - // SimulationParamaters.mesh_input.num_elems.update_host(); - const int num_elems_i = SimulationParamaters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParamaters.mesh_input.num_elems[1]; - const int num_elems_k = SimulationParamaters.mesh_input.num_elems[2]; + // SimulationParameters.mesh_input.num_elems.update_host(); + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; const int num_points_i = num_elems_i + 1; // num points in x const int num_points_j = num_elems_j + 1; // num points in y @@ -1666,8 +1666,8 @@ class MeshBuilder const int num_elems = num_elems_i * num_elems_j * num_elems_k; std::vector origin(num_dim); - // SimulationParamaters.mesh_input.origin.update_host(); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParamaters.mesh_input.origin[i]; } + // SimulationParameters.mesh_input.origin.update_host(); + for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } // --- 3D parameters --- // const int num_faces_in_elem = 6; // 
number of faces in elem @@ -1676,7 +1676,7 @@ class MeshBuilder // const int num_edges_in_elem = 12; // number of edges in a elem - int rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + int rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; // initialize mesh node variables mesh.initialize_nodes(num_nodes); @@ -1778,25 +1778,25 @@ class MeshBuilder GaussPoint_t& GaussPoints, node_t& node, corner_t& corner, - SimulationParameters_t& SimulationParamaters) const + SimulationParameters_t& SimulationParameters) const { printf(" ***** WARNING:: build_3d_HexN_box not yet implemented\n"); const int num_dim = 3; - const int rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + const int rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; - // SimulationParamaters.mesh_input.length.update_host(); - const double lx = SimulationParamaters.mesh_input.length[0]; - const double ly = SimulationParamaters.mesh_input.length[1]; - const double lz = SimulationParamaters.mesh_input.length[2]; + // SimulationParameters.mesh_input.length.update_host(); + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; + const double lz = SimulationParameters.mesh_input.length[2]; - // SimulationParamaters.mesh_input.num_elems.update_host(); - const int num_elems_i = SimulationParamaters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParamaters.mesh_input.num_elems[1]; - const int num_elems_k = SimulationParamaters.mesh_input.num_elems[2]; + // SimulationParameters.mesh_input.num_elems.update_host(); + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; // creating zones for the Pn order - const int Pn_order = SimulationParamaters.mesh_input.p_order; + const int Pn_order = 
SimulationParameters.mesh_input.p_order; if (Pn_order > 19) { printf("Fierro DG and RD solvers are only valid for elements up to Pn = 19 \n"); @@ -1820,7 +1820,7 @@ class MeshBuilder // const int num_zones = num_zones_i*num_zones_j*num_zones_k; // accounts for Pn std::vector origin(num_dim); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParamaters.mesh_input.origin[i]; } + for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } // --- 3D parameters --- // const int num_faces_in_zone = 6; // number of faces in zone @@ -1979,7 +1979,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void write_mesh(Mesh_t& mesh, State_t& State, - SimulationParameters_t& SimulationParamaters, + SimulationParameters_t& SimulationParameters, double dt, double time_value, CArray graphics_times, @@ -2120,7 +2120,7 @@ class MeshWriter size_t num_mat_pt_tensor_vars = 0; // count the number of material point state vars to write out - for (auto field : SimulationParamaters.output_options.output_mat_pt_state){ + for (auto field : SimulationParameters.output_options.output_mat_pt_state){ switch(field){ // scalar vars to write out case material_pt_state::density: @@ -2180,7 +2180,7 @@ class MeshWriter size_t num_elem_tensor_vars = 0; // count the number of element average fields to write out - for (auto field : SimulationParamaters.output_options.output_elem_state){ + for (auto field : SimulationParameters.output_options.output_elem_state){ switch(field){ // scalar vars to write out case material_pt_state::density: @@ -2236,7 +2236,7 @@ class MeshWriter size_t num_gauss_pt_tensor_vars = 0; // gauss point values to ouptput - for (auto field : SimulationParamaters.output_options.output_gauss_pt_state){ + for (auto field : SimulationParameters.output_options.output_gauss_pt_state){ switch(field){ // scalar vars to write out case gauss_pt_state::volume: @@ -2288,7 +2288,7 @@ class MeshWriter size_t 
tensor_var = 0; // material point state to output - for (auto field : SimulationParamaters.output_options.output_mat_pt_state){ + for (auto field : SimulationParameters.output_options.output_mat_pt_state){ switch(field){ // scalar vars case material_pt_state::density: @@ -2382,7 +2382,7 @@ class MeshWriter tensor_var = 0; // element state to output - for (auto field : SimulationParamaters.output_options.output_elem_state){ + for (auto field : SimulationParameters.output_options.output_elem_state){ switch(field){ // scalar vars case material_pt_state::density: @@ -2453,7 +2453,7 @@ class MeshWriter int div_id = -1; int vel_grad_id = -1; - for (auto field : SimulationParamaters.output_options.output_gauss_pt_state){ + for (auto field : SimulationParameters.output_options.output_gauss_pt_state){ switch(field){ // scalars case gauss_pt_state::volume: @@ -2485,7 +2485,7 @@ class MeshWriter size_t num_node_scalar_vars = 0; size_t num_node_vector_vars = 0; - for (auto field : SimulationParamaters.output_options.output_node_state){ + for (auto field : SimulationParameters.output_options.output_node_state){ switch(field){ case node_state::mass: num_node_scalar_vars ++; @@ -2527,7 +2527,7 @@ class MeshWriter vector_var = 0; tensor_var = 0; - for (auto field : SimulationParamaters.output_options.output_node_state){ + for (auto field : SimulationParameters.output_options.output_node_state){ switch(field){ // scalars case node_state::mass: @@ -2599,8 +2599,8 @@ class MeshWriter elem_scalar_fields, elem_tensor_fields, State.MaterialToMeshMaps(mat_id).elem, - SimulationParamaters.output_options.output_elem_state, - SimulationParamaters.output_options.output_gauss_pt_state, + SimulationParameters.output_options.output_elem_state, + SimulationParameters.output_options.output_gauss_pt_state, num_mat_elems, num_elems, den_id, @@ -2641,7 +2641,7 @@ class MeshWriter concatenate_nodal_fields(State.node, node_scalar_fields, node_vector_fields, - 
SimulationParamaters.output_options.output_node_state, + SimulationParameters.output_options.output_node_state, dt, num_nodes, num_dims, @@ -2660,8 +2660,8 @@ class MeshWriter // Write the nodal and elem fields // ******************************** - if (SimulationParamaters.output_options.format == output_options::viz || - SimulationParamaters.output_options.format == output_options::viz_and_state) { + if (SimulationParameters.output_options.format == output_options::viz || + SimulationParameters.output_options.format == output_options::viz_and_state) { // create the folder structure if it does not exist struct stat st; @@ -2741,7 +2741,7 @@ class MeshWriter mat_elem_scalar_fields, mat_elem_tensor_fields, State.MaterialToMeshMaps(mat_id).elem, - SimulationParamaters.output_options.output_mat_pt_state, + SimulationParameters.output_options.output_mat_pt_state, num_mat_elems, mat_den_id, mat_pres_id, @@ -2863,12 +2863,12 @@ class MeshWriter // STATE - if (SimulationParamaters.output_options.format == output_options::state || - SimulationParamaters.output_options.format == output_options::viz_and_state) { + if (SimulationParameters.output_options.format == output_options::state || + SimulationParameters.output_options.format == output_options::viz_and_state) { write_material_point_state(mesh, State, - SimulationParamaters, + SimulationParameters, time_value, graphics_times, node_states, @@ -2879,10 +2879,10 @@ class MeshWriter // will drop ensight outputs in the near future - if (SimulationParamaters.output_options.format == output_options::ensight){ + if (SimulationParameters.output_options.format == output_options::ensight){ write_ensight(mesh, State, - SimulationParamaters, + SimulationParameters, dt, time_value, graphics_times, @@ -2908,7 +2908,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void write_ensight(Mesh_t& mesh, State_t& State, - SimulationParameters_t& SimulationParamaters, + SimulationParameters_t& 
SimulationParameters, double dt, double time_value, CArray graphics_times, @@ -3319,7 +3319,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void write_vtk_old(Mesh_t& mesh, State_t& State, - SimulationParameters_t& SimulationParamaters, + SimulationParameters_t& SimulationParameters, double dt, double time_value, CArray graphics_times, @@ -4640,7 +4640,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void write_material_point_state(Mesh_t& mesh, State_t& State, - SimulationParameters_t& SimulationParamaters, + SimulationParameters_t& SimulationParameters, double time_value, CArray graphics_times, std::vector node_states, diff --git a/single-node-refactor/src/common/include/region_fill.h b/single-node-refactor/src/common/include/region_fill.h index de3ed2768..7c78cf298 100644 --- a/single-node-refactor/src/common/include/region_fill.h +++ b/single-node-refactor/src/common/include/region_fill.h @@ -44,7 +44,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "geometry_new.h" -struct SimulationParamaters_t; +struct SimulationParameters_t; struct Material_t; struct Mesh_t; struct BoundaryCondition_t; @@ -58,7 +58,7 @@ using namespace mtr; // ----------------------------------------------------------------------------- // The functions to setup fields on a mesh // ------------------------------------------------------------------------------ -void simulation_setup(SimulationParameters_t& SimulationParamaters, +void simulation_setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -98,7 +98,7 @@ void fill_regions( // ----------------------------------------------------------------------------- // A function to populate the material point and material zone state // ------------------------------------------------------------------------------ -void material_state_setup(SimulationParameters_t& SimulationParamaters, +void material_state_setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, diff --git a/single-node-refactor/src/common/src/region_fill.cpp b/single-node-refactor/src/common/src/region_fill.cpp index de3844f0e..c9b2f6b5e 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -47,7 +47,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-void simulation_setup(SimulationParameters_t& SimulationParamaters, +void simulation_setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -65,7 +65,7 @@ void simulation_setup(SimulationParameters_t& SimulationParamaters, const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; // Calculate element volume @@ -82,7 +82,7 @@ void simulation_setup(SimulationParameters_t& SimulationParamaters, fillGaussState.initialize(num_gauss_points, num_mats_per_elem, num_dims, - SimulationParamaters.region_setups.fill_gauss_states); + SimulationParameters.region_setups.fill_gauss_states); // the elem state is always used, thus always initialized fillElemState.initialize(num_elems, @@ -118,11 +118,11 @@ void simulation_setup(SimulationParameters_t& SimulationParamaters, fillElemState.mat_id, fillElemState.num_mats_saved_in_elem, voxel_elem_mat_id, - SimulationParamaters.mesh_input.object_ids, - SimulationParamaters.region_setups.region_fills, - SimulationParamaters.region_setups.region_fills_host, - SimulationParamaters.region_setups.fill_gauss_states, - SimulationParamaters.region_setups.fill_node_states, + SimulationParameters.mesh_input.object_ids, + SimulationParameters.region_setups.region_fills, + SimulationParameters.region_setups.region_fills_host, + SimulationParameters.region_setups.fill_gauss_states, + SimulationParameters.region_setups.fill_node_states, rk_num_bins, num_mats_per_elem); @@ -626,7 +626,7 @@ void fill_regions( /// /// \brief a function to setup the material point and zone state /// -/// \param SimulationParamaters holds the simulation parameters +/// \param SimulationParameters holds the simulation parameters /// \param Materials is the material object /// \param mesh is the mesh object /// \param Boundary 
is the boundary condition object @@ -635,7 +635,7 @@ void fill_regions( /// \param fillElemState is a vector of enums telling what elem state to set /// ///////////////////////////////////////////////////////////////////////////// -void material_state_setup(SimulationParameters_t& SimulationParamaters, +void material_state_setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, @@ -654,7 +654,7 @@ void material_state_setup(SimulationParameters_t& SimulationParamaters, const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - const size_t rk_num_bins = SimulationParamaters.dynamic_options.rk_num_bins; + const size_t rk_num_bins = SimulationParameters.dynamic_options.rk_num_bins; // a counter for the Material index spaces DCArrayKokkos num_elems_saved_for_mat(num_mats, "setup_num_elems_saved_for_mat"); diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index 2e68cb550..346c8a45c 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -58,25 +58,25 @@ void Driver::initialize() exit(0); } - parse_yaml(root, SimulationParamaters, Materials, BoundaryConditions); + parse_yaml(root, SimulationParameters, Materials, BoundaryConditions); std::cout << "Finished parsing YAML file" << std::endl; - if (SimulationParamaters.mesh_input.source == mesh_input::file) { + if (SimulationParameters.mesh_input.source == mesh_input::file) { // Create and/or read mesh - std::cout << "Mesh file path: " << SimulationParamaters.mesh_input.file_path << std::endl; - mesh_reader.set_mesh_file(SimulationParamaters.mesh_input.file_path.data()); + std::cout << "Mesh file path: " << SimulationParameters.mesh_input.file_path << std::endl; + mesh_reader.set_mesh_file(SimulationParameters.mesh_input.file_path.data()); mesh_reader.read_mesh(mesh, State, - SimulationParamaters.mesh_input, + SimulationParameters.mesh_input, num_dims, - 
SimulationParamaters.dynamic_options.rk_num_bins); + SimulationParameters.dynamic_options.rk_num_bins); } - else if (SimulationParamaters.mesh_input.source == mesh_input::generate) { + else if (SimulationParameters.mesh_input.source == mesh_input::generate) { mesh_builder.build_mesh(mesh, State.GaussPoints, State.node, State.corner, - SimulationParamaters); + SimulationParameters); } else{ throw std::runtime_error("**** NO MESH INPUT OPTIONS PROVIDED IN YAML ****"); @@ -93,15 +93,15 @@ void Driver::initialize() // Setup the Solvers - double time_final = SimulationParamaters.dynamic_options.time_final; - for (size_t solver_id = 0; solver_id < SimulationParamaters.solver_inputs.size(); solver_id++) { + double time_final = SimulationParameters.dynamic_options.time_final; + for (size_t solver_id = 0; solver_id < SimulationParameters.solver_inputs.size(); solver_id++) { - if (SimulationParamaters.solver_inputs[solver_id].method == solver_input::SGH3D) { + if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGH3D) { std::cout << "Initializing dynx_FE solver" << std::endl; SGH3D* sgh_solver = new SGH3D(); - sgh_solver->initialize(SimulationParamaters, + sgh_solver->initialize(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -111,7 +111,7 @@ void Driver::initialize() sgh_solver->solver_id = solver_id; // set the start and ending times - double t_end = SimulationParamaters.solver_inputs[solver_id].time_end; // default is t=0 + double t_end = SimulationParameters.solver_inputs[solver_id].time_end; // default is t=0 if(solver_id==0){ sgh_solver->time_start = 0.0; @@ -147,12 +147,12 @@ void Driver::initialize() } // end if SGH solver - else if (SimulationParamaters.solver_inputs[solver_id].method == solver_input::SGHRZ) { + else if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGHRZ) { std::cout << "Initializing dynx_FE_RZ solver" << std::endl; SGHRZ* sgh_solver_rz = new SGHRZ(); - 
sgh_solver_rz->initialize(SimulationParamaters, + sgh_solver_rz->initialize(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -162,7 +162,7 @@ void Driver::initialize() sgh_solver_rz->solver_id = solver_id; // set the start and ending times - double t_end = SimulationParamaters.solver_inputs[solver_id].time_end; // default is t=0 + double t_end = SimulationParameters.solver_inputs[solver_id].time_end; // default is t=0 if(solver_id==0){ sgh_solver_rz->time_start = 0.0; @@ -196,12 +196,12 @@ void Driver::initialize() solvers.push_back(sgh_solver_rz); } // end if SGHRZ solver - else if (SimulationParamaters.solver_inputs[solver_id].method == solver_input::SGTM3D) { + else if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGTM3D) { std::cout << "Initializing thrmex_FE solver" << std::endl; SGTM3D* sgtm_solver_3d = new SGTM3D(); - sgtm_solver_3d->initialize(SimulationParamaters, + sgtm_solver_3d->initialize(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -211,7 +211,7 @@ void Driver::initialize() sgtm_solver_3d->solver_id = solver_id; // set the start and ending times - double t_end = SimulationParamaters.solver_inputs[solver_id].time_end; // default is t=0 + double t_end = SimulationParameters.solver_inputs[solver_id].time_end; // default is t=0 if(solver_id==0){ sgtm_solver_3d->time_start = 0.0; @@ -259,7 +259,7 @@ void Driver::initialize() fillGaussState_t fillGaussState; fillElemState_t fillElemState; - simulation_setup(SimulationParamaters, + simulation_setup(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -270,7 +270,7 @@ void Driver::initialize() // Allocate material state for (auto& solver : solvers) { - solver->initialize_material_state(SimulationParamaters, + solver->initialize_material_state(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -279,7 +279,7 @@ void Driver::initialize() // populate the material point state - material_state_setup(SimulationParamaters, + 
material_state_setup(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -301,7 +301,7 @@ void Driver::setup() // allocate state, setup models, and apply fill instructions for (auto& solver : solvers) { - solver->setup(SimulationParamaters, + solver->setup(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -322,7 +322,7 @@ void Driver::execute() { std::cout << "Inside driver execute" << std::endl; for (auto& solver : solvers) { - solver->execute(SimulationParamaters, + solver->execute(SimulationParameters, Materials, BoundaryConditions, mesh, @@ -345,7 +345,7 @@ void Driver::finalize() std::cout << "Inside driver finalize" << std::endl; for (auto& solver : solvers) { if (solver->finalize_flag) { - solver->finalize(SimulationParamaters, + solver->finalize(SimulationParameters, Materials, BoundaryConditions); } diff --git a/single-node-refactor/src/driver.h b/single-node-refactor/src/driver.h index 85b7bbfa5..249808a93 100644 --- a/single-node-refactor/src/driver.h +++ b/single-node-refactor/src/driver.h @@ -65,7 +65,7 @@ class Driver MeshReader mesh_reader; MeshBuilder mesh_builder; - SimulationParameters_t SimulationParamaters; ///< the input simulation parameters + SimulationParameters_t SimulationParameters; ///< the input simulation parameters // --------------------------------------------------------------------- // Material and Boundary declarations diff --git a/single-node-refactor/src/input/parse_yaml.cpp b/single-node-refactor/src/input/parse_yaml.cpp index a900fcc82..de6bf2d8f 100644 --- a/single-node-refactor/src/input/parse_yaml.cpp +++ b/single-node-refactor/src/input/parse_yaml.cpp @@ -79,34 +79,34 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// ================================================================================= // Parse YAML file // ================================================================================= -void parse_yaml(Yaml::Node& root, SimulationParameters_t& SimulationParamaters, Material_t& Materials, BoundaryCondition_t& Boundary) +void parse_yaml(Yaml::Node& root, SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary) { - parse_mesh_inputs(root, SimulationParamaters.mesh_input); + parse_mesh_inputs(root, SimulationParameters.mesh_input); - parse_dynamic_options(root, SimulationParamaters.dynamic_options); + parse_dynamic_options(root, SimulationParameters.dynamic_options); - parse_output_options(root, SimulationParamaters.output_options); + parse_output_options(root, SimulationParameters.output_options); - parse_solver_input(root, SimulationParamaters.solver_inputs); + parse_solver_input(root, SimulationParameters.solver_inputs); // parse the region yaml text into a vector of boundary conditions - size_t num_solvers = SimulationParamaters.solver_inputs.size(); + size_t num_solvers = SimulationParameters.solver_inputs.size(); parse_bcs(root, Boundary, num_solvers); // parse the region yaml text into a vector of region_fills parse_regions(root, - SimulationParamaters.region_setups.reg_fills_in_solver, - SimulationParamaters.region_setups.num_reg_fills_in_solver, - SimulationParamaters.region_setups.region_fills, - SimulationParamaters.region_setups.region_fills_host, - SimulationParamaters.region_setups.fill_gauss_states, - SimulationParamaters.region_setups.fill_node_states, + SimulationParameters.region_setups.reg_fills_in_solver, + SimulationParameters.region_setups.num_reg_fills_in_solver, + SimulationParameters.region_setups.region_fills, + SimulationParameters.region_setups.region_fills_host, + SimulationParameters.region_setups.fill_gauss_states, + SimulationParameters.region_setups.fill_node_states, num_solvers); // parse 
the material yaml text into a vector of materials - parse_materials(root, Materials, SimulationParamaters.mesh_input.num_dims); + parse_materials(root, Materials, SimulationParameters.mesh_input.num_dims); } diff --git a/single-node-refactor/src/input/parse_yaml.hpp b/single-node-refactor/src/input/parse_yaml.hpp index 19cc1cf85..ed5ef4496 100644 --- a/single-node-refactor/src/input/parse_yaml.hpp +++ b/single-node-refactor/src/input/parse_yaml.hpp @@ -55,7 +55,7 @@ using namespace mtr; // utility function for parsing YAML file -void parse_yaml(Yaml::Node& root, SimulationParameters_t& SimulationParamaters, Material_t& Materials, BoundaryCondition_t& Boundary); +void parse_yaml(Yaml::Node& root, SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary); #endif // end Header Guard \ No newline at end of file diff --git a/single-node-refactor/src/solver.h b/single-node-refactor/src/solver.h index 3ec2a23d2..1084f511d 100644 --- a/single-node-refactor/src/solver.h +++ b/single-node-refactor/src/solver.h @@ -59,31 +59,31 @@ class Solver Solver(); virtual ~Solver(); - virtual void initialize(SimulationParameters_t& SimulationParamaters, + virtual void initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const = 0; - virtual void initialize_material_state(SimulationParameters_t& SimulationParamaters, + virtual void initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) const = 0; - virtual void setup(SimulationParameters_t& SimulationParamaters, + virtual void setup(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, State_t& State) = 0; - virtual void execute(SimulationParameters_t& SimulationParamaters, + virtual void execute(SimulationParameters_t& SimulationParameters, 
Material_t& Materials, BoundaryCondition_t& BoundaryConditions, Mesh_t& mesh, State_t& State) = 0; - virtual void finalize(SimulationParameters_t& SimulationParamaters, + virtual void finalize(SimulationParameters_t& SimulationParameters, Material_t& Materials, BoundaryCondition_t& Boundary) const = 0; From 9bb3cfb43bb7952ca3f65e3a3bd899255dfac17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 10 Apr 2025 00:09:42 -0600 Subject: [PATCH 04/66] WIP: MPI refactor --- .../src/common/include/state.h | 30 +++--- single-node-refactor/src/driver.cpp | 91 ++++++++++++++----- single-node-refactor/src/driver.h | 3 + 3 files changed, 87 insertions(+), 37 deletions(-) diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 1edf4940d..403a6b9d7 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -58,6 +58,12 @@ enum class fill_gauss_state specific_heat }; +//distributed vector type in use +using DistributedMap = TpetraPartitionMap<>; +template +using DistributedDFArray = TpetraDFArray; +template +using DistributedDCArray = TpetraDCArray; ///////////////////////////////////////////////////////////////////////////// /// @@ -266,12 +272,12 @@ enum class node_state ///////////////////////////////////////////////////////////////////////////// struct node_t { - DCArrayKokkos coords; ///< Nodal coordinates - DCArrayKokkos vel; ///< Nodal velocity - DCArrayKokkos mass; ///< Nodal mass - DCArrayKokkos force; ///< Nodal force - DCArrayKokkos temp; ///< Nodal temperature - DCArrayKokkos q_transfer; ///< Nodal heat flux + DistributedDCArray coords; ///< Nodal coordinates + DistributedDCArray vel; ///< Nodal velocity + DistributedDCArray mass; ///< Nodal mass + DistributedDCArray force; ///< Nodal force + DistributedDCArray temp; ///< Nodal temperature + DistributedDCArray q_transfer; ///< Nodal heat 
flux // initialization method (num_rk_storage_bins, num_nodes, num_dims, state to allocate) void initialize(size_t num_rk, size_t num_nodes, size_t num_dims, std::vector node_states) @@ -279,22 +285,22 @@ struct node_t for (auto field : node_states){ switch(field){ case node_state::coords: - if (coords.size() == 0) this->coords = DCArrayKokkos(num_rk, num_nodes, num_dims, "node_coordinates"); + if (coords.size() == 0) this->coords = DistributedDCArray(num_rk, num_nodes, num_dims, "node_coordinates"); break; case node_state::velocity: - if (vel.size() == 0) this->vel = DCArrayKokkos(num_rk, num_nodes, num_dims, "node_velocity"); + if (vel.size() == 0) this->vel = DistributedDCArray(num_rk, num_nodes, num_dims, "node_velocity"); break; case node_state::force: - if (force.size() == 0) this->force = DCArrayKokkos(num_nodes, num_dims, "node_force"); + if (force.size() == 0) this->force = DistributedDCArray(num_nodes, num_dims, "node_force"); break; case node_state::mass: - if (mass.size() == 0) this->mass = DCArrayKokkos(num_nodes, "node_mass"); + if (mass.size() == 0) this->mass = DistributedDCArray(num_nodes, "node_mass"); break; case node_state::temp: - if (temp.size() == 0) this->temp = DCArrayKokkos(num_rk, num_nodes, "node_temp"); + if (temp.size() == 0) this->temp = DistributedDCArray(num_rk, num_nodes, "node_temp"); break; case node_state::heat_transfer: - if (q_transfer.size() == 0) this->q_transfer = DCArrayKokkos(num_nodes, "node_q_transfer"); + if (q_transfer.size() == 0) this->q_transfer = DistributedDCArray(num_nodes, "node_q_transfer"); break; default: std::cout<<"Desired node state not understood in node_t initialize"< // Initialize driver data. 
Solver type, number of solvers // Will be parsed from YAML input void Driver::initialize() { - std::cout << "Initializing Driver" << std::endl; + + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + + if(myrank == 0){ + std::cout << "Initializing Driver" << std::endl; + } Yaml::Node root; try { @@ -54,16 +61,26 @@ void Driver::initialize() } catch (const Yaml::Exception e) { - std::cout << "Exception " << e.Type() << ": " << e.what() << std::endl; + if(myrank == 0){ + std::cout << "Exception " << e.Type() << ": " << e.what() << std::endl; + } + + MPI_Finalize(); + MPI_Barrier(MPI_COMM_WORLD); exit(0); } parse_yaml(root, SimulationParameters, Materials, BoundaryConditions); - std::cout << "Finished parsing YAML file" << std::endl; + + if(myrank == 0){ + std::cout << "Finished parsing YAML file" << std::endl; + } if (SimulationParameters.mesh_input.source == mesh_input::file) { // Create and/or read mesh - std::cout << "Mesh file path: " << SimulationParameters.mesh_input.file_path << std::endl; + if(myrank == 0){ + std::cout << "Mesh file path: " << SimulationParameters.mesh_input.file_path << std::endl; + } mesh_reader.set_mesh_file(SimulationParameters.mesh_input.file_path.data()); mesh_reader.read_mesh(mesh, State, @@ -80,7 +97,9 @@ void Driver::initialize() } else{ throw std::runtime_error("**** NO MESH INPUT OPTIONS PROVIDED IN YAML ****"); - return; + MPI_Finalize(); + MPI_Barrier(MPI_COMM_WORLD); + exit(0); } // Build boundary conditions @@ -97,8 +116,10 @@ void Driver::initialize() for (size_t solver_id = 0; solver_id < SimulationParameters.solver_inputs.size(); solver_id++) { if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGH3D) { - - std::cout << "Initializing dynx_FE solver" << std::endl; + + if(myrank == 0){ + std::cout << "Initializing dynx_FE solver" << std::endl; + } SGH3D* sgh_solver = new SGH3D(); sgh_solver->initialize(SimulationParameters, @@ -140,16 +161,20 @@ void Driver::initialize() } 
// end if time was set } // end if solver=0 - - std::cout << "Solver " << solver_id << " start time = " << sgh_solver->time_start << ", ending time = " << sgh_solver->time_end << "\n"; + + if(myrank == 0){ + std::cout << "Solver " << solver_id << " start time = " << sgh_solver->time_start << ", ending time = " << sgh_solver->time_end << "\n"; + } solvers.push_back(sgh_solver); } // end if SGH solver else if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGHRZ) { - - std::cout << "Initializing dynx_FE_RZ solver" << std::endl; + + if(myrank == 0){ + std::cout << "Initializing dynx_FE_RZ solver" << std::endl; + } SGHRZ* sgh_solver_rz = new SGHRZ(); sgh_solver_rz->initialize(SimulationParameters, @@ -191,14 +216,18 @@ void Driver::initialize() } // end if time was set } // end if solver=0 - - std::cout << "Solver " << solver_id << " start time = " << sgh_solver_rz->time_start << ", ending time = " << sgh_solver_rz->time_end << "\n"; + + if(myrank == 0){ + std::cout << "Solver " << solver_id << " start time = " << sgh_solver_rz->time_start << ", ending time = " << sgh_solver_rz->time_end << "\n"; + } solvers.push_back(sgh_solver_rz); } // end if SGHRZ solver else if (SimulationParameters.solver_inputs[solver_id].method == solver_input::SGTM3D) { - - std::cout << "Initializing thrmex_FE solver" << std::endl; + + if(myrank == 0){ + std::cout << "Initializing thrmex_FE solver" << std::endl; + } SGTM3D* sgtm_solver_3d = new SGTM3D(); sgtm_solver_3d->initialize(SimulationParameters, @@ -240,14 +269,18 @@ void Driver::initialize() } // end if time was set } // end if solver=0 - - std::cout << "Solver " << solver_id << " start time = " << sgtm_solver_3d->time_start << ", ending time = " << sgtm_solver_3d->time_end << "\n"; + + if(myrank == 0){ + std::cout << "Solver " << solver_id << " start time = " << sgtm_solver_3d->time_start << ", ending time = " << sgtm_solver_3d->time_end << "\n"; + } solvers.push_back(sgtm_solver_3d); } // end if SGTM solver 
else { throw std::runtime_error("**** NO SOLVER INPUT OPTIONS PROVIDED IN YAML, OR OPTION NOT UNDERSTOOD ****"); - return; + MPI_Finalize(); + MPI_Barrier(MPI_COMM_WORLD); + exit(0); } } // end for loop over solvers @@ -296,8 +329,10 @@ void Driver::initialize() /// ///////////////////////////////////////////////////////////////////////////// void Driver::setup() -{ - std::cout << "Inside driver setup" << std::endl; +{ + if(myrank == 0){ + std::cout << "Inside driver setup" << std::endl; + } // allocate state, setup models, and apply fill instructions for (auto& solver : solvers) { @@ -319,8 +354,10 @@ void Driver::setup() /// ///////////////////////////////////////////////////////////////////////////// void Driver::execute() -{ - std::cout << "Inside driver execute" << std::endl; +{ + if(myrank == 0){ + std::cout << "Inside driver execute" << std::endl; + } for (auto& solver : solvers) { solver->execute(SimulationParameters, Materials, @@ -341,8 +378,10 @@ void Driver::execute() /// ///////////////////////////////////////////////////////////////////////////// void Driver::finalize() -{ - std::cout << "Inside driver finalize" << std::endl; +{ + if(myrank == 0){ + std::cout << "Inside driver finalize" << std::endl; + } for (auto& solver : solvers) { if (solver->finalize_flag) { solver->finalize(SimulationParameters, @@ -352,7 +391,9 @@ void Driver::finalize() } // destroy FEA modules for (auto& solver : solvers) { - std::cout << "Deleting solver" << std::endl; + if(myrank == 0){ + std::cout << "Deleting solver" << std::endl; + } delete solver; } } diff --git a/single-node-refactor/src/driver.h b/single-node-refactor/src/driver.h index 249808a93..5fc1abea3 100644 --- a/single-node-refactor/src/driver.h +++ b/single-node-refactor/src/driver.h @@ -91,6 +91,9 @@ class Driver // set of enabled solvers std::vector solvers; + //MPI data + int myrank, nranks; + Driver(char* YAML) { yaml_file = YAML; From fa2115f4dd53cfb908d05c9559d4eda00817b2a5 Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 15 Apr 2025 22:15:33 -0600 Subject: [PATCH 05/66] WIP: MPI work --- .../src/common/include/mesh.h | 13 +- .../src/common/include/mesh_io.h | 1270 ++++++++++++++--- .../src/common/include/state.h | 32 + 3 files changed, 1138 insertions(+), 177 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 9c4a15a7a..9a3216751 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -253,7 +253,18 @@ struct Mesh_t legendre_in_elem_t legendre_in_elem; ///< Gauss Legendre points in an element // ---- Node Data Definitions ---- // - size_t num_nodes; ///< Number of nodes in the mesh + size_t num_nodes; ///< Global Number of nodes in the mesh + size_t nlocal_nodes; ///< number of nodes local to this process + size_t nall_nodes; ///< number of local + ghost nodes on this process + size_t nghost_nodes; ///< number of ghost nodes on this process + + //distributed map definitions + DistributedMap node_map; ///< partition of local nodes (stores global node IDs on each process) + DistributedMap all_node_map; ///< partition of local + ghost nodes (stores global node IDs on each process) + DistributedMap ghost_node_map; ///< partition of local + ghost nodes (stores global node IDs on each process) + DistributedMap element_map; ///< partition of uniquely owned elements (stores global node IDs on each process) + DistributedMap all_element_map; ///< partition of uniquely owned + shared elements (stores global node IDs on each process) + DistributedMap nonoverlap_element_node_map; // map of node indices belonging to unique element map RaggedRightArrayKokkos corners_in_node; ///< Corners connected to a node CArrayKokkos num_corners_in_node; ///< Number of corners connected to a node diff --git a/single-node-refactor/src/common/include/mesh_io.h 
b/single-node-refactor/src/common/include/mesh_io.h index 88225e07a..681c1f3c2 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -708,221 +708,1139 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - void read_vtk_mesh(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - node_t& node, - corner_t& corner, - mesh_input_t& mesh_inps, - int num_dims) - { - - std::cout<<"Reading VTK mesh"< v = split (str, delimiter); + + // // looking for the following text: + // // POINTS %d float + // if(v[0] == "POINTS"){ + // size_t num_nodes = std::stoi(v[1]); + // printf("Number of nodes read in %zu\n", num_nodes); + // mesh.initialize_nodes(num_nodes); + + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dims, required_node_state); + + // found=true; + // } // end if + + + // if (i>1000){ + // std::cerr << "ERROR: Failed to find POINTS in file" << std::endl; + // break; + // } // end if + + // i++; + // } // end while + // } - std::string token; - bool found = false; + // if(myrank==0){ + // // read the node coordinates + // for (node_gid=0; node_gid v = split (str, delimiter); + + // // save the nodal coordinates + // node.coords.host(node_gid, 0) = mesh_inps.scale_x*std::stod(v[0]); // double + // node.coords.host(node_gid, 1) = mesh_inps.scale_y*std::stod(v[1]); // double + // if(num_dims==3){ + // node.coords.host(node_gid, 2) = mesh_inps.scale_z*std::stod(v[2]); // double + // } + + // } // end for nodes + // } + + // // Update device nodal positions + // node.coords.update_device(); - std::ifstream in; // FILE *in; - in.open(mesh_file_); + // if(myrank==0){ + // found=false; + + // // look for CELLS + // i = 0; + // size_t num_elem = 0; + // while (found==false) { + // std::string str; + // std::getline(in, str); + + // std::string delimiter = " "; + // std::vector v = split (str, 
delimiter); + // std::cout << v[0] << std::endl; // printing + + // // looking for the following text: + // // CELLS num_elem size + // if(v[0] == "CELLS"){ + // num_elem = std::stoi(v[1]); + // printf("Number of elements read in %zu\n", num_elem); + + // // initialize elem variables + // mesh.initialize_elems(num_elem, num_dims); + + // found=true; + // } // end if + + + // if (i>1000){ + // printf("ERROR: Failed to find CELLS \n"); + // break; + // } // end if + + // i++; + // } // end while + // } + // if(myrank==0){ + // // read the node ids in the element + // for (elem_gid=0; elem_gid v = split (str, delimiter); + // num_nodes_in_elem = std::stoi(v[0]); + + // for (size_t node_lid=0; node_lid v = split (str, delimiter); - - // looking for the following text: - // POINTS %d float - if(v[0] == "POINTS"){ - size_t num_nodes = std::stoi(v[1]); - printf("Number of nodes read in %zu\n", num_nodes); - mesh.initialize_nodes(num_nodes); - - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dims, required_node_state); + // // Convert from ensight to IJK mesh + // size_t convert_ensight_to_ijk[8]; + // convert_ensight_to_ijk[0] = 0; + // convert_ensight_to_ijk[1] = 1; + // convert_ensight_to_ijk[2] = 3; + // convert_ensight_to_ijk[3] = 2; + // convert_ensight_to_ijk[4] = 4; + // convert_ensight_to_ijk[5] = 5; + // convert_ensight_to_ijk[6] = 7; + // convert_ensight_to_ijk[7] = 6; + + // size_t tmp_ijk_indx[8]; + + // for (size_t elem_gid = 0; elem_gid < num_elem; elem_gid++) { + // for (size_t node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { + // tmp_ijk_indx[node_lid] = mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); + // } + + // for (size_t node_lid = 0; node_lid < num_nodes_in_elem; node_lid++){ + // mesh.nodes_in_elem.host(elem_gid, node_lid) = tmp_ijk_indx[node_lid]; + // } + // } + // // update device side + // mesh.nodes_in_elem.update_device(); + + + // // initialize corner variables + // 
size_t num_corners = num_elem * num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + + + // // Build connectivity + // mesh.build_connectivity(); + + + // found=false; + + // printf("\n"); + + + // // look for CELL_TYPE + // if(myrank==0){ + // i = 0; + // size_t elem_type = 0; + // while (found==false) { + // std::string str; + // std::string delimiter = " "; + // std::getline(in, str); + // std::vector v = split (str, delimiter); - found=true; - } // end if + // // looking for the following text: + // // CELLS num_elem size + // if(v[0] == "CELL_TYPES"){ + + // std::getline(in, str); + // elem_type = std::stoi(str); + + // found=true; + // } // end if + + + // if (i>1000){ + // printf("ERROR: Failed to find elem_TYPE \n"); + // break; + // } // end if + + // i++; + // } // end while + // printf("Element type = %zu \n", elem_type); + // // elem types: + // // linear hex = 12, linear quad = 9 + // found=false; - if (i>1000){ - std::cerr << "ERROR: Failed to find POINTS in file" << std::endl; - break; - } // end if + // if(num_nodes_in_elem==8 & elem_type != 12) { + // printf("Wrong element type of %zu \n", elem_type); + // std::cerr << "ERROR: incorrect element type in VTK file" << std::endl; + // } - i++; - } // end while + // in.close(); + // } - // read the node coordinates - for (node_gid=0; node_gid v = split (str, delimiter); - - // save the nodal coordinates - node.coords.host(node_gid, 0) = mesh_inps.scale_x*std::stod(v[0]); // double - node.coords.host(node_gid, 1) = mesh_inps.scale_y*std::stod(v[1]); // double - if(num_dims==3){ - node.coords.host(node_gid, 2) = mesh_inps.scale_z*std::stod(v[2]); // double + // } // end of VTKread function + + void read_vtk_mesh(Mesh_t& mesh, + GaussPoint_t& GaussPoints, + node_t& node, + corner_t& corner, + mesh_input_t& mesh_inps, + int num_dims) + { + char ch; + std::string skip_line, read_line, substring; + std::stringstream line_parse; + + std::cout<<"Reading VTK mesh"< read_buffer; + + + // read the mesh 
+ // --- Read the number of nodes in the mesh --- // + num_nodes = 0; + if (myrank == 0) + { + std::cout << " NUM DIM is " << num_dims << std::endl; + in = new std::ifstream(); + in->open(MESH); + bool found = false; + + while (found == false&&in->good()) { + std::getline(*in, read_line); + line_parse.str(""); + line_parse.clear(); + line_parse << read_line; + line_parse >> substring; + + // looking for the following text: + // POINTS %d float + if (substring == "POINTS") + { + line_parse >> num_nodes; + std::cout << "declared node count: " << num_nodes << std::endl; + if (num_nodes <= 0) + { + throw std::runtime_error("ERROR, NO NODES IN MESH"); + } + found = true; + } // end if + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find POINTS"); + } // end if + + } // end if(myrank==0) + + // broadcast number of nodes + MPI_Bcast(&num_nodes, 1, MPI_LONG_LONG_INT, 0, world); + + //allocate pre-partition node coords using contiguous decomposition + //FArray type used since CArray type still doesnt support zoltan2 decomposition + DistributedDFArray node_coords_distributed(num_nodes, num_dims); + + // construct contiguous parallel row map now that we know the number of nodes + DistributedMap map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); + + // set the vertices in the mesh read in + size_t nlocal_nodes = map.size(); + + // scope ensures view is destroyed for now to avoid calling a device view with an active host view later + { + host_vec_array node_coords = node_coords_distributed->getLocalView(Tpetra::Access::ReadWrite); + // host_vec_array node_coords = dual_node_coords.view_host(); + // notify that the host view is going to be modified in the file readin + // dual_node_coords.modify_host(); + + // old swage method + // mesh->init_nodes(local_nrows); // add 1 for index starting at 1 + + //std::cout << "Num nodes assigned to task " << myrank << " = " << nlocal_nodes << std::endl; + + // read the initial 
mesh coordinates + // x-coords + /*only task 0 reads in nodes and elements from the input file + stores node data in a buffer and communicates once the buffer cap is reached + or the data ends*/ + + words_per_line = input_options.words_per_line; + elem_words_per_line = input_options.elem_words_per_line; + + // allocate read buffer + read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); + + dof_limit = num_nodes; + buffer_iterations = dof_limit / BUFFER_LINES; + if (dof_limit % BUFFER_LINES != 0) + { + buffer_iterations++; } - - } // end for nodes + // read coords + read_index_start = 0; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + getline(*in, read_line); + line_parse.clear(); + line_parse.str(read_line); + + for (int iword = 0; iword < words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring <> substring; + // debug print + // std::cout<<" "<< substring <isNodeGlobalElement(node_gid)) + { + // set local node index in this mpi rank + node_rid = map->getLocalElement(node_gid); + // extract nodal position from the read buffer + // for tecplot format this is the three coords in the same line + dof_value = atof(&read_buffer(scan_loop, 0, 0)); + node_coords(node_rid, 0) = dof_value * unit_scaling; + dof_value = atof(&read_buffer(scan_loop, 1, 0)); + node_coords(node_rid, 1) = dof_value * unit_scaling; + if (num_dim == 3) + { + dof_value = atof(&read_buffer(scan_loop, 2, 0)); + node_coords(node_rid, 2) = dof_value * unit_scaling; + } + } + } + read_index_start += BUFFER_LINES; + } + } // end of coordinate readin + // repartition node distribution + repartition_nodes(false); + std::vector required_node_state = { node_state::coords }; + 
node.post_repartition_initialize(partitioned_node_map, num_dims, required_node_state); - found=false; + // synchronize device data + // dual_node_coords.sync_device(); + // dual_node_coords.modify_device(); - // look for CELLS - i = 0; - size_t num_elem = 0; - while (found==false) { - std::string str; - std::getline(in, str); - - std::string delimiter = " "; - std::vector v = split (str, delimiter); - std::cout << v[0] << std::endl; // printing - - // looking for the following text: - // CELLS num_elem size - if(v[0] == "CELLS"){ - num_elem = std::stoi(v[1]); - printf("Number of elements read in %zu\n", num_elem); - - // initialize elem variables - mesh.initialize_elems(num_elem, num_dims); - - found=true; - } // end if - - - if (i>1000){ - printf("ERROR: Failed to find CELLS \n"); - break; + // debug print of nodal data + + // debug print nodal positions and indices + /* + std::cout << " ------------NODAL POSITIONS ON TASK " << myrank << " --------------"<getGlobalElement(inode) + 1 << " { "; + for (int istride = 0; istride < num_dim; istride++){ + std::cout << node_coords(inode,istride) << " , "; + } + std::cout << " }"<< std::endl; + } + */ + + // check that local assignments match global total + + // read in element info (ensight file format is organized in element type sections) + // loop over this later for several element type sections + + num_elem = 0; + rnum_elem = 0; + CArrayKokkos node_store(elem_words_per_line); + + // --- read the number of cells in the mesh --- + // --- Read the number of vertices in the mesh --- // + if (myrank == 0) + { + bool found = false; + while (found == false&&in->good()) { + std::getline(*in, read_line); + line_parse.str(""); + line_parse.clear(); + line_parse << read_line; + line_parse >> substring; + + // looking for the following text: + // CELLS num_cells size + if (substring == "CELLS") + { + line_parse >> num_elem; + std::cout << "declared element count: " << num_elem << std::endl; + if (num_elem <= 0) + { + throw 
std::runtime_error("ERROR, NO ELEMENTS IN MESH"); + } + found = true; + } // end if + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find CELLS"); } // end if - - i++; - } // end while - - - // read the node ids in the element - for (elem_gid=0; elem_gid v = split (str, delimiter); - num_nodes_in_elem = std::stoi(v[0]); - - for (size_t node_lid=0; node_lid(BUFFER_LINES, elem_words_per_line, MAX_WORD); + + // calculate buffer iterations to read number of lines + buffer_iterations = num_elem / BUFFER_LINES; + int assign_flag; + + // dynamic buffer used to store elements before we know how many this rank needs + std::vector element_temp(BUFFER_LINES * elem_words_per_line); + std::vector global_indices_temp(BUFFER_LINES); + size_t buffer_max = BUFFER_LINES * elem_words_per_line; + size_t indices_buffer_max = BUFFER_LINES; + + if (num_elem % BUFFER_LINES != 0) + { + buffer_iterations++; + } + read_index_start = 0; + // std::cout << "ELEMENT BUFFER ITERATIONS: " << buffer_iterations << std::endl; + rnum_elem = 0; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + getline(*in, read_line); + line_parse.clear(); + line_parse.str(read_line); + // disregard node count line since we're using one element type per mesh + line_parse >> substring; + for (int iword = 0; iword < elem_words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring; + // assign the substring variable as a word of the read buffer + strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + } + // std::cout <> substring; + for (int iword = 0; iword < elem_words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> 
substring; + // debug print + // std::cout<<" "<< substring; + // assign the substring variable as a word of the read buffer + strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + } + // std::cout <isNodeGlobalElement(node_gid) && !assign_flag) + { + assign_flag = 1; + rnum_elem++; + } + } + else + { + if (map->isNodeGlobalElement(node_gid - 1) && !assign_flag) + { + assign_flag = 1; + rnum_elem++; + } + } + } - for (size_t elem_gid = 0; elem_gid < num_elem; elem_gid++) { - for (size_t node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - tmp_ijk_indx[node_lid] = mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); + if (assign_flag) + { + for (int inode = 0; inode < elem_words_per_line; inode++) + { + if ((rnum_elem - 1) * elem_words_per_line + inode >= buffer_max) + { + element_temp.resize((rnum_elem - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); + buffer_max = (rnum_elem - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; + } + element_temp[(rnum_elem - 1) * elem_words_per_line + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << rnum_elem << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + } + // assign global element id to temporary list + if (rnum_elem - 1 >= indices_buffer_max) + { + global_indices_temp.resize(rnum_elem - 1 + BUFFER_LINES); + indices_buffer_max = rnum_elem - 1 + BUFFER_LINES; + } + global_indices_temp[rnum_elem - 1] = elem_gid; + } } + read_index_start += BUFFER_LINES; + } - for (size_t node_lid = 0; node_lid < num_nodes_in_elem; node_lid++){ - mesh.nodes_in_elem.host(elem_gid, node_lid) = tmp_ijk_indx[node_lid]; + // std::cout << "RNUM ELEMENTS IS: " << rnum_elem << std::endl; + + Element_Types = CArrayKokkos(rnum_elem); + + elements::elem_types::elem_type mesh_element_type; + + if (simparam.num_dims == 2) + { + if (input_options.element_type == ELEMENT_TYPE::quad4) + { + 
mesh_element_type = elements::elem_types::Quad4; + max_nodes_per_patch = 2; + } + else if (input_options.element_type == ELEMENT_TYPE::quad8) + { + mesh_element_type = elements::elem_types::Quad8; + max_nodes_per_patch = 3; + } + else if (input_options.element_type == ELEMENT_TYPE::quad12) + { + mesh_element_type = elements::elem_types::Quad12; + max_nodes_per_patch = 4; } + else + { + if (myrank == 0) + { + std::cout << "ELEMENT TYPE UNRECOGNIZED" << std::endl; + } + exit_solver(0); + } + element_select->choose_2Delem_type(mesh_element_type, elem2D); + max_nodes_per_element = elem2D->num_nodes(); } - // update device side - mesh.nodes_in_elem.update_device(); + if (simparam.num_dims == 3) + { + if (input_options.element_type == ELEMENT_TYPE::hex8) + { + mesh_element_type = elements::elem_types::Hex8; + max_nodes_per_patch = 4; + } + else if (input_options.element_type == ELEMENT_TYPE::hex20) + { + mesh_element_type = elements::elem_types::Hex20; + max_nodes_per_patch = 8; + } + else if (input_options.element_type == ELEMENT_TYPE::hex32) + { + mesh_element_type = elements::elem_types::Hex32; + max_nodes_per_patch = 12; + } + else + { + if (myrank == 0) + { + std::cout << "ELEMENT TYPE UNRECOGNIZED" << std::endl; + } + exit_solver(0); + } + element_select->choose_3Delem_type(mesh_element_type, elem); + max_nodes_per_element = elem->num_nodes(); + } - // initialize corner variables - size_t num_corners = num_elem * num_nodes_in_elem; - mesh.initialize_corners(num_corners); + // 1 type per mesh for now + for (int ielem = 0; ielem < rnum_elem; ielem++) + { + Element_Types(ielem) = mesh_element_type; + } + // copy temporary element storage to multivector storage + dual_nodes_in_elem = dual_elem_conn_array("dual_nodes_in_elem", rnum_elem, max_nodes_per_element); + host_elem_conn_array nodes_in_elem = dual_nodes_in_elem.view_host(); + dual_nodes_in_elem.modify_host(); - // Build connectivity - mesh.build_connectivity(); + for (int ielem = 0; ielem < rnum_elem; ielem++) + 
{ + for (int inode = 0; inode < elem_words_per_line; inode++) + { + nodes_in_elem(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; + } + } + // view storage for all local elements connected to local nodes on this rank + // DCArrayKokkos All_Element_Global_Indices(rnum_elem); + Kokkos::DualView All_Element_Global_Indices("All_Element_Global_Indices", rnum_elem); + // copy temporary global indices storage to view storage + for (int ielem = 0; ielem < rnum_elem; ielem++) + { + All_Element_Global_Indices.h_view(ielem) = global_indices_temp[ielem]; + if (global_indices_temp[ielem] < 0) + { + negative_index_found = 1; + } + } - found=false; + MPI_Allreduce(&negative_index_found, &global_negative_index_found, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + if (global_negative_index_found) + { + if (myrank == 0) + { + std::cout << "Node index less than or equal to zero detected; set \"zero_index_base: true\" under \"input_options\" in your yaml file if indices start at 0" << std::endl; + } + exit_solver(0); + } - printf("\n"); - - - // look for CELL_TYPE - i = 0; - size_t elem_type = 0; - while (found==false) { - std::string str; - std::string delimiter = " "; - std::getline(in, str); - std::vector v = split (str, delimiter); - - // looking for the following text: - // CELLS num_elem size - if(v[0] == "CELL_TYPES"){ + // delete temporary element connectivity and index storage + std::vector().swap(element_temp); + std::vector().swap(global_indices_temp); - std::getline(in, str); - elem_type = std::stoi(str); - - found=true; - } // end if - + All_Element_Global_Indices.modify_host(); + All_Element_Global_Indices.sync_device(); + + // debug print + /* + Kokkos::View All_Element_Global_Indices_pass("All_Element_Global_Indices_pass",rnum_elem); + deep_copy(All_Element_Global_Indices_pass, All_Element_Global_Indices.h_view); + std::cout << " ------------ELEMENT GLOBAL INDICES ON TASK " << myrank << " --------------"<(Teuchos::OrdinalTraits::invalid(), 
All_Element_Global_Indices.d_view, 0, comm)); + + // element type selection (subject to change) + // ---- Set Element Type ---- // + // allocate element type memory + // elements::elem_type_t* elem_choice; + + int NE = 1; // number of element types in problem + + // Convert ensight index system to the ijk finite element numbering convention + // for vertices in cell + if (active_node_ordering_convention == IJK) + { + CArrayKokkos convert_ensight_to_ijk(max_nodes_per_element); + CArrayKokkos tmp_ijk_indx(max_nodes_per_element); + convert_ensight_to_ijk(0) = 0; + convert_ensight_to_ijk(1) = 1; + convert_ensight_to_ijk(2) = 3; + convert_ensight_to_ijk(3) = 2; + convert_ensight_to_ijk(4) = 4; + convert_ensight_to_ijk(5) = 5; + convert_ensight_to_ijk(6) = 7; + convert_ensight_to_ijk(7) = 6; + + int nodes_per_element; + + if (num_dim == 2) + { + for (int cell_rid = 0; cell_rid < rnum_elem; cell_rid++) + { + // set nodes per element + element_select->choose_2Delem_type(Element_Types(cell_rid), elem2D); + nodes_per_element = elem2D->num_nodes(); + for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) + { + tmp_ijk_indx(node_lid) = nodes_in_elem(cell_rid, convert_ensight_to_ijk(node_lid)); + } + + for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) + { + nodes_in_elem(cell_rid, node_lid) = tmp_ijk_indx(node_lid); + } + } + } + + if (num_dim == 3) + { + for (int cell_rid = 0; cell_rid < rnum_elem; cell_rid++) + { + // set nodes per element + element_select->choose_3Delem_type(Element_Types(cell_rid), elem); + nodes_per_element = elem->num_nodes(); + for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) + { + tmp_ijk_indx(node_lid) = nodes_in_elem(cell_rid, convert_ensight_to_ijk(node_lid)); + } + + for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) + { + nodes_in_elem(cell_rid, node_lid) = tmp_ijk_indx(node_lid); + } + } + } + } + + //If restarting a topology optimization run, obtain nodal design density data here + 
if(topology_optimization_restart){ + design_node_densities_distributed = Teuchos::rcp(new MV(map, 1)); + host_vec_array node_densities = design_node_densities_distributed->getLocalView (Tpetra::Access::ReadWrite); + if (myrank == 0) + { + bool found = false; + while (found == false&&in->good()) { + std::getline(*in, read_line); + //std::cout << read_line << std::endl; + line_parse.clear(); + line_parse.str(read_line); + + //stop when the design_density string is reached + while (!line_parse.eof()){ + line_parse >> substring; + //std::cout << substring << std::endl; + if(!substring.compare("design_density")){ + found = true; + } + } //while + + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find design_density"); + } // end if + + //skip "LOOKUP_TABLE default" line + std::getline(*in, read_line); + } // end if(myrank==0) - if (i>1000){ - printf("ERROR: Failed to find elem_TYPE \n"); - break; - } // end if + //read in density of each node + // allocate read buffer + words_per_line = 1; + read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); + + dof_limit = num_nodes; + buffer_iterations = dof_limit / BUFFER_LINES; + if (dof_limit % BUFFER_LINES != 0) + { + buffer_iterations++; + } + + // read densities + read_index_start = 0; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + getline(*in, read_line); + line_parse.clear(); + line_parse.str(read_line); + + for (int iword = 0; iword < words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring <> substring; + // debug print + // std::cout<<" "<< substring <isNodeGlobalElement(node_gid)) + { + // set local node index in this mpi rank + node_rid = 
map->getLocalElement(node_gid); + // extract nodal position from the read buffer + // for tecplot format this is the three coords in the same line + dof_value = atof(&read_buffer(scan_loop, 0, 0)); + node_densities(node_rid, 0) = dof_value; + } + } + read_index_start += BUFFER_LINES; + } + + //Find initial objective value to normalize by + if (myrank == 0 && simparam.optimization_options.normalized_objective) + { + bool found = false; + while (found == false&&in->good()) { + std::getline(*in, read_line); + //std::cout << read_line << std::endl; + line_parse.clear(); + line_parse.str(read_line); + + //stop when the design_density string is reached + while (!line_parse.eof()){ + line_parse >> substring; + //std::cout << substring << std::endl; + if(!substring.compare("Objective_Normalization_Constant")){ + found = true; + line_parse >> substring; + simparam.optimization_options.objective_normalization_constant = stod(substring); + } + } //while + + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find initial objective value for restart"); + } // end if + } // end if(myrank==0) + } + + //If restarting a topology optimization run, obtain nodal design density data here + if(shape_optimization_restart){ + design_node_coords_distributed = Teuchos::rcp(new MV(map, num_dim)); + host_vec_array design_node_coords = design_node_coords_distributed->getLocalView (Tpetra::Access::ReadWrite); + if (myrank == 0) + { + bool found = false; + while (found == false&&in->good()) { + std::getline(*in, read_line); + //std::cout << read_line << std::endl; + line_parse.clear(); + line_parse.str(read_line); + + //stop when the design_density string is reached + while (!line_parse.eof()){ + line_parse >> substring; + //std::cout << substring << std::endl; + if(!substring.compare("design_coordinates")){ + found = true; + } + } //while + + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find design_coordinates"); + } // end if + + //skip 
"LOOKUP_TABLE default" line + std::getline(*in, read_line); + } // end if(myrank==0) - i++; - } // end while - printf("Element type = %zu \n", elem_type); - // elem types: - // linear hex = 12, linear quad = 9 - found=false; - - - if(num_nodes_in_elem==8 & elem_type != 12) { - printf("Wrong element type of %zu \n", elem_type); - std::cerr << "ERROR: incorrect element type in VTK file" << std::endl; + //read in density of each node + // allocate read buffer + words_per_line = num_dim; + read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); + + dof_limit = num_nodes; + buffer_iterations = dof_limit / BUFFER_LINES; + if (dof_limit % BUFFER_LINES != 0) + { + buffer_iterations++; + } + + // read densities + read_index_start = 0; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + getline(*in, read_line); + line_parse.clear(); + line_parse.str(read_line); + + for (int iword = 0; iword < words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring <> substring; + // debug print + // std::cout<<" "<< substring <isNodeGlobalElement(node_gid)) + { + // set local node index in this mpi rank + node_rid = map->getLocalElement(node_gid); + // extract nodal position from the read buffer + // for tecplot format this is the three coords in the same line + dof_value = atof(&read_buffer(scan_loop, 0, 0)); + design_node_coords(node_rid, 0) = dof_value * unit_scaling; + dof_value = atof(&read_buffer(scan_loop, 1, 0)); + design_node_coords(node_rid, 1) = dof_value * unit_scaling; + if(num_dim==3){ + dof_value = atof(&read_buffer(scan_loop, 2, 0)); + design_node_coords(node_rid, 2) = dof_value * unit_scaling; + } + } + } + read_index_start += BUFFER_LINES; + } + + //Find 
initial objective value to normalize by + if (myrank == 0 && simparam.optimization_options.normalized_objective) + { + bool found = false; + while (found == false&&in->good()) { + std::getline(*in, read_line); + //std::cout << read_line << std::endl; + line_parse.clear(); + line_parse.str(read_line); + + //stop when the design_density string is reached + while (!line_parse.eof()){ + line_parse >> substring; + //std::cout << substring << std::endl; + if(!substring.compare("Objective_Normalization_Constant")){ + found = true; + line_parse >> substring; + simparam.optimization_options.objective_normalization_constant = stod(substring); + } + } //while + + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find initial objective value for restart"); + } // end if + } // end if(myrank==0) } - - in.close(); - - } // end of VTKread function + + // Close mesh input file + if (myrank == 0) + { + in->close(); + } + } // end read_mesh ///////////////////////////////////////////////////////////////////////////// diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 1d9d1c55b..6befed212 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -315,6 +315,38 @@ struct node_t } }; // end method + void post_repartition_initialize(DistributedMap partitioned_map, size_t num_dims, std::vector node_states) + { + for (auto field : node_states){ + switch(field){ + case node_state::coords: + if (coords.size() == 0) this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); + if (coords_n0.size() == 0) this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); + break; + case node_state::velocity: + if (vel.size() == 0) this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); + if (vel_n0.size() == 0) this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, 
"node_velocity_n0"); + break; + case node_state::force: + if (force.size() == 0) this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); + break; + case node_state::mass: + if (mass.size() == 0) this->mass = DistributedDCArray(partitioned_map, "node_mass"); + break; + case node_state::temp: + if (temp.size() == 0) this->temp = DistributedDCArray(partitioned_map, "node_temp"); + if (temp_n0.size() == 0) this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); + break; + case node_state::heat_transfer: + if (q_transfer.size() == 0) this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + break; + default: + std::cout<<"Desired node state not understood in node_t initialize"< Date: Mon, 21 Apr 2025 21:26:03 -0600 Subject: [PATCH 06/66] WIP: MPI refactor --- .../src/common/include/mesh.h | 4 +- .../src/common/include/mesh_io.h | 555 +++++------------- 2 files changed, 138 insertions(+), 421 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 9a3216751..ffe0e22b4 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -229,7 +229,9 @@ struct Mesh_t size_t num_dims = 3; ///< Number of spatial dimension // ---- Element Data Definitions ---- // - size_t num_elems; ///< Number of elements in the mesh + size_t global_num_elems; ///< Global number of elements in the mesh + size_t num_elems; ///< number of local+shared elements on this process (forces usually employ this) + size_t nlocal_elems; ///< number of local elements on this process (output and reductions for energy usually employ this) size_t num_nodes_in_elem; ///< Number of nodes in an element size_t num_patches_in_elem; ///< Number of patches in an element size_t num_surfs_in_elem; ///< Number of surfaces in an element diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h 
index 681c1f3c2..838cc4844 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -52,7 +52,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include - +#define BUFFER_LINES 100000 +#define MAX_WORD 30 ///////////////////////////////////////////////////////////////////////////// /// @@ -475,18 +476,18 @@ class MeshReader } // --- read in the elements in the mesh --- - size_t num_elem = 0; + size_t num_elems = 0; - fscanf(in, "%lu", &num_elem); - printf("Number of elements read in %lu\n", num_elem); + fscanf(in, "%lu", &num_elems); + printf("Number of elements read in %lu\n", num_elems); // initialize elem variables - mesh.initialize_elems(num_elem, num_dims); - // GaussPoints.initialize(num_elem, 3); // always 3D here, even for 2D + mesh.initialize_elems(num_elems, num_dims); + // GaussPoints.initialize(num_elems, 3); // always 3D here, even for 2D // for each cell read the list of associated nodes - for (int elem_gid = 0; elem_gid < num_elem; elem_gid++) { + for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { fscanf(in, "%lu", &mesh.nodes_in_elem.host(elem_gid, node_lid)); // %d vs zu @@ -508,7 +509,7 @@ class MeshReader int tmp_ijk_indx[8]; - for (int elem_gid = 0; elem_gid < num_elem; elem_gid++) { + for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { tmp_ijk_indx[node_lid] = mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); } @@ -521,7 +522,7 @@ class MeshReader mesh.nodes_in_elem.update_device(); // initialize corner variables - int num_corners = num_elem * mesh.num_nodes_in_elem; + int num_corners = num_elems * mesh.num_nodes_in_elem; mesh.initialize_corners(num_corners); // corner.initialize(num_corners, num_dims); @@ -666,15 +667,15 @@ class MeshReader // --- read in the elements in the mesh --- - size_t num_elem = 
elements.size(); - printf("Number of elements read in %lu\n", num_elem); + size_t num_elems = elements.size(); + printf("Number of elements read in %lu\n", num_elems); // initialize elem variables - mesh.initialize_elems(num_elem, num_dims); + mesh.initialize_elems(num_elems, num_dims); // for each cell read the list of associated nodes - for (int elem_gid = 0; elem_gid < num_elem; elem_gid++) { + for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { for (int node_lid = 0; node_lid < 8; node_lid++) { mesh.nodes_in_elem.host(elem_gid, node_lid) = elements[elem_gid].connectivity[node_lid]; @@ -687,7 +688,7 @@ class MeshReader mesh.nodes_in_elem.update_device(); // initialize corner variables - int num_corners = num_elem * mesh.num_nodes_in_elem; + int num_corners = num_elems * mesh.num_nodes_in_elem; mesh.initialize_corners(num_corners); // State.corner.initialize(num_corners, num_dims); @@ -799,7 +800,7 @@ class MeshReader // // look for CELLS // i = 0; - // size_t num_elem = 0; + // size_t num_elems = 0; // while (found==false) { // std::string str; // std::getline(in, str); @@ -809,13 +810,13 @@ class MeshReader // std::cout << v[0] << std::endl; // printing // // looking for the following text: - // // CELLS num_elem size + // // CELLS num_elems size // if(v[0] == "CELLS"){ - // num_elem = std::stoi(v[1]); - // printf("Number of elements read in %zu\n", num_elem); + // num_elems = std::stoi(v[1]); + // printf("Number of elements read in %zu\n", num_elems); // // initialize elem variables - // mesh.initialize_elems(num_elem, num_dims); + // mesh.initialize_elems(num_elems, num_dims); // found=true; // } // end if @@ -832,7 +833,7 @@ class MeshReader // if(myrank==0){ // // read the node ids in the element - // for (elem_gid=0; elem_gid v = split (str, delimiter); // // looking for the following text: - // // CELLS num_elem size + // // CELLS num_elems size // if(v[0] == "CELL_TYPES"){ // std::getline(in, str); @@ -944,6 +945,7 @@ class MeshReader char ch; 
std::string skip_line, read_line, substring; std::stringstream line_parse; + std::ifstream in; // FILE *in; std::cout<<"Reading VTK mesh"< read_buffer; + CArrayKokkos read_buffer; // read the mesh // --- Read the number of nodes in the mesh --- // - num_nodes = 0; + size_t num_nodes = 0; if (myrank == 0) { std::cout << " NUM DIM is " << num_dims << std::endl; in = new std::ifstream(); - in->open(MESH); + in.open(mesh_file_); bool found = false; while (found == false&&in->good()) { - std::getline(*in, read_line); + std::getline(in, read_line); line_parse.str(""); line_parse.clear(); line_parse << read_line; @@ -1005,50 +1006,54 @@ class MeshReader // broadcast number of nodes MPI_Bcast(&num_nodes, 1, MPI_LONG_LONG_INT, 0, world); - //allocate pre-partition node coords using contiguous decomposition - //FArray type used since CArray type still doesnt support zoltan2 decomposition - DistributedDFArray node_coords_distributed(num_nodes, num_dims); + // host_vec_array node_coords = dual_node_coords.view_host(); + // notify that the host view is going to be modified in the file readin + // dual_node_coords.modify_host(); - // construct contiguous parallel row map now that we know the number of nodes - DistributedMap map = node_coords_distributed.pmap; - // map->describe(*fos,Teuchos::VERB_EXTREME); + // old swage method + // mesh->init_nodes(local_nrows); // add 1 for index starting at 1 - // set the vertices in the mesh read in - size_t nlocal_nodes = map.size(); + //std::cout << "Num nodes assigned to task " << myrank << " = " << nlocal_nodes << std::endl; - // scope ensures view is destroyed for now to avoid calling a device view with an active host view later - { - host_vec_array node_coords = node_coords_distributed->getLocalView(Tpetra::Access::ReadWrite); - // host_vec_array node_coords = dual_node_coords.view_host(); - // notify that the host view is going to be modified in the file readin - // dual_node_coords.modify_host(); - - // old swage method - // 
mesh->init_nodes(local_nrows); // add 1 for index starting at 1 - - //std::cout << "Num nodes assigned to task " << myrank << " = " << nlocal_nodes << std::endl; + // read the initial mesh coordinates + // x-coords + /*only task 0 reads in nodes and elements from the input file + stores node data in a buffer and communicates once the buffer cap is reached + or the data ends*/ - // read the initial mesh coordinates - // x-coords - /*only task 0 reads in nodes and elements from the input file - stores node data in a buffer and communicates once the buffer cap is reached - or the data ends*/ + size_t num_nodes_in_elem = 1; + for (int dim = 0; dim < num_dims; dim++) { + num_nodes_in_elem *= 2; + } - words_per_line = input_options.words_per_line; - elem_words_per_line = input_options.elem_words_per_line; + words_per_line = num_dims; + elem_words_per_line = num_nodes_in_elem; - // allocate read buffer - read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); + // allocate read buffer + read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); - dof_limit = num_nodes; - buffer_iterations = dof_limit / BUFFER_LINES; - if (dof_limit % BUFFER_LINES != 0) - { - buffer_iterations++; - } + dof_limit = num_nodes; + buffer_iterations = dof_limit / BUFFER_LINES; + if (dof_limit % BUFFER_LINES != 0) + { + buffer_iterations++; + } - // read coords - read_index_start = 0; + // read coords + read_index_start = 0; + size_t nlocal_nodes; + DistributedMap map; + { //scoped so temp FArray data is auto deleted to save memory + //allocate pre-partition node coords using contiguous decomposition + //FArray type used since CArray type still doesnt support zoltan2 decomposition + DistributedDFArray node_coords_distributed(num_nodes, num_dims); + + // construct contiguous parallel row map now that we know the number of nodes + map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); + + // set the vertices in the mesh read in + nlocal_nodes = 
map.size(); for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) { // pack buffer on rank 0 @@ -1056,7 +1061,7 @@ class MeshReader { for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) { - getline(*in, read_line); + getline(in, read_line); line_parse.clear(); line_parse.str(read_line); @@ -1075,7 +1080,7 @@ class MeshReader { buffer_loop = 0; while (buffer_iteration * BUFFER_LINES + buffer_loop < num_nodes) { - getline(*in, read_line); + getline(in, read_line); line_parse.clear(); line_parse.str(read_line); for (int iword = 0; iword < words_per_line; iword++) @@ -1116,29 +1121,33 @@ class MeshReader // extract nodal position from the read buffer // for tecplot format this is the three coords in the same line dof_value = atof(&read_buffer(scan_loop, 0, 0)); - node_coords(node_rid, 0) = dof_value * unit_scaling; + node_coords_distributed.host(node_rid, 0) = dof_value * unit_scaling; dof_value = atof(&read_buffer(scan_loop, 1, 0)); - node_coords(node_rid, 1) = dof_value * unit_scaling; + node_coords_distributed.host(node_rid, 1) = dof_value * unit_scaling; if (num_dim == 3) { dof_value = atof(&read_buffer(scan_loop, 2, 0)); - node_coords(node_rid, 2) = dof_value * unit_scaling; + node_coords_distributed.host(node_rid, 2) = dof_value * unit_scaling; } } } read_index_start += BUFFER_LINES; } - } // end of coordinate readin - // repartition node distribution - repartition_nodes(false); - - std::vector required_node_state = { node_state::coords }; - node.post_repartition_initialize(partitioned_node_map, num_dims, required_node_state); - - // synchronize device data - // dual_node_coords.sync_device(); - // dual_node_coords.modify_device(); + // end of coordinate readin + node_coords_distributed.update_device(); + // repartition node distribution + node_coords_distributed.repartition_vector(); + //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope + std::vector 
required_node_state = { node_state::coords }; + map = node_coords_distributed.pmap; + node.post_repartition_initialize(partitioned_map, num_dims, required_node_state); + } + //initialize some mesh data + mesh.initialize_nodes(num_nodes); + nlocal_nodes = partitioned_map.size(); + mesh.nlocal_nodes = nlocal_nodes; + // debug print of nodal data // debug print nodal positions and indices @@ -1158,8 +1167,8 @@ class MeshReader // read in element info (ensight file format is organized in element type sections) // loop over this later for several element type sections - num_elem = 0; - rnum_elem = 0; + size_t num_elems = 0; + num_elems = 0; CArrayKokkos node_store(elem_words_per_line); // --- read the number of cells in the mesh --- @@ -1168,7 +1177,7 @@ class MeshReader { bool found = false; while (found == false&&in->good()) { - std::getline(*in, read_line); + std::getline(in, read_line); line_parse.str(""); line_parse.clear(); line_parse << read_line; @@ -1178,9 +1187,9 @@ class MeshReader // CELLS num_cells size if (substring == "CELLS") { - line_parse >> num_elem; - std::cout << "declared element count: " << num_elem << std::endl; - if (num_elem <= 0) + line_parse >> num_elems; + std::cout << "declared element count: " << num_elems << std::endl; + if (num_elems <= 0) { throw std::runtime_error("ERROR, NO ELEMENTS IN MESH"); } @@ -1194,7 +1203,9 @@ class MeshReader } // end if(myrank==0) // broadcast number of elements - MPI_Bcast(&num_elem, 1, MPI_LONG_LONG_INT, 0, world); + MPI_Bcast(&num_elems, 1, MPI_LONG_LONG_INT, 0, world); + + //initialize num elem in mesh struct if (myrank == 0) { @@ -1206,7 +1217,7 @@ class MeshReader read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); // calculate buffer iterations to read number of lines - buffer_iterations = num_elem / BUFFER_LINES; + buffer_iterations = num_elems / BUFFER_LINES; int assign_flag; // dynamic buffer used to store elements before we know how many this rank needs @@ -1215,13 +1226,12 @@ 
class MeshReader size_t buffer_max = BUFFER_LINES * elem_words_per_line; size_t indices_buffer_max = BUFFER_LINES; - if (num_elem % BUFFER_LINES != 0) + if (num_elems % BUFFER_LINES != 0) { buffer_iterations++; } read_index_start = 0; // std::cout << "ELEMENT BUFFER ITERATIONS: " << buffer_iterations << std::endl; - rnum_elem = 0; for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) { // pack buffer on rank 0 @@ -1229,7 +1239,7 @@ class MeshReader { for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) { - getline(*in, read_line); + getline(in, read_line); line_parse.clear(); line_parse.str(read_line); // disregard node count line since we're using one element type per mesh @@ -1249,8 +1259,8 @@ class MeshReader else if (myrank == 0) { buffer_loop = 0; - while (buffer_iteration * BUFFER_LINES + buffer_loop < num_elem) { - getline(*in, read_line); + while (buffer_iteration * BUFFER_LINES + buffer_loop < num_elems) { + getline(in, read_line); line_parse.clear(); line_parse.str(read_line); line_parse >> substring; @@ -1303,18 +1313,18 @@ class MeshReader // first we add the elements to a dynamically allocated list if (zero_index_base) { - if (map->isNodeGlobalElement(node_gid) && !assign_flag) + if (map.isProcessGlobalIndex(node_gid) && !assign_flag) { assign_flag = 1; - rnum_elem++; + num_elems++; } } else { - if (map->isNodeGlobalElement(node_gid - 1) && !assign_flag) + if (map.isProcessGlobalIndex(node_gid - 1) && !assign_flag) { assign_flag = 1; - rnum_elem++; + num_elems++; } } } @@ -1323,29 +1333,36 @@ class MeshReader { for (int inode = 0; inode < elem_words_per_line; inode++) { - if ((rnum_elem - 1) * elem_words_per_line + inode >= buffer_max) + if ((num_elems - 1) * elem_words_per_line + inode >= buffer_max) { - element_temp.resize((rnum_elem - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); - buffer_max = (rnum_elem - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; + 
element_temp.resize((num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); + buffer_max = (num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; } - element_temp[(rnum_elem - 1) * elem_words_per_line + inode] = node_store(inode); - // std::cout << "VECTOR STORAGE FOR ELEM " << rnum_elem << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + element_temp[(num_elems - 1) * elem_words_per_line + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; } // assign global element id to temporary list - if (rnum_elem - 1 >= indices_buffer_max) + if (num_elems - 1 >= indices_buffer_max) { - global_indices_temp.resize(rnum_elem - 1 + BUFFER_LINES); - indices_buffer_max = rnum_elem - 1 + BUFFER_LINES; + global_indices_temp.resize(num_elems - 1 + BUFFER_LINES); + indices_buffer_max = num_elems - 1 + BUFFER_LINES; } - global_indices_temp[rnum_elem - 1] = elem_gid; + global_indices_temp[num_elems - 1] = elem_gid; } } read_index_start += BUFFER_LINES; } - // std::cout << "RNUM ELEMENTS IS: " << rnum_elem << std::endl; + //initialize elem data structures on this process + mesh.initialize_elems(num_elems); + + //set global and local shared element counts + mesh.num_elems = num_elems; + mesh.num_elems = num_elems; - Element_Types = CArrayKokkos(rnum_elem); + // std::cout << "RNUM ELEMENTS IS: " << num_elems << std::endl; + + Element_Types = CArrayKokkos(num_elems); elements::elem_types::elem_type mesh_element_type; @@ -1408,17 +1425,17 @@ class MeshReader } // 1 type per mesh for now - for (int ielem = 0; ielem < rnum_elem; ielem++) + for (int ielem = 0; ielem < num_elems; ielem++) { Element_Types(ielem) = mesh_element_type; } // copy temporary element storage to multivector storage - dual_nodes_in_elem = dual_elem_conn_array("dual_nodes_in_elem", 
rnum_elem, max_nodes_per_element); + dual_nodes_in_elem = dual_elem_conn_array("dual_nodes_in_elem", num_elems, max_nodes_per_element); host_elem_conn_array nodes_in_elem = dual_nodes_in_elem.view_host(); dual_nodes_in_elem.modify_host(); - for (int ielem = 0; ielem < rnum_elem; ielem++) + for (int ielem = 0; ielem < num_elems; ielem++) { for (int inode = 0; inode < elem_words_per_line; inode++) { @@ -1427,10 +1444,10 @@ class MeshReader } // view storage for all local elements connected to local nodes on this rank - // DCArrayKokkos All_Element_Global_Indices(rnum_elem); - Kokkos::DualView All_Element_Global_Indices("All_Element_Global_Indices", rnum_elem); + // DCArrayKokkos All_Element_Global_Indices(num_elems); + Kokkos::DualView All_Element_Global_Indices("All_Element_Global_Indices", num_elems); // copy temporary global indices storage to view storage - for (int ielem = 0; ielem < rnum_elem; ielem++) + for (int ielem = 0; ielem < num_elems; ielem++) { All_Element_Global_Indices.h_view(ielem) = global_indices_temp[ielem]; if (global_indices_temp[ielem] < 0) @@ -1458,10 +1475,10 @@ class MeshReader // debug print /* - Kokkos::View All_Element_Global_Indices_pass("All_Element_Global_Indices_pass",rnum_elem); + Kokkos::View All_Element_Global_Indices_pass("All_Element_Global_Indices_pass",num_elems); deep_copy(All_Element_Global_Indices_pass, All_Element_Global_Indices.h_view); std::cout << " ------------ELEMENT GLOBAL INDICES ON TASK " << myrank << " --------------"<choose_2Delem_type(Element_Types(cell_rid), elem2D); @@ -1515,7 +1532,7 @@ class MeshReader if (num_dim == 3) { - for (int cell_rid = 0; cell_rid < rnum_elem; cell_rid++) + for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) { // set nodes per element element_select->choose_3Delem_type(Element_Types(cell_rid), elem); @@ -1533,308 +1550,6 @@ class MeshReader } } - //If restarting a topology optimization run, obtain nodal design density data here - if(topology_optimization_restart){ - 
design_node_densities_distributed = Teuchos::rcp(new MV(map, 1)); - host_vec_array node_densities = design_node_densities_distributed->getLocalView (Tpetra::Access::ReadWrite); - if (myrank == 0) - { - bool found = false; - while (found == false&&in->good()) { - std::getline(*in, read_line); - //std::cout << read_line << std::endl; - line_parse.clear(); - line_parse.str(read_line); - - //stop when the design_density string is reached - while (!line_parse.eof()){ - line_parse >> substring; - //std::cout << substring << std::endl; - if(!substring.compare("design_density")){ - found = true; - } - } //while - - } // end while - - if (!found){ - throw std::runtime_error("ERROR: Failed to find design_density"); - } // end if - - //skip "LOOKUP_TABLE default" line - std::getline(*in, read_line); - } // end if(myrank==0) - - //read in density of each node - // allocate read buffer - words_per_line = 1; - read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); - - dof_limit = num_nodes; - buffer_iterations = dof_limit / BUFFER_LINES; - if (dof_limit % BUFFER_LINES != 0) - { - buffer_iterations++; - } - - // read densities - read_index_start = 0; - for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) - { - // pack buffer on rank 0 - if (myrank == 0 && buffer_iteration < buffer_iterations - 1) - { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) - { - getline(*in, read_line); - line_parse.clear(); - line_parse.str(read_line); - - for (int iword = 0; iword < words_per_line; iword++) - { - // read portions of the line into the substring variable - line_parse >> substring; - // debug print - // std::cout<<" "<< substring <> substring; - // debug print - // std::cout<<" "<< substring <isNodeGlobalElement(node_gid)) - { - // set local node index in this mpi rank - node_rid = map->getLocalElement(node_gid); - // extract nodal position from the read buffer - // for tecplot format this is the three coords in the same line 
- dof_value = atof(&read_buffer(scan_loop, 0, 0)); - node_densities(node_rid, 0) = dof_value; - } - } - read_index_start += BUFFER_LINES; - } - - //Find initial objective value to normalize by - if (myrank == 0 && simparam.optimization_options.normalized_objective) - { - bool found = false; - while (found == false&&in->good()) { - std::getline(*in, read_line); - //std::cout << read_line << std::endl; - line_parse.clear(); - line_parse.str(read_line); - - //stop when the design_density string is reached - while (!line_parse.eof()){ - line_parse >> substring; - //std::cout << substring << std::endl; - if(!substring.compare("Objective_Normalization_Constant")){ - found = true; - line_parse >> substring; - simparam.optimization_options.objective_normalization_constant = stod(substring); - } - } //while - - } // end while - - if (!found){ - throw std::runtime_error("ERROR: Failed to find initial objective value for restart"); - } // end if - } // end if(myrank==0) - } - - //If restarting a topology optimization run, obtain nodal design density data here - if(shape_optimization_restart){ - design_node_coords_distributed = Teuchos::rcp(new MV(map, num_dim)); - host_vec_array design_node_coords = design_node_coords_distributed->getLocalView (Tpetra::Access::ReadWrite); - if (myrank == 0) - { - bool found = false; - while (found == false&&in->good()) { - std::getline(*in, read_line); - //std::cout << read_line << std::endl; - line_parse.clear(); - line_parse.str(read_line); - - //stop when the design_density string is reached - while (!line_parse.eof()){ - line_parse >> substring; - //std::cout << substring << std::endl; - if(!substring.compare("design_coordinates")){ - found = true; - } - } //while - - } // end while - - if (!found){ - throw std::runtime_error("ERROR: Failed to find design_coordinates"); - } // end if - - //skip "LOOKUP_TABLE default" line - std::getline(*in, read_line); - } // end if(myrank==0) - - //read in density of each node - // allocate read buffer 
- words_per_line = num_dim; - read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); - - dof_limit = num_nodes; - buffer_iterations = dof_limit / BUFFER_LINES; - if (dof_limit % BUFFER_LINES != 0) - { - buffer_iterations++; - } - - // read densities - read_index_start = 0; - for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) - { - // pack buffer on rank 0 - if (myrank == 0 && buffer_iteration < buffer_iterations - 1) - { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) - { - getline(*in, read_line); - line_parse.clear(); - line_parse.str(read_line); - - for (int iword = 0; iword < words_per_line; iword++) - { - // read portions of the line into the substring variable - line_parse >> substring; - // debug print - // std::cout<<" "<< substring <> substring; - // debug print - // std::cout<<" "<< substring <isNodeGlobalElement(node_gid)) - { - // set local node index in this mpi rank - node_rid = map->getLocalElement(node_gid); - // extract nodal position from the read buffer - // for tecplot format this is the three coords in the same line - dof_value = atof(&read_buffer(scan_loop, 0, 0)); - design_node_coords(node_rid, 0) = dof_value * unit_scaling; - dof_value = atof(&read_buffer(scan_loop, 1, 0)); - design_node_coords(node_rid, 1) = dof_value * unit_scaling; - if(num_dim==3){ - dof_value = atof(&read_buffer(scan_loop, 2, 0)); - design_node_coords(node_rid, 2) = dof_value * unit_scaling; - } - } - } - read_index_start += BUFFER_LINES; - } - - //Find initial objective value to normalize by - if (myrank == 0 && simparam.optimization_options.normalized_objective) - { - bool found = false; - while (found == false&&in->good()) { - std::getline(*in, read_line); - //std::cout << read_line << std::endl; - line_parse.clear(); - line_parse.str(read_line); - - //stop when the design_density string is reached - while (!line_parse.eof()){ - line_parse >> substring; - //std::cout << substring << std::endl; - 
if(!substring.compare("Objective_Normalization_Constant")){ - found = true; - line_parse >> substring; - simparam.optimization_options.objective_normalization_constant = stod(substring); - } - } //while - - } // end while - - if (!found){ - throw std::runtime_error("ERROR: Failed to find initial objective value for restart"); - } // end if - } // end if(myrank==0) - } - // Close mesh input file if (myrank == 0) { From 519c236712dcbe90826d4f0aceaf49cf2835b21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 25 Apr 2025 00:58:06 -0600 Subject: [PATCH 07/66] WIP: MPI work --- .../src/common/include/mesh.h | 17 +- .../src/common/include/mesh_io.h | 230 +++++------------- single-node-refactor/src/driver.cpp | 7 +- single-node-refactor/src/main.cpp | 2 + 4 files changed, 75 insertions(+), 181 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index ffe0e22b4..eebfbe015 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -240,7 +240,7 @@ struct Mesh_t size_t num_leg_gauss_in_elem; ///< Number of Gauss Legendre points in an element size_t num_lob_gauss_in_elem; ///< Number of Gauss Lobatto points in an element - DCArrayKokkos nodes_in_elem; ///< Nodes in an element + DistributedDCArray nodes_in_elem; ///< Nodes in an element CArrayKokkos corners_in_elem; ///< Corners in an element -- this can just be a functor RaggedRightArrayKokkos elems_in_elem; ///< Elements connected to an element @@ -264,8 +264,8 @@ struct Mesh_t DistributedMap node_map; ///< partition of local nodes (stores global node IDs on each process) DistributedMap all_node_map; ///< partition of local + ghost nodes (stores global node IDs on each process) DistributedMap ghost_node_map; ///< partition of local + ghost nodes (stores global node IDs on each process) - DistributedMap element_map; ///< partition of 
uniquely owned elements (stores global node IDs on each process) - DistributedMap all_element_map; ///< partition of uniquely owned + shared elements (stores global node IDs on each process) + DistributedMap local_element_map; ///< partition of uniquely owned elements (stores global node IDs on each process) + DistributedMap element_map; ///< partition of uniquely owned + shared elements (stores global node IDs on each process) DistributedMap nonoverlap_element_node_map; // map of node indices belonging to unique element map RaggedRightArrayKokkos corners_in_node; ///< Corners connected to a node @@ -326,16 +326,11 @@ struct Mesh_t }; // end method // initialization methods - void initialize_elems(const size_t num_elems_inp, const size_t num_dims_inp) + void initialize_elems(const size_t num_elems_inp, const size_t num_nodes_in_elem, const DistributedMap input_element_map) { - num_dims = num_dims_inp; - num_nodes_in_elem = 1; - - for (int dim = 0; dim < num_dims; dim++) { - num_nodes_in_elem *= 2; - } num_elems = num_elems_inp; - nodes_in_elem = DCArrayKokkos(num_elems, num_nodes_in_elem, "mesh.nodes_in_elem"); + element_map = input_element_map; + nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); // 1 Gauss point per element diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 838cc4844..1e60d332e 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -976,7 +976,7 @@ class MeshReader in.open(mesh_file_); bool found = false; - while (found == false&&in->good()) { + while (found == false&&in.good()) { std::getline(in, read_line); line_parse.str(""); line_parse.clear(); @@ -1167,16 +1167,16 @@ class MeshReader // read in element info (ensight file format is organized in element type sections) // loop over this 
later for several element type sections - size_t num_elems = 0; - num_elems = 0; - CArrayKokkos node_store(elem_words_per_line); + size_t global_num_elems = 0; + size_t num_elems = 0; + CArrayKokkos node_store(elem_words_per_line); // --- read the number of cells in the mesh --- // --- Read the number of vertices in the mesh --- // if (myrank == 0) { bool found = false; - while (found == false&&in->good()) { + while (found == false&&in.good()) { std::getline(in, read_line); line_parse.str(""); line_parse.clear(); @@ -1187,9 +1187,9 @@ class MeshReader // CELLS num_cells size if (substring == "CELLS") { - line_parse >> num_elems; - std::cout << "declared element count: " << num_elems << std::endl; - if (num_elems <= 0) + line_parse >> global_num_elems; + std::cout << "declared element count: " << global_num_elems << std::endl; + if (global_num_elems <= 0) { throw std::runtime_error("ERROR, NO ELEMENTS IN MESH"); } @@ -1203,7 +1203,7 @@ class MeshReader } // end if(myrank==0) // broadcast number of elements - MPI_Bcast(&num_elems, 1, MPI_LONG_LONG_INT, 0, world); + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, world); //initialize num elem in mesh struct @@ -1214,10 +1214,10 @@ class MeshReader // read in element connectivity // we're gonna reallocate for the words per line expected for the element connectivity - read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); + read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); // calculate buffer iterations to read number of lines - buffer_iterations = num_elems / BUFFER_LINES; + buffer_iterations = global_num_elems / BUFFER_LINES; int assign_flag; // dynamic buffer used to store elements before we know how many this rank needs @@ -1226,7 +1226,7 @@ class MeshReader size_t buffer_max = BUFFER_LINES * elem_words_per_line; size_t indices_buffer_max = BUFFER_LINES; - if (num_elems % BUFFER_LINES != 0) + if (global_num_elems % BUFFER_LINES != 0) { buffer_iterations++; } @@ -1259,7 
+1259,7 @@ class MeshReader else if (myrank == 0) { buffer_loop = 0; - while (buffer_iteration * BUFFER_LINES + buffer_loop < num_elems) { + while (buffer_iteration * BUFFER_LINES + buffer_loop < global_num_elems) { getline(in, read_line); line_parse.clear(); line_parse.str(read_line); @@ -1353,103 +1353,15 @@ class MeshReader read_index_start += BUFFER_LINES; } - //initialize elem data structures on this process - mesh.initialize_elems(num_elems); - //set global and local shared element counts - mesh.num_elems = num_elems; - mesh.num_elems = num_elems; - - // std::cout << "RNUM ELEMENTS IS: " << num_elems << std::endl; - - Element_Types = CArrayKokkos(num_elems); - - elements::elem_types::elem_type mesh_element_type; - - if (simparam.num_dims == 2) - { - if (input_options.element_type == ELEMENT_TYPE::quad4) - { - mesh_element_type = elements::elem_types::Quad4; - max_nodes_per_patch = 2; - } - else if (input_options.element_type == ELEMENT_TYPE::quad8) - { - mesh_element_type = elements::elem_types::Quad8; - max_nodes_per_patch = 3; - } - else if (input_options.element_type == ELEMENT_TYPE::quad12) - { - mesh_element_type = elements::elem_types::Quad12; - max_nodes_per_patch = 4; - } - else - { - if (myrank == 0) - { - std::cout << "ELEMENT TYPE UNRECOGNIZED" << std::endl; - } - exit_solver(0); - } - element_select->choose_2Delem_type(mesh_element_type, elem2D); - max_nodes_per_element = elem2D->num_nodes(); - } - - if (simparam.num_dims == 3) - { - if (input_options.element_type == ELEMENT_TYPE::hex8) - { - mesh_element_type = elements::elem_types::Hex8; - max_nodes_per_patch = 4; - } - else if (input_options.element_type == ELEMENT_TYPE::hex20) - { - mesh_element_type = elements::elem_types::Hex20; - max_nodes_per_patch = 8; - } - else if (input_options.element_type == ELEMENT_TYPE::hex32) - { - mesh_element_type = elements::elem_types::Hex32; - max_nodes_per_patch = 12; - } - else - { - if (myrank == 0) - { - std::cout << "ELEMENT TYPE UNRECOGNIZED" << 
std::endl; - } - exit_solver(0); - } - element_select->choose_3Delem_type(mesh_element_type, elem); - max_nodes_per_element = elem->num_nodes(); - } - - // 1 type per mesh for now - for (int ielem = 0; ielem < num_elems; ielem++) - { - Element_Types(ielem) = mesh_element_type; - } - - // copy temporary element storage to multivector storage - dual_nodes_in_elem = dual_elem_conn_array("dual_nodes_in_elem", num_elems, max_nodes_per_element); - host_elem_conn_array nodes_in_elem = dual_nodes_in_elem.view_host(); - dual_nodes_in_elem.modify_host(); - - for (int ielem = 0; ielem < num_elems; ielem++) - { - for (int inode = 0; inode < elem_words_per_line; inode++) - { - nodes_in_elem(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; - } - } + mesh.global_num_elems = global_num_elems; - // view storage for all local elements connected to local nodes on this rank - // DCArrayKokkos All_Element_Global_Indices(num_elems); - Kokkos::DualView All_Element_Global_Indices("All_Element_Global_Indices", num_elems); + // construct partition mapping for shared elements on each process + DCArrayKokkos All_Element_Global_Indices(num_elems); // copy temporary global indices storage to view storage for (int ielem = 0; ielem < num_elems; ielem++) { - All_Element_Global_Indices.h_view(ielem) = global_indices_temp[ielem]; + All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; if (global_indices_temp[ielem] < 0) { negative_index_found = 1; @@ -1463,29 +1375,33 @@ class MeshReader { std::cout << "Node index less than or equal to zero detected; set \"zero_index_base: true\" under \"input_options\" in your yaml file if indices start at 0" << std::endl; } - exit_solver(0); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + exit(0); } // delete temporary element connectivity and index storage std::vector().swap(element_temp); std::vector().swap(global_indices_temp); - All_Element_Global_Indices.modify_host(); - All_Element_Global_Indices.sync_device(); + 
All_Element_Global_Indices.update_device(); - // debug print - /* - Kokkos::View All_Element_Global_Indices_pass("All_Element_Global_Indices_pass",num_elems); - deep_copy(All_Element_Global_Indices_pass, All_Element_Global_Indices.h_view); - std::cout << " ------------ELEMENT GLOBAL INDICES ON TASK " << myrank << " --------------"<(Teuchos::OrdinalTraits::invalid(), All_Element_Global_Indices.d_view, 0, comm)); + //initialize elem data structures + mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); + + // copy temporary element storage to distributed storage + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; + + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < elem_words_per_line; inode++) + { + nodes_in_elem.host(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; + } + } // element type selection (subject to change) // ---- Set Element Type ---- // @@ -1496,64 +1412,44 @@ class MeshReader // Convert ensight index system to the ijk finite element numbering convention // for vertices in cell - if (active_node_ordering_convention == IJK) + CArrayKokkos convert_ensight_to_ijk(max_nodes_per_element); + CArrayKokkos tmp_ijk_indx(max_nodes_per_element); + convert_ensight_to_ijk(0) = 0; + convert_ensight_to_ijk(1) = 1; + convert_ensight_to_ijk(2) = 3; + convert_ensight_to_ijk(3) = 2; + convert_ensight_to_ijk(4) = 4; + convert_ensight_to_ijk(5) = 5; + convert_ensight_to_ijk(6) = 7; + convert_ensight_to_ijk(7) = 6; + + for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) { - CArrayKokkos convert_ensight_to_ijk(max_nodes_per_element); - CArrayKokkos tmp_ijk_indx(max_nodes_per_element); - convert_ensight_to_ijk(0) = 0; - convert_ensight_to_ijk(1) = 1; - convert_ensight_to_ijk(2) = 3; - convert_ensight_to_ijk(3) = 2; - convert_ensight_to_ijk(4) = 4; - convert_ensight_to_ijk(5) = 5; - convert_ensight_to_ijk(6) = 7; - convert_ensight_to_ijk(7) = 6; - - int nodes_per_element; - - if (num_dim == 2) + for 
(int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) - { - // set nodes per element - element_select->choose_2Delem_type(Element_Types(cell_rid), elem2D); - nodes_per_element = elem2D->num_nodes(); - for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) - { - tmp_ijk_indx(node_lid) = nodes_in_elem(cell_rid, convert_ensight_to_ijk(node_lid)); - } - - for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) - { - nodes_in_elem(cell_rid, node_lid) = tmp_ijk_indx(node_lid); - } - } + tmp_ijk_indx(node_lid) = nodes_in_elem.host(cell_rid, convert_ensight_to_ijk(node_lid)); } - if (num_dim == 3) + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) - { - // set nodes per element - element_select->choose_3Delem_type(Element_Types(cell_rid), elem); - nodes_per_element = elem->num_nodes(); - for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) - { - tmp_ijk_indx(node_lid) = nodes_in_elem(cell_rid, convert_ensight_to_ijk(node_lid)); - } - - for (int node_lid = 0; node_lid < nodes_per_element; node_lid++) - { - nodes_in_elem(cell_rid, node_lid) = tmp_ijk_indx(node_lid); - } - } + nodes_in_elem.host(cell_rid, node_lid) = tmp_ijk_indx(node_lid); } } + + + nodes_in_elem.update_device(); + + // initialize corner variables + size_t num_corners = num_elems * num_nodes_in_elem; + mesh.initialize_corners(num_corners); + + // Build connectivity + mesh.build_connectivity(); // Close mesh input file if (myrank == 0) { - in->close(); + in.close(); } } // end read_mesh diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index 7da885198..1cc1daf6a 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -65,8 +65,8 @@ void Driver::initialize() std::cout << "Exception " << e.Type() << ": " << e.what() << std::endl; } - MPI_Finalize(); 
MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); exit(0); } @@ -82,6 +82,7 @@ void Driver::initialize() std::cout << "Mesh file path: " << SimulationParameters.mesh_input.file_path << std::endl; } mesh_reader.set_mesh_file(SimulationParameters.mesh_input.file_path.data()); + mesh.num_dims = num_dims; mesh_reader.read_mesh(mesh, State, SimulationParameters.mesh_input, @@ -96,8 +97,8 @@ void Driver::initialize() } else{ throw std::runtime_error("**** NO MESH INPUT OPTIONS PROVIDED IN YAML ****"); - MPI_Finalize(); MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); exit(0); } @@ -277,8 +278,8 @@ void Driver::initialize() } // end if SGTM solver else { throw std::runtime_error("**** NO SOLVER INPUT OPTIONS PROVIDED IN YAML, OR OPTION NOT UNDERSTOOD ****"); - MPI_Finalize(); MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); exit(0); } diff --git a/single-node-refactor/src/main.cpp b/single-node-refactor/src/main.cpp index c6476a894..543c0c90a 100644 --- a/single-node-refactor/src/main.cpp +++ b/single-node-refactor/src/main.cpp @@ -135,6 +135,8 @@ int main(int argc, char* argv[]) if(myrank == 0){ std::cout << "**** End of main **** " << std::endl; } + + MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); return 0; } From 6c78942befa3719d168605b16de16c5e17c86b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 23 May 2025 09:54:02 -0600 Subject: [PATCH 08/66] BUG: merge corrections --- single-node-refactor/src/common/include/state.h | 3 +++ .../Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 9c9a31f91..26e6bfc22 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -347,6 +347,9 @@ struct node_t case node_state::gradient_level_set: if (gradient_level_set.size() == 0) 
this->gradient_level_set = DCArrayKokkos(num_nodes, num_dims, "node_grad_levelset"); break; + case node_state::gradient_level_set: + if (gradient_level_set.size() == 0) this->gradient_level_set = DCArrayKokkos(num_nodes, num_dims, "node_grad_levelset"); + break; default: std::cout<<"Desired node state not understood in node_t initialize"<get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); - // cut out the node_gids for this element - ViewCArrayKokkos elem_node_gids(&nodes_in_elem(elem_id, 0), 8); - // gradients of the element volume FEM_SGH_->get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); From febd0320c90888544b1816f6b91abd88ed81d7dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sat, 24 May 2025 14:25:28 -0600 Subject: [PATCH 09/66] WIP: MPI changes --- .../src/common/include/geometry_new.h | 18 +- .../src/common/include/mesh.h | 13 +- .../src/common/include/mesh_io.h | 1813 +++++++++-------- .../src/common/include/state.h | 8 +- .../src/common/src/geometry_new.cpp | 18 +- .../src/common/src/region_fill.cpp | 2 +- single-node-refactor/src/driver.cpp | 8 +- 7 files changed, 941 insertions(+), 939 deletions(-) diff --git a/single-node-refactor/src/common/include/geometry_new.h b/single-node-refactor/src/common/include/geometry_new.h index 54153823e..b71a4be8d 100644 --- a/single-node-refactor/src/common/include/geometry_new.h +++ b/single-node-refactor/src/common/include/geometry_new.h @@ -70,7 +70,7 @@ namespace geometry KOKKOS_FUNCTION void get_bmatrix(const ViewCArrayKokkos& B_matrix, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); ///////////////////////////////////////////////////////////////////////////// @@ -88,7 +88,7 @@ void get_bmatrix(const ViewCArrayKokkos& B_matrix, KOKKOS_FUNCTION void get_vol_quad(const DCArrayKokkos& 
elem_vol, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); ///////////////////////////////////////////////////////////////////////////// @@ -106,7 +106,7 @@ void get_vol_quad(const DCArrayKokkos& elem_vol, KOKKOS_FUNCTION void get_vol_hex(const DCArrayKokkos& elem_vol, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); ///////////////////////////////////////////////////////////////////////////// @@ -117,7 +117,7 @@ void get_vol_hex(const DCArrayKokkos& elem_vol, /// ///////////////////////////////////////////////////////////////////////////// void get_vol(const DCArrayKokkos& elem_vol, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const Mesh_t& mesh); ///////////////////////////////////////////////////////////////////////////// @@ -135,7 +135,7 @@ void get_vol(const DCArrayKokkos& elem_vol, KOKKOS_FUNCTION void get_bmatrix2D(const ViewCArrayKokkos& B_matrix, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); ///////////////////////////////////////////////////////////////////////////// @@ -153,7 +153,7 @@ void get_bmatrix2D(const ViewCArrayKokkos& B_matrix, ///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION double get_area_quad(const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); ///////////////////////////////////////////////////////////////////////////// @@ -195,7 +195,7 @@ double heron(const double x1, KOKKOS_FUNCTION void get_area_weights2D(const ViewCArrayKokkos& corner_areas, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids); } // end namespace 
@@ -222,7 +222,7 @@ size_t check_bdy(const size_t patch_gid, const double orig_y, const double orig_z, const Mesh_t& mesh, - const DCArrayKokkos& node_coords); + const DistributedDCArray& node_coords); ///////////////////////////////////////////////////////////////////////////// /// @@ -237,7 +237,7 @@ size_t check_bdy(const size_t patch_gid, ///////////////////////////////////////////////////////////////////////////// void tag_bdys(const BoundaryCondition_t& boundary, Mesh_t& mesh, - const DCArrayKokkos& node_coords); + const DistributedDCArray& node_coords); ///////////////////////////////////////////////////////////////////////////// diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index f02bd1943..8486438b8 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -255,10 +255,10 @@ struct Mesh_t legendre_in_elem_t legendre_in_elem; ///< Gauss Legendre points in an element // ---- Node Data Definitions ---- // - size_t num_nodes; ///< Global Number of nodes in the mesh - size_t nlocal_nodes; ///< number of nodes local to this process - size_t nall_nodes; ///< number of local + ghost nodes on this process - size_t nghost_nodes; ///< number of ghost nodes on this process + size_t global_num_nodes; ///< Global Number of nodes in the mesh + size_t num_nodes; ///< number of local + ghost nodes on this process + size_t num_local_nodes; ///< number of nodes local to this process + size_t num_ghost_nodes; ///< number of ghost nodes on this process //distributed map definitions DistributedMap node_map; ///< partition of local nodes (stores global node IDs on each process) @@ -351,7 +351,8 @@ struct Mesh_t const size_t num_zones_in_elem_inp, const size_t num_nodes_in_zone_inp, const size_t num_surfs_in_elem_inp, - const size_t num_dims_inp) + const size_t num_dims_inp, + const DistributedMap input_element_map) { num_dims = num_dims_inp; num_elems = 
num_elems_inp; @@ -364,7 +365,7 @@ struct Mesh_t num_zones = num_zones_in_elem * num_elems; - nodes_in_elem = DCArrayKokkos(num_elems, num_nodes_in_elem, "mesh.nodes_in_elem"); + nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); zones_in_elem = zones_in_elem_t(num_zones_in_elem); surfs_in_elem = CArrayKokkos(num_elems, num_surfs_in_elem, "mesh.surfs_in_zone"); diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 369619556..138b81afd 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -366,16 +366,16 @@ class MeshReader std::cout << "File extension is: " << extension << std::endl; if(extension == "geo"){ // Ensight meshfile extension - read_ensight_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); + //read_ensight_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); } else if(extension == "inp"){ // Abaqus meshfile extension - read_Abaqus_mesh(mesh, State, num_dims); + //read_Abaqus_mesh(mesh, State, num_dims); } else if(extension == "vtk"){ // vtk file format read_vtk_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); } else if(extension == "vtu"){ // vtu file format - read_vtu_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); + //read_vtu_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); } else{ throw std::runtime_error("**** Mesh file extension not understood ****"); @@ -396,144 +396,144 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - void read_ensight_mesh(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - node_t& node, - corner_t& corner, - mesh_input_t& mesh_inps, - int num_dims) - { - 
FILE* in; - char ch; + // void read_ensight_mesh(Mesh_t& mesh, + // GaussPoint_t& GaussPoints, + // node_t& node, + // corner_t& corner, + // mesh_input_t& mesh_inps, + // int num_dims) + // { + // FILE* in; + // char ch; - size_t num_nodes_in_elem = 1; - for (int dim = 0; dim < num_dims; dim++) { - num_nodes_in_elem *= 2; - } + // size_t num_nodes_in_elem = 1; + // for (int dim = 0; dim < num_dims; dim++) { + // num_nodes_in_elem *= 2; + // } - // read the mesh WARNING: assumes a .geo file - in = fopen(mesh_file_, "r"); + // // read the mesh WARNING: assumes a .geo file + // in = fopen(mesh_file_, "r"); - // skip 8 lines - for (int j = 1; j <= 8; j++) { - int i = 0; - while ((ch = (char)fgetc(in)) != '\n') { - i++; - } - } + // // skip 8 lines + // for (int j = 1; j <= 8; j++) { + // int i = 0; + // while ((ch = (char)fgetc(in)) != '\n') { + // i++; + // } + // } - // --- Read in the nodes in the mesh --- + // // --- Read in the nodes in the mesh --- - size_t num_nodes = 0; + // size_t num_nodes = 0; - fscanf(in, "%lu", &num_nodes); - printf("Number of nodes read in %lu\n", num_nodes); + // fscanf(in, "%lu", &num_nodes); + // printf("Number of nodes read in %lu\n", num_nodes); - mesh.initialize_nodes(num_nodes); + // mesh.initialize_nodes(num_nodes); - // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dims, required_node_state); + // // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dims, required_node_state); - // read the initial mesh coordinates - // x-coords - for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { - fscanf(in, "%le", &node.coords.host(node_id, 0)); - node.coords.host(node_id, 0)*= mesh_inps.scale_x; - 
} + // // read the initial mesh coordinates + // // x-coords + // for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { + // fscanf(in, "%le", &node.coords.host(node_id, 0)); + // node.coords.host(node_id, 0)*= mesh_inps.scale_x; + // } - // y-coords - for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { - fscanf(in, "%le", &node.coords.host(node_id, 1)); - node.coords.host(node_id, 1)*= mesh_inps.scale_y; - } + // // y-coords + // for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { + // fscanf(in, "%le", &node.coords.host(node_id, 1)); + // node.coords.host(node_id, 1)*= mesh_inps.scale_y; + // } - // z-coords - for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { - if (num_dims == 3) { - fscanf(in, "%le", &node.coords.host(node_id, 2)); - node.coords.host(node_id, 2)*= mesh_inps.scale_z; - } - else{ - double dummy; - fscanf(in, "%le", &dummy); - } - } // end for + // // z-coords + // for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { + // if (num_dims == 3) { + // fscanf(in, "%le", &node.coords.host(node_id, 2)); + // node.coords.host(node_id, 2)*= mesh_inps.scale_z; + // } + // else{ + // double dummy; + // fscanf(in, "%le", &dummy); + // } + // } // end for - // Update device nodal positions - node.coords.update_device(); + // // Update device nodal positions + // node.coords.update_device(); - ch = (char)fgetc(in); + // ch = (char)fgetc(in); - // skip 1 line - for (int j = 1; j <= 1; j++) { - int i = 0; - while ((ch = (char)fgetc(in)) != '\n') { - i++; - } - } + // // skip 1 line + // for (int j = 1; j <= 1; j++) { + // int i = 0; + // while ((ch = (char)fgetc(in)) != '\n') { + // i++; + // } + // } - // --- read in the elements in the mesh --- - size_t num_elems = 0; + // // --- read in the elements in the mesh --- + // size_t num_elems = 0; - fscanf(in, "%lu", &num_elems); - printf("Number of elements read in %lu\n", num_elems); + // fscanf(in, "%lu", &num_elems); + // printf("Number of elements read in %lu\n", 
num_elems); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dims); - // GaussPoints.initialize(num_elems, 3); // always 3D here, even for 2D + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dims); + // // GaussPoints.initialize(num_elems, 3); // always 3D here, even for 2D - // for each cell read the list of associated nodes - for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - fscanf(in, "%lu", &mesh.nodes_in_elem.host(elem_gid, node_lid)); // %d vs zu + // // for each cell read the list of associated nodes + // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { + // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { + // fscanf(in, "%lu", &mesh.nodes_in_elem.host(elem_gid, node_lid)); // %d vs zu - // shift to start node index space at 0 - mesh.nodes_in_elem.host(elem_gid, node_lid) -= 1; - } - } + // // shift to start node index space at 0 + // mesh.nodes_in_elem.host(elem_gid, node_lid) -= 1; + // } + // } - // Convert from ensight to IJK mesh - int convert_ensight_to_ijk[8]; - convert_ensight_to_ijk[0] = 0; - convert_ensight_to_ijk[1] = 1; - convert_ensight_to_ijk[2] = 3; - convert_ensight_to_ijk[3] = 2; - convert_ensight_to_ijk[4] = 4; - convert_ensight_to_ijk[5] = 5; - convert_ensight_to_ijk[6] = 7; - convert_ensight_to_ijk[7] = 6; + // // Convert from ensight to IJK mesh + // int convert_ensight_to_ijk[8]; + // convert_ensight_to_ijk[0] = 0; + // convert_ensight_to_ijk[1] = 1; + // convert_ensight_to_ijk[2] = 3; + // convert_ensight_to_ijk[3] = 2; + // convert_ensight_to_ijk[4] = 4; + // convert_ensight_to_ijk[5] = 5; + // convert_ensight_to_ijk[6] = 7; + // convert_ensight_to_ijk[7] = 6; - int tmp_ijk_indx[8]; + // int tmp_ijk_indx[8]; - for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - tmp_ijk_indx[node_lid] = 
mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); - } + // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { + // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { + // tmp_ijk_indx[node_lid] = mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); + // } - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++){ - mesh.nodes_in_elem.host(elem_gid, node_lid) = tmp_ijk_indx[node_lid]; - } - } - // update device side - mesh.nodes_in_elem.update_device(); + // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++){ + // mesh.nodes_in_elem.host(elem_gid, node_lid) = tmp_ijk_indx[node_lid]; + // } + // } + // // update device side + // mesh.nodes_in_elem.update_device(); - // initialize corner variables - int num_corners = num_elems * mesh.num_nodes_in_elem; - mesh.initialize_corners(num_corners); - // corner.initialize(num_corners, num_dims); + // // initialize corner variables + // int num_corners = num_elems * mesh.num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + // // corner.initialize(num_corners, num_dims); - // Close mesh input file - fclose(in); + // // Close mesh input file + // fclose(in); - // Build connectivity - mesh.build_connectivity(); + // // Build connectivity + // mesh.build_connectivity(); - return; - } // end read ensight mesh + // return; + // } // end read ensight mesh ///////////////////////////////////////////////////////////////////////////// /// @@ -547,154 +547,154 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - void read_Abaqus_mesh(Mesh_t& mesh, - State_t& State, - int num_dims) - { - - std::cout<<"Reading abaqus input file for mesh"< nodes; - std::vector elements; - - std::string line; - bool readingNodes = false; - bool readingElements = false; + // void read_Abaqus_mesh(Mesh_t& mesh, + // State_t& State, + // int num_dims) + // { - while 
(std::getline(inputFile, line)) { - if (line.find("*Node") != std::string::npos) { - readingNodes = true; - std::cout<<"Found *Node"<> node.id && std::getline(iss, token, ',') && iss >> node.x && - std::getline(iss, token, ',') && iss >> node.y && - std::getline(iss, token, ',') && iss >> node.z)) { - std::cerr << "Failed to parse line: " << line << std::endl; - continue; // Skip this line if parsing failed - } - nodes.push_back(node); - } - - if (line.find("*Element") != std::string::npos) { - readingElements = true; - std::cout<<"Found *Element*"<> element.id)){ - std::cout << "Failed to parse line: " << line << std::endl; - continue; // Skip this line if parsing failed - } - - while ((std::getline(iss, token, ','))) { - // Now extract the integer, ignoring any trailing whitespace - int val; - iss >> val; - element.connectivity.push_back(val); - } + // } - // Convert from abaqus to IJK mesh - int convert_abq_to_ijk[8]; - convert_abq_to_ijk[0] = 0; - convert_abq_to_ijk[1] = 1; - convert_abq_to_ijk[2] = 3; - convert_abq_to_ijk[3] = 2; - convert_abq_to_ijk[4] = 4; - convert_abq_to_ijk[5] = 5; - convert_abq_to_ijk[6] = 7; - convert_abq_to_ijk[7] = 6; + // std::vector nodes; + // std::vector elements; + + // std::string line; + // bool readingNodes = false; + // bool readingElements = false; + + // while (std::getline(inputFile, line)) { + // if (line.find("*Node") != std::string::npos) { + // readingNodes = true; + // std::cout<<"Found *Node"<> node.id && std::getline(iss, token, ',') && iss >> node.x && + // std::getline(iss, token, ',') && iss >> node.y && + // std::getline(iss, token, ',') && iss >> node.z)) { + // std::cerr << "Failed to parse line: " << line << std::endl; + // continue; // Skip this line if parsing failed + // } + // nodes.push_back(node); + // } - int tmp_ijk_indx[8]; + // if (line.find("*Element") != std::string::npos) { + // readingElements = true; + // std::cout<<"Found *Element*"<> element.id)){ + // std::cout << "Failed to parse line: " << 
line << std::endl; + // continue; // Skip this line if parsing failed + // } + + // while ((std::getline(iss, token, ','))) { + // // Now extract the integer, ignoring any trailing whitespace + // int val; + // iss >> val; + // element.connectivity.push_back(val); + // } - for (int node_lid = 0; node_lid < 8; node_lid++) { - tmp_ijk_indx[node_lid] = element.connectivity[convert_abq_to_ijk[node_lid]]; - } + // // Convert from abaqus to IJK mesh + // int convert_abq_to_ijk[8]; + // convert_abq_to_ijk[0] = 0; + // convert_abq_to_ijk[1] = 1; + // convert_abq_to_ijk[2] = 3; + // convert_abq_to_ijk[3] = 2; + // convert_abq_to_ijk[4] = 4; + // convert_abq_to_ijk[5] = 5; + // convert_abq_to_ijk[6] = 7; + // convert_abq_to_ijk[7] = 6; + + // int tmp_ijk_indx[8]; + + // for (int node_lid = 0; node_lid < 8; node_lid++) { + // tmp_ijk_indx[node_lid] = element.connectivity[convert_abq_to_ijk[node_lid]]; + // } - for (int node_lid = 0; node_lid < 8; node_lid++){ - element.connectivity[node_lid] = tmp_ijk_indx[node_lid]; - } + // for (int node_lid = 0; node_lid < 8; node_lid++){ + // element.connectivity[node_lid] = tmp_ijk_indx[node_lid]; + // } - elements.push_back(element); - } - } + // elements.push_back(element); + // } + // } - inputFile.close(); + // inputFile.close(); - size_t num_nodes = nodes.size(); + // size_t num_nodes = nodes.size(); - printf("Number of nodes read in %lu\n", num_nodes); + // printf("Number of nodes read in %lu\n", num_nodes); - // initialize node variables - mesh.initialize_nodes(num_nodes); + // // initialize node variables + // mesh.initialize_nodes(num_nodes); - // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers - std::vector required_node_state = { node_state::coords }; + // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers + // std::vector required_node_state = { node_state::coords }; - 
State.node.initialize(num_nodes, num_dims, required_node_state); + // State.node.initialize(num_nodes, num_dims, required_node_state); - // Copy nodes to mesh - for(int node_gid = 0; node_gid < num_nodes; node_gid++){ - State.node.coords.host(node_gid, 0) = nodes[node_gid].x; - State.node.coords.host(node_gid, 1) = nodes[node_gid].y; - State.node.coords.host(node_gid, 2) = nodes[node_gid].z; - } + // // Copy nodes to mesh + // for(int node_gid = 0; node_gid < num_nodes; node_gid++){ + // State.node.coords.host(node_gid, 0) = nodes[node_gid].x; + // State.node.coords.host(node_gid, 1) = nodes[node_gid].y; + // State.node.coords.host(node_gid, 2) = nodes[node_gid].z; + // } - // Update device nodal positions - State.node.coords.update_device(); + // // Update device nodal positions + // State.node.coords.update_device(); - // --- read in the elements in the mesh --- - size_t num_elems = elements.size(); - printf("Number of elements read in %lu\n", num_elems); + // // --- read in the elements in the mesh --- + // size_t num_elems = elements.size(); + // printf("Number of elements read in %lu\n", num_elems); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dims); + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dims); - // for each cell read the list of associated nodes - for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - for (int node_lid = 0; node_lid < 8; node_lid++) { - mesh.nodes_in_elem.host(elem_gid, node_lid) = elements[elem_gid].connectivity[node_lid]; + // // for each cell read the list of associated nodes + // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { + // for (int node_lid = 0; node_lid < 8; node_lid++) { + // mesh.nodes_in_elem.host(elem_gid, node_lid) = elements[elem_gid].connectivity[node_lid]; - // shift to start node index space at 0 - mesh.nodes_in_elem.host(elem_gid, node_lid) -= 1; - } - } + // // shift to start node index space at 0 + // mesh.nodes_in_elem.host(elem_gid, 
node_lid) -= 1; + // } + // } - // update device side - mesh.nodes_in_elem.update_device(); + // // update device side + // mesh.nodes_in_elem.update_device(); - // initialize corner variables - int num_corners = num_elems * mesh.num_nodes_in_elem; - mesh.initialize_corners(num_corners); - // State.corner.initialize(num_corners, num_dims); + // // initialize corner variables + // int num_corners = num_elems * mesh.num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + // // State.corner.initialize(num_corners, num_dims); - // Build connectivity - mesh.build_connectivity(); - } // end read abaqus mesh + // // Build connectivity + // mesh.build_connectivity(); + // } // end read abaqus mesh ///////////////////////////////////////////////////////////////////////////// @@ -962,17 +962,17 @@ class MeshReader long long int node_gid; bool zero_index_base = true; real_t dof_value; + real_t unit_scaling = 1; CArrayKokkos read_buffer; // read the mesh // --- Read the number of nodes in the mesh --- // - size_t num_nodes = 0; + size_t global_num_nodes = 0; if (myrank == 0) { std::cout << " NUM DIM is " << num_dims << std::endl; - in = new std::ifstream(); in.open(mesh_file_); bool found = false; @@ -987,9 +987,9 @@ class MeshReader // POINTS %d float if (substring == "POINTS") { - line_parse >> num_nodes; - std::cout << "declared node count: " << num_nodes << std::endl; - if (num_nodes <= 0) + line_parse >> global_num_nodes; + std::cout << "declared node count: " << global_num_nodes << std::endl; + if (global_num_nodes <= 0) { throw std::runtime_error("ERROR, NO NODES IN MESH"); } @@ -1004,7 +1004,7 @@ class MeshReader } // end if(myrank==0) // broadcast number of nodes - MPI_Bcast(&num_nodes, 1, MPI_LONG_LONG_INT, 0, world); + MPI_Bcast(&global_num_nodes, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); // host_vec_array node_coords = dual_node_coords.view_host(); // notify that the host view is going to be modified in the file readin @@ -1013,7 +1013,7 @@ class MeshReader 
// old swage method // mesh->init_nodes(local_nrows); // add 1 for index starting at 1 - //std::cout << "Num nodes assigned to task " << myrank << " = " << nlocal_nodes << std::endl; + //std::cout << "Num nodes assigned to task " << myrank << " = " << num_local_nodes << std::endl; // read the initial mesh coordinates // x-coords @@ -1026,13 +1026,13 @@ class MeshReader num_nodes_in_elem *= 2; } - words_per_line = num_dims; - elem_words_per_line = num_nodes_in_elem; + int words_per_line = num_dims; + int elem_words_per_line = num_nodes_in_elem; // allocate read buffer read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); - dof_limit = num_nodes; + dof_limit = global_num_nodes; buffer_iterations = dof_limit / BUFFER_LINES; if (dof_limit % BUFFER_LINES != 0) { @@ -1041,19 +1041,19 @@ class MeshReader // read coords read_index_start = 0; - size_t nlocal_nodes; + size_t num_local_nodes; DistributedMap map; { //scoped so temp FArray data is auto deleted to save memory //allocate pre-partition node coords using contiguous decomposition //FArray type used since CArray type still doesnt support zoltan2 decomposition - DistributedDFArray node_coords_distributed(num_nodes, num_dims); + DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); // construct contiguous parallel row map now that we know the number of nodes map = node_coords_distributed.pmap; // map->describe(*fos,Teuchos::VERB_EXTREME); // set the vertices in the mesh read in - nlocal_nodes = map.size(); + num_local_nodes = map.size(); for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) { // pack buffer on rank 0 @@ -1079,7 +1079,7 @@ class MeshReader else if (myrank == 0) { buffer_loop = 0; - while (buffer_iteration * BUFFER_LINES + buffer_loop < num_nodes) { + while (buffer_iteration * BUFFER_LINES + buffer_loop < global_num_nodes) { getline(in, read_line); line_parse.clear(); line_parse.str(read_line); @@ -1097,9 +1097,9 @@ class MeshReader } // 
broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong - MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * words_per_line * MAX_WORD, MPI_CHAR, 0, world); + MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); // broadcast how many nodes were read into this buffer iteration - MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, world); + MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, MPI_COMM_WORLD); // debug_print // std::cout << "NODE BUFFER LOOP IS: " << buffer_loop << std::endl; @@ -1114,17 +1114,17 @@ class MeshReader // set global node id (ensight specific order) node_gid = read_index_start + scan_loop; // let map decide if this node id belongs locally; if yes store data - if (map->isNodeGlobalElement(node_gid)) + if (map.isProcessGlobalIndex(node_gid)) { // set local node index in this mpi rank - node_rid = map->getLocalElement(node_gid); + node_rid = map.getLocalIndex(node_gid); // extract nodal position from the read buffer // for tecplot format this is the three coords in the same line dof_value = atof(&read_buffer(scan_loop, 0, 0)); node_coords_distributed.host(node_rid, 0) = dof_value * unit_scaling; dof_value = atof(&read_buffer(scan_loop, 1, 0)); node_coords_distributed.host(node_rid, 1) = dof_value * unit_scaling; - if (num_dim == 3) + if (num_dims == 3) { dof_value = atof(&read_buffer(scan_loop, 2, 0)); node_coords_distributed.host(node_rid, 2) = dof_value * unit_scaling; @@ -1140,13 +1140,13 @@ class MeshReader //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope std::vector required_node_state = { node_state::coords }; map = node_coords_distributed.pmap; - node.post_repartition_initialize(partitioned_map, num_dims, required_node_state); + node.post_repartition_initialize(map, num_dims, required_node_state); } //initialize some mesh data - mesh.initialize_nodes(num_nodes); - nlocal_nodes = partitioned_map.size(); - 
mesh.nlocal_nodes = nlocal_nodes; + mesh.initialize_nodes(global_num_nodes); + num_local_nodes = map.size(); + mesh.num_local_nodes = num_local_nodes; // debug print of nodal data @@ -1155,7 +1155,7 @@ class MeshReader std::cout << " ------------NODAL POSITIONS ON TASK " << myrank << " --------------"<getGlobalElement(inode) + 1 << " { "; - for (int istride = 0; istride < num_dim; istride++){ + for (int istride = 0; istride < num_dims; istride++){ std::cout << node_coords(inode,istride) << " , "; } std::cout << " }"<< std::endl; @@ -1203,7 +1203,7 @@ class MeshReader } // end if(myrank==0) // broadcast number of elements - MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, world); + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); //initialize num elem in mesh struct @@ -1280,9 +1280,9 @@ class MeshReader } // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong - MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * elem_words_per_line * MAX_WORD, MPI_CHAR, 0, world); + MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * elem_words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); // broadcast how many nodes were read into this buffer iteration - MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, world); + MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, MPI_COMM_WORLD); // store element connectivity that belongs to this rank // loop through read buffer @@ -1393,7 +1393,7 @@ class MeshReader mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); // copy temporary element storage to distributed storage - DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; for (int ielem = 0; ielem < num_elems; ielem++) { @@ -1412,8 +1412,8 @@ class MeshReader // Convert ensight index system to the ijk finite element numbering convention // for vertices in cell - CArrayKokkos convert_ensight_to_ijk(max_nodes_per_element); - CArrayKokkos tmp_ijk_indx(max_nodes_per_element); + 
CArrayKokkos convert_ensight_to_ijk(num_nodes_in_elem); + CArrayKokkos tmp_ijk_indx(num_nodes_in_elem); convert_ensight_to_ijk(0) = 0; convert_ensight_to_ijk(1) = 1; convert_ensight_to_ijk(2) = 3; @@ -1466,306 +1466,306 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - void read_vtu_mesh(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - node_t& node, - corner_t& corner, - mesh_input_t& mesh_inps, - int num_dims) - { + // void read_vtu_mesh(Mesh_t& mesh, + // GaussPoint_t& GaussPoints, + // node_t& node, + // corner_t& corner, + // mesh_input_t& mesh_inps, + // int num_dims) + // { - std::cout<<"Reading VTU file in a multiblock VTK mesh"< required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dims, required_node_state); - - //------------------------------------ - // allocate the elem object id array - mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); - - - // ------------------------ - // Mesh file storage order: - // objectId - // Points - // connectivity - // offsets - // types - // ------------------------ + // //------------------------------------ + // // allocate mesh class nodes and elems + // mesh.initialize_nodes(num_nodes); + // mesh.initialize_elems(num_elems, num_dims); + + // //------------------------------------ + // // allocate node coordinate state + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dims, required_node_state); + + // //------------------------------------ + // // allocate the elem object id array + // mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); + + + // // ------------------------ + // // Mesh file storage order: + // // objectId + // // Points + // // connectivity + // // offsets + // // types + // // ------------------------ - // temporary arrays - DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files - 
DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); - DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types - - - // for all fields, we stop recording when we get to "<" - std::string stop = "<"; - - // the size of 1D storage from reading the mesh file - size_t size; - - // --- - // Object ids - // --- - - // the object id in the element - // array dims are (num_elems) - found = extract_values_xml(mesh_inps.object_ids.host.pointer(), - "\"ObjectId\"", - stop, - in, - size); - if(found==false){ - throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); - //std::cout << "ERROR: ObjectIDs were not found in the XML file!" << std::endl; - } - mesh_inps.object_ids.update_device(); - - - // --- - // Nodal coordinates of mesh - // --- - - // coordinates of the node - // array dims are (num_nodes,dims) - // must use the quotes around Points to read the point values - found = extract_values_xml(node_coords.host.pointer(), - "\"Points\"", - stop, - in, - size); - if(found==false){ - throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); - //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; - } - if (size!=num_nodes*3){ - throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); - //std::cout << "ERROR: failed to read all the mesh nodes!" 
<< std::endl; - } - node_coords.update_device(); + // // temporary arrays + // DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files + // DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); + // DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types + + + // // for all fields, we stop recording when we get to "<" + // std::string stop = "<"; + + // // the size of 1D storage from reading the mesh file + // size_t size; + + // // --- + // // Object ids + // // --- + + // // the object id in the element + // // array dims are (num_elems) + // found = extract_values_xml(mesh_inps.object_ids.host.pointer(), + // "\"ObjectId\"", + // stop, + // in, + // size); + // if(found==false){ + // throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); + // //std::cout << "ERROR: ObjectIDs were not found in the XML file!" << std::endl; + // } + // mesh_inps.object_ids.update_device(); + + + // // --- + // // Nodal coordinates of mesh + // // --- + + // // coordinates of the node + // // array dims are (num_nodes,dims) + // // must use the quotes around Points to read the point values + // found = extract_values_xml(node_coords.host.pointer(), + // "\"Points\"", + // stop, + // in, + // size); + // if(found==false){ + // throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); + // //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; + // } + // if (size!=num_nodes*3){ + // throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); + // //std::cout << "ERROR: failed to read all the mesh nodes!" 
<< std::endl; + // } + // node_coords.update_device(); - // dimensional scaling of the mesh - const double scl_x = mesh_inps.scale_x; - const double scl_y = mesh_inps.scale_y; - const double scl_z = mesh_inps.scale_z; + // // dimensional scaling of the mesh + // const double scl_x = mesh_inps.scale_x; + // const double scl_y = mesh_inps.scale_y; + // const double scl_z = mesh_inps.scale_z; - // save the node coordinates to the state array - FOR_ALL(node_gid, 0, mesh.num_nodes, { + // // save the node coordinates to the state array + // FOR_ALL(node_gid, 0, mesh.num_nodes, { - // save the nodal coordinates - node.coords(node_gid, 0) = scl_x*node_coords(node_gid, 0); // double - node.coords(node_gid, 1) = scl_y*node_coords(node_gid, 1); // double - if(num_dims==3){ - node.coords(node_gid, 2) = scl_z*node_coords(node_gid, 2); // double - } + // // save the nodal coordinates + // node.coords(node_gid, 0) = scl_x*node_coords(node_gid, 0); // double + // node.coords(node_gid, 1) = scl_y*node_coords(node_gid, 1); // double + // if(num_dims==3){ + // node.coords(node_gid, 2) = scl_z*node_coords(node_gid, 2); // double + // } - }); // end for parallel nodes - node.coords.update_host(); + // }); // end for parallel nodes + // node.coords.update_host(); - // --- - // Nodes in the element - // --- + // // --- + // // Nodes in the element + // // --- - // fill temporary nodes in the element array - // array dims are (num_elems,num_nodes_in_elem) - found = extract_values_xml(connectivity.host.pointer(), - "\"connectivity\"", - stop, - in, - size); - if(found==false){ - std::cout << "ERROR: mesh connectivity was not found in the XML file!" << std::endl; - } - connectivity.update_device(); - - // array dims are the (num_elems) - // 8 = pixal i,j,k linear quad format - // 9 = linear quad ensight ordering - // 12 = linear ensight hex ordering - // 72 = VTK_LAGRANGE_HEXAHEDRON - // .... 
- found = extract_values_xml(elem_types.host.pointer(), - "\"types\"", - stop, - in, - size); - if(found==false){ - std::cout << "ERROR: element types were not found in the XML file!" << std::endl; - } - elem_types.update_device(); - - // check that the element type is supported by Fierro - FOR_ALL (elem_gid, 0, mesh.num_elems, { - if(elem_types(elem_gid) == element_types::linear_quad || - elem_types(elem_gid) == element_types::linear_hex_ijk || - elem_types(elem_gid) == element_types::linear_hex || - elem_types(elem_gid) == element_types::arbitrary_hex ) - { - // at least one of them is true - } - else - { - // unknown element used - Kokkos::abort("Unknown element type in the mesh \n"); - } - }); + // // fill temporary nodes in the element array + // // array dims are (num_elems,num_nodes_in_elem) + // found = extract_values_xml(connectivity.host.pointer(), + // "\"connectivity\"", + // stop, + // in, + // size); + // if(found==false){ + // std::cout << "ERROR: mesh connectivity was not found in the XML file!" << std::endl; + // } + // connectivity.update_device(); + + // // array dims are the (num_elems) + // // 8 = pixal i,j,k linear quad format + // // 9 = linear quad ensight ordering + // // 12 = linear ensight hex ordering + // // 72 = VTK_LAGRANGE_HEXAHEDRON + // // .... + // found = extract_values_xml(elem_types.host.pointer(), + // "\"types\"", + // stop, + // in, + // size); + // if(found==false){ + // std::cout << "ERROR: element types were not found in the XML file!" 
<< std::endl; + // } + // elem_types.update_device(); + + // // check that the element type is supported by Fierro + // FOR_ALL (elem_gid, 0, mesh.num_elems, { + // if(elem_types(elem_gid) == element_types::linear_quad || + // elem_types(elem_gid) == element_types::linear_hex_ijk || + // elem_types(elem_gid) == element_types::linear_hex || + // elem_types(elem_gid) == element_types::arbitrary_hex ) + // { + // // at least one of them is true + // } + // else + // { + // // unknown element used + // Kokkos::abort("Unknown element type in the mesh \n"); + // } + // }); - // Convert from ensight linear hex to a IJK mesh - CArrayKokkos convert_ensight_to_ijk(8, "convert_ensight_to_ijk"); + // // Convert from ensight linear hex to a IJK mesh + // CArrayKokkos convert_ensight_to_ijk(8, "convert_ensight_to_ijk"); - // Convert the arbitrary order hex to a IJK mesh - DCArrayKokkos convert_pn_vtk_to_ijk(mesh.num_nodes_in_elem, "convert_pn_vtk_to_ijk"); + // // Convert the arbitrary order hex to a IJK mesh + // DCArrayKokkos convert_pn_vtk_to_ijk(mesh.num_nodes_in_elem, "convert_pn_vtk_to_ijk"); - //build the connectivity for element type 12 - // elem_types.host(0) - switch(elem_types.host(0)){ + // //build the connectivity for element type 12 + // // elem_types.host(0) + // switch(elem_types.host(0)){ - case element_types::linear_quad: - // the node order is correct, no changes required + // case element_types::linear_quad: + // // the node order is correct, no changes required - FOR_ALL (elem_gid, 0, mesh.num_elems, { + // FOR_ALL (elem_gid, 0, mesh.num_elems, { - for (size_t node_lid=0; node_lid origin(num_dim); - // SimulationParameters.mesh_input.origin.update_host(); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + // std::vector origin(num_dim); + // // SimulationParameters.mesh_input.origin.update_host(); + // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // --- 2D 
parameters --- - // const int num_faces_in_elem = 4; // number of faces in elem - // const int num_points_in_elem = 4; // number of points in elem - // const int num_points_in_face = 2; // number of points in a face - // const int num_edges_in_elem = 4; // number of edges in a elem + // // --- 2D parameters --- + // // const int num_faces_in_elem = 4; // number of faces in elem + // // const int num_points_in_elem = 4; // number of points in elem + // // const int num_points_in_face = 2; // number of points in a face + // // const int num_edges_in_elem = 4; // number of edges in a elem - // --- mesh node ordering --- - // Convert ijk index system to the finite element numbering convention - // for vertices in elem - auto convert_point_number_in_quad = CArray(4); - convert_point_number_in_quad(0) = 0; - convert_point_number_in_quad(1) = 1; - convert_point_number_in_quad(2) = 3; - convert_point_number_in_quad(3) = 2; + // // --- mesh node ordering --- + // // Convert ijk index system to the finite element numbering convention + // // for vertices in elem + // auto convert_point_number_in_quad = CArray(4); + // convert_point_number_in_quad(0) = 0; + // convert_point_number_in_quad(1) = 1; + // convert_point_number_in_quad(2) = 3; + // convert_point_number_in_quad(3) = 2; - // intialize node variables - mesh.initialize_nodes(num_nodes); + // // intialize node variables + // mesh.initialize_nodes(num_nodes); - // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dim, required_node_state); + // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dim, required_node_state); - // --- Build nodes --- + // // --- Build nodes --- - // populate the point data 
structures - for (int j = 0; j < num_points_j; j++) { - for (int i = 0; i < num_points_i; i++) { - // global id for the point - int node_gid = get_id(i, j, 0, num_points_i, num_points_j); + // // populate the point data structures + // for (int j = 0; j < num_points_j; j++) { + // for (int i = 0; i < num_points_i; i++) { + // // global id for the point + // int node_gid = get_id(i, j, 0, num_points_i, num_points_j); - // store the point coordinates - node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; - node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; - } // end for i - } // end for j + // // store the point coordinates + // node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; + // node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; + // } // end for i + // } // end for j - node.coords.update_device(); + // node.coords.update_device(); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dim); + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dim); - // populate the elem center data structures - for (int j = 0; j < num_elems_j; j++) { - for (int i = 0; i < num_elems_i; i++) { - // global id for the elem - int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); + // // populate the elem center data structures + // for (int j = 0; j < num_elems_j; j++) { + // for (int i = 0; i < num_elems_i; i++) { + // // global id for the elem + // int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); - // store the point IDs for this elem where the range is - // (i:i+1, j:j+1 for a linear quad - int this_point = 0; + // // store the point IDs for this elem where the range is + // // (i:i+1, j:j+1 for a linear quad + // int this_point = 0; - for (int jcount = j; jcount <= j + 1; jcount++) { - for (int icount = i; icount <= i + 1; icount++) { - // global id for the points - int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); + // for (int jcount = j; jcount <= j + 1; jcount++) { + // 
for (int icount = i; icount <= i + 1; icount++) { + // // global id for the points + // int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); - // convert this_point index to the FE index convention - int this_index = convert_point_number_in_quad(this_point); + // // convert this_point index to the FE index convention + // int this_index = convert_point_number_in_quad(this_point); - // store the points in this elem according the the finite - // element numbering convention - mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // // store the points in this elem according the the finite + // // element numbering convention + // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; - // increment the point counting index - this_point = this_point + 1; - } // end for icount - } // end for jcount - } // end for i - } // end for j + // // increment the point counting index + // this_point = this_point + 1; + // } // end for icount + // } // end for jcount + // } // end for i + // } // end for j - // update device side - mesh.nodes_in_elem.update_device(); + // // update device side + // mesh.nodes_in_elem.update_device(); - // intialize corner variables - int num_corners = num_elems * mesh.num_nodes_in_elem; - mesh.initialize_corners(num_corners); - // corner.initialize(num_corners, num_dim); + // // intialize corner variables + // int num_corners = num_elems * mesh.num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + // // corner.initialize(num_corners, num_dim); - // Build connectivity - mesh.build_connectivity(); - } // end build_2d_box + // // Build connectivity + // mesh.build_connectivity(); + // } // end build_2d_box ///////////////////////////////////////////////////////////////////////////// /// @@ -1974,127 +1974,127 @@ class MeshBuilder /// \param Simulation parameters /// ///////////////////////////////////////////////////////////////////////////// - void build_2d_polar(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - 
node_t& node, - corner_t& corner, - SimulationParameters_t& SimulationParameters) const - { - printf("Creating a 2D polar mesh \n"); + // void build_2d_polar(Mesh_t& mesh, + // GaussPoint_t& GaussPoints, + // node_t& node, + // corner_t& corner, + // SimulationParameters_t& SimulationParameters) const + // { + // printf("Creating a 2D polar mesh \n"); - int num_dim = 2; + // int num_dim = 2; - const double inner_radius = SimulationParameters.mesh_input.inner_radius; - const double outer_radius = SimulationParameters.mesh_input.outer_radius; + // const double inner_radius = SimulationParameters.mesh_input.inner_radius; + // const double outer_radius = SimulationParameters.mesh_input.outer_radius; - const double start_angle = PI / 180.0 * SimulationParameters.mesh_input.starting_angle; - const double end_angle = PI / 180.0 * SimulationParameters.mesh_input.ending_angle; + // const double start_angle = PI / 180.0 * SimulationParameters.mesh_input.starting_angle; + // const double end_angle = PI / 180.0 * SimulationParameters.mesh_input.ending_angle; - const int num_elems_i = SimulationParameters.mesh_input.num_radial_elems; - const int num_elems_j = SimulationParameters.mesh_input.num_angular_elems; + // const int num_elems_i = SimulationParameters.mesh_input.num_radial_elems; + // const int num_elems_j = SimulationParameters.mesh_input.num_angular_elems; - const int num_points_i = num_elems_i + 1; // num points in x - const int num_points_j = num_elems_j + 1; // num points in y + // const int num_points_i = num_elems_i + 1; // num points in x + // const int num_points_j = num_elems_j + 1; // num points in y - const int num_nodes = num_points_i * num_points_j; + // const int num_nodes = num_points_i * num_points_j; - const double dx = (outer_radius - inner_radius) / ((double)num_elems_i); // len/(elems) - const double dy = (end_angle - start_angle) / ((double)num_elems_j); // len/(elems) + // const double dx = (outer_radius - inner_radius) / ((double)num_elems_i); // 
len/(elems) + // const double dy = (end_angle - start_angle) / ((double)num_elems_j); // len/(elems) - const int num_elems = num_elems_i * num_elems_j; + // const int num_elems = num_elems_i * num_elems_j; - std::vector origin(num_dim); + // std::vector origin(num_dim); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // --- 2D parameters --- - // const int num_faces_in_elem = 4; // number of faces in elem - // const int num_points_in_elem = 4; // number of points in elem - // const int num_points_in_face = 2; // number of points in a face - // const int num_edges_in_elem = 4; // number of edges in a elem + // // --- 2D parameters --- + // // const int num_faces_in_elem = 4; // number of faces in elem + // // const int num_points_in_elem = 4; // number of points in elem + // // const int num_points_in_face = 2; // number of points in a face + // // const int num_edges_in_elem = 4; // number of edges in a elem - // --- mesh node ordering --- - // Convert ijk index system to the finite element numbering convention - // for vertices in elem - auto convert_point_number_in_quad = CArray(4); - convert_point_number_in_quad(0) = 0; - convert_point_number_in_quad(1) = 1; - convert_point_number_in_quad(2) = 3; - convert_point_number_in_quad(3) = 2; + // // --- mesh node ordering --- + // // Convert ijk index system to the finite element numbering convention + // // for vertices in elem + // auto convert_point_number_in_quad = CArray(4); + // convert_point_number_in_quad(0) = 0; + // convert_point_number_in_quad(1) = 1; + // convert_point_number_in_quad(2) = 3; + // convert_point_number_in_quad(3) = 2; - // intialize node variables - mesh.initialize_nodes(num_nodes); + // // intialize node variables + // mesh.initialize_nodes(num_nodes); - // initialize node state, for now, we just need coordinates, the rest will be initialize by 
the respective solvers - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dim, required_node_state); + // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dim, required_node_state); - // populate the point data structures - for (int j = 0; j < num_points_j; j++) { - for (int i = 0; i < num_points_i; i++) { - // global id for the point - int node_gid = get_id(i, j, 0, num_points_i, num_points_j); + // // populate the point data structures + // for (int j = 0; j < num_points_j; j++) { + // for (int i = 0; i < num_points_i; i++) { + // // global id for the point + // int node_gid = get_id(i, j, 0, num_points_i, num_points_j); - double r_i = inner_radius + (double)i * dx; - double theta_j = start_angle + (double)j * dy; + // double r_i = inner_radius + (double)i * dx; + // double theta_j = start_angle + (double)j * dy; - // store the point coordinates - node.coords.host(node_gid, 0) = origin[0] + r_i * cos(theta_j); - node.coords.host(node_gid, 1) = origin[1] + r_i * sin(theta_j); + // // store the point coordinates + // node.coords.host(node_gid, 0) = origin[0] + r_i * cos(theta_j); + // node.coords.host(node_gid, 1) = origin[1] + r_i * sin(theta_j); - if(node.coords.host(node_gid, 0) < 0.0){ - throw std::runtime_error("**** NODE RADIUS FOR RZ MESH MUST BE POSITIVE ****"); - } + // if(node.coords.host(node_gid, 0) < 0.0){ + // throw std::runtime_error("**** NODE RADIUS FOR RZ MESH MUST BE POSITIVE ****"); + // } - } // end for i - } // end for j + // } // end for i + // } // end for j - node.coords.update_device(); + // node.coords.update_device(); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dim); + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dim); - // populate the elem center data structures - for (int j = 
0; j < num_elems_j; j++) { - for (int i = 0; i < num_elems_i; i++) { - // global id for the elem - int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); + // // populate the elem center data structures + // for (int j = 0; j < num_elems_j; j++) { + // for (int i = 0; i < num_elems_i; i++) { + // // global id for the elem + // int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); - // store the point IDs for this elem where the range is - // (i:i+1, j:j+1 for a linear quad - int this_point = 0; + // // store the point IDs for this elem where the range is + // // (i:i+1, j:j+1 for a linear quad + // int this_point = 0; - for (int jcount = j; jcount <= j + 1; jcount++) { - for (int icount = i; icount <= i + 1; icount++) { - // global id for the points - int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); + // for (int jcount = j; jcount <= j + 1; jcount++) { + // for (int icount = i; icount <= i + 1; icount++) { + // // global id for the points + // int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); - // convert this_point index to the FE index convention - int this_index = convert_point_number_in_quad(this_point); + // // convert this_point index to the FE index convention + // int this_index = convert_point_number_in_quad(this_point); - // store the points in this elem according the the finite - // element numbering convention - mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // // store the points in this elem according the the finite + // // element numbering convention + // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; - // increment the point counting index - this_point = this_point + 1; - } // end for icount - } // end for jcount - } // end for i - } // end for j + // // increment the point counting index + // this_point = this_point + 1; + // } // end for icount + // } // end for jcount + // } // end for i + // } // end for j - // update device side - mesh.nodes_in_elem.update_device(); + // 
// update device side + // mesh.nodes_in_elem.update_device(); - // intialize corner variables - int num_corners = num_elems * mesh.num_nodes_in_elem; - mesh.initialize_corners(num_corners); - // corner.initialize(num_corners, num_dim); + // // intialize corner variables + // int num_corners = num_elems * mesh.num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + // // corner.initialize(num_corners, num_dim); - // Build connectivity - mesh.build_connectivity(); - } // end build_2d_box + // // Build connectivity + // mesh.build_connectivity(); + // } // end build_2d_box ///////////////////////////////////////////////////////////////////////////// /// @@ -2109,125 +2109,125 @@ class MeshBuilder /// \param Simulation parameters /// ///////////////////////////////////////////////////////////////////////////// - void build_3d_box(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - node_t& node, - corner_t& corner, - SimulationParameters_t& SimulationParameters) const - { - printf("Creating a 3D box mesh \n"); - - const int num_dim = 3; - - // SimulationParameters.mesh_input.length.update_host(); - const double lx = SimulationParameters.mesh_input.length[0]; - const double ly = SimulationParameters.mesh_input.length[1]; - const double lz = SimulationParameters.mesh_input.length[2]; - - // SimulationParameters.mesh_input.num_elems.update_host(); - const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; - const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; - - const int num_points_i = num_elems_i + 1; // num points in x - const int num_points_j = num_elems_j + 1; // num points in y - const int num_points_k = num_elems_k + 1; // num points in y - - const int num_nodes = num_points_i * num_points_j * num_points_k; - - const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) - const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) - const double dz 
= lz / ((double)num_elems_k); // len/(num_elems_k) + // void build_3d_box(Mesh_t& mesh, + // GaussPoint_t& GaussPoints, + // node_t& node, + // corner_t& corner, + // SimulationParameters_t& SimulationParameters) const + // { + // printf("Creating a 3D box mesh \n"); - const int num_elems = num_elems_i * num_elems_j * num_elems_k; + // const int num_dim = 3; - std::vector origin(num_dim); - // SimulationParameters.mesh_input.origin.update_host(); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + // // SimulationParameters.mesh_input.length.update_host(); + // const double lx = SimulationParameters.mesh_input.length[0]; + // const double ly = SimulationParameters.mesh_input.length[1]; + // const double lz = SimulationParameters.mesh_input.length[2]; - // --- 3D parameters --- - // const int num_faces_in_elem = 6; // number of faces in elem - // const int num_points_in_elem = 8; // number of points in elem - // const int num_points_in_face = 4; // number of points in a face - // const int num_edges_in_elem = 12; // number of edges in a elem + // // SimulationParameters.mesh_input.num_elems.update_host(); + // const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + // const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + // const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; + // const int num_points_i = num_elems_i + 1; // num points in x + // const int num_points_j = num_elems_j + 1; // num points in y + // const int num_points_k = num_elems_k + 1; // num points in y - // initialize mesh node variables - mesh.initialize_nodes(num_nodes); + // const int num_nodes = num_points_i * num_points_j * num_points_k; - // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dim, required_node_state); + // const double 
dx = lx / ((double)num_elems_i); // len/(num_elems_i) + // const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) + // const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) - // --- Build nodes --- + // const int num_elems = num_elems_i * num_elems_j * num_elems_k; - // populate the point data structures - for (int k = 0; k < num_points_k; k++) { - for (int j = 0; j < num_points_j; j++) { - for (int i = 0; i < num_points_i; i++) { - // global id for the point - int node_gid = get_id(i, j, k, num_points_i, num_points_j); + // std::vector origin(num_dim); + // // SimulationParameters.mesh_input.origin.update_host(); + // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // store the point coordinates - node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; - node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; - node.coords.host(node_gid, 2) = origin[2] + (double)k * dz; - } // end for i - } // end for j - } // end for k + // // --- 3D parameters --- + // // const int num_faces_in_elem = 6; // number of faces in elem + // // const int num_points_in_elem = 8; // number of points in elem + // // const int num_points_in_face = 4; // number of points in a face + // // const int num_edges_in_elem = 12; // number of edges in a elem - node.coords.update_device(); + // // initialize mesh node variables + // mesh.initialize_nodes(num_nodes); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dim); + // // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_nodes, num_dim, required_node_state); - // --- Build elems --- + // // --- Build nodes --- - // populate the elem center data structures - for (int k = 0; k < num_elems_k; k++) { - for (int j = 0; j < num_elems_j; j++) { - for (int i = 0; i < num_elems_i; i++) { - // 
global id for the elem - int elem_gid = get_id(i, j, k, num_elems_i, num_elems_j); + // // populate the point data structures + // for (int k = 0; k < num_points_k; k++) { + // for (int j = 0; j < num_points_j; j++) { + // for (int i = 0; i < num_points_i; i++) { + // // global id for the point + // int node_gid = get_id(i, j, k, num_points_i, num_points_j); - // store the point IDs for this elem where the range is - // (i:i+1, j:j+1, k:k+1) for a linear hexahedron - int this_point = 0; - for (int kcount = k; kcount <= k + 1; kcount++) { - for (int jcount = j; jcount <= j + 1; jcount++) { - for (int icount = i; icount <= i + 1; icount++) { - // global id for the points - int node_gid = get_id(icount, jcount, kcount, - num_points_i, num_points_j); + // // store the point coordinates + // node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; + // node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; + // node.coords.host(node_gid, 2) = origin[2] + (double)k * dz; + // } // end for i + // } // end for j + // } // end for k - // convert this_point index to the FE index convention - int this_index = this_point; //convert_point_number_in_Hex(this_point); - // store the points in this elem according the the finite - // element numbering convention - mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // node.coords.update_device(); - // increment the point counting index - this_point = this_point + 1; - } // end for icount - } // end for jcount - } // end for kcount - } // end for i - } // end for j - } // end for k + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dim); + + // // --- Build elems --- + + // // populate the elem center data structures + // for (int k = 0; k < num_elems_k; k++) { + // for (int j = 0; j < num_elems_j; j++) { + // for (int i = 0; i < num_elems_i; i++) { + // // global id for the elem + // int elem_gid = get_id(i, j, k, num_elems_i, num_elems_j); + + // // store the point IDs for this elem where 
the range is + // // (i:i+1, j:j+1, k:k+1) for a linear hexahedron + // int this_point = 0; + // for (int kcount = k; kcount <= k + 1; kcount++) { + // for (int jcount = j; jcount <= j + 1; jcount++) { + // for (int icount = i; icount <= i + 1; icount++) { + // // global id for the points + // int node_gid = get_id(icount, jcount, kcount, + // num_points_i, num_points_j); + + // // convert this_point index to the FE index convention + // int this_index = this_point; //convert_point_number_in_Hex(this_point); + + // // store the points in this elem according the the finite + // // element numbering convention + // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + + // // increment the point counting index + // this_point = this_point + 1; + // } // end for icount + // } // end for jcount + // } // end for kcount + // } // end for i + // } // end for j + // } // end for k - // update device side - mesh.nodes_in_elem.update_device(); + // // update device side + // mesh.nodes_in_elem.update_device(); - // initialize corner variables - int num_corners = num_elems * mesh.num_nodes_in_elem; - mesh.initialize_corners(num_corners); - // corner.initialize(num_corners, num_dim); + // // initialize corner variables + // int num_corners = num_elems * mesh.num_nodes_in_elem; + // mesh.initialize_corners(num_corners); + // // corner.initialize(num_corners, num_dim); - // Build connectivity - mesh.build_connectivity(); - } // end build_3d_box + // // Build connectivity + // mesh.build_connectivity(); + // } // end build_3d_box ///////////////////////////////////////////////////////////////////////////// /// @@ -2242,164 +2242,164 @@ class MeshBuilder /// \param Simulation parameters /// ///////////////////////////////////////////////////////////////////////////// - void build_3d_HexN_box(Mesh_t& mesh, - GaussPoint_t& GaussPoints, - node_t& node, - corner_t& corner, - SimulationParameters_t& SimulationParameters) const - { - printf(" ***** WARNING:: build_3d_HexN_box not 
yet implemented\n"); - const int num_dim = 3; + // void build_3d_HexN_box(Mesh_t& mesh, + // GaussPoint_t& GaussPoints, + // node_t& node, + // corner_t& corner, + // SimulationParameters_t& SimulationParameters) const + // { + // printf(" ***** WARNING:: build_3d_HexN_box not yet implemented\n"); + // const int num_dim = 3; - // SimulationParameters.mesh_input.length.update_host(); - const double lx = SimulationParameters.mesh_input.length[0]; - const double ly = SimulationParameters.mesh_input.length[1]; - const double lz = SimulationParameters.mesh_input.length[2]; + // // SimulationParameters.mesh_input.length.update_host(); + // const double lx = SimulationParameters.mesh_input.length[0]; + // const double ly = SimulationParameters.mesh_input.length[1]; + // const double lz = SimulationParameters.mesh_input.length[2]; - // SimulationParameters.mesh_input.num_elems.update_host(); - const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; - const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; + // // SimulationParameters.mesh_input.num_elems.update_host(); + // const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + // const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + // const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; - // creating zones for the Pn order - const int Pn_order = SimulationParameters.mesh_input.p_order; + // // creating zones for the Pn order + // const int Pn_order = SimulationParameters.mesh_input.p_order; - if (Pn_order > 19) { - printf("Fierro DG and RD solvers are only valid for elements up to Pn = 19 \n"); - return; - } + // if (Pn_order > 19) { + // printf("Fierro DG and RD solvers are only valid for elements up to Pn = 19 \n"); + // return; + // } - const int num_zones_i = Pn_order*num_elems_i; - const int num_zones_j = Pn_order*num_elems_j; - const int num_zones_k = 
Pn_order*num_elems_k; + // const int num_zones_i = Pn_order*num_elems_i; + // const int num_zones_j = Pn_order*num_elems_j; + // const int num_zones_k = Pn_order*num_elems_k; - const int num_points_i = num_zones_i+1; // num points in x accounting for Pn - const int num_points_j = num_zones_j+1; // num points in y accounting for Pn - const int num_points_k = num_zones_k+1; // num points in y accounting for Pn + // const int num_points_i = num_zones_i+1; // num points in x accounting for Pn + // const int num_points_j = num_zones_j+1; // num points in y accounting for Pn + // const int num_points_k = num_zones_k+1; // num points in y accounting for Pn - const double dx = lx/((double)num_zones_i); // len/(num_zones_i) - const double dy = ly/((double)num_zones_j); // len/(num_zones_j) - const double dz = lz/((double)num_zones_k); // len/(num_zones_k) + // const double dx = lx/((double)num_zones_i); // len/(num_zones_i) + // const double dy = ly/((double)num_zones_j); // len/(num_zones_j) + // const double dz = lz/((double)num_zones_k); // len/(num_zones_k) - const int num_elems = num_elems_i*num_elems_j*num_elems_k; - // const int num_zones = num_zones_i*num_zones_j*num_zones_k; // accounts for Pn + // const int num_elems = num_elems_i*num_elems_j*num_elems_k; + // // const int num_zones = num_zones_i*num_zones_j*num_zones_k; // accounts for Pn - std::vector origin(num_dim); - for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + // std::vector origin(num_dim); + // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // --- 3D parameters --- - // const int num_faces_in_zone = 6; // number of faces in zone - // const int num_points_in_zone = 8; // number of points in zone - // const int num_points_in_face = 4; // number of points in a face + // // --- 3D parameters --- + // // const int num_faces_in_zone = 6; // number of faces in zone + // // const int num_points_in_zone = 8; // number 
of points in zone + // // const int num_points_in_face = 4; // number of points in a face - // p_order = 1, 2, 3, 4, 5 - // num_nodes = 2, 3, 4, 5, 6 - const int num_1D_points = Pn_order+1; - const int num_points_in_elem = num_1D_points*num_1D_points*num_1D_points; + // // p_order = 1, 2, 3, 4, 5 + // // num_nodes = 2, 3, 4, 5, 6 + // const int num_1D_points = Pn_order+1; + // const int num_points_in_elem = num_1D_points*num_1D_points*num_1D_points; - // --- elem --- - auto elem_coords = CArray (num_elems, num_dim); - auto elem_point_list = CArray (num_elems, num_points_in_elem); + // // --- elem --- + // auto elem_coords = CArray (num_elems, num_dim); + // auto elem_point_list = CArray (num_elems, num_points_in_elem); - // --- point --- - int num_points = num_points_i * num_points_j * num_points_k; - auto pt_coords = CArray (num_points, num_dim); + // // --- point --- + // int num_points = num_points_i * num_points_j * num_points_k; + // auto pt_coords = CArray (num_points, num_dim); - // --- Build nodes --- + // // --- Build nodes --- - // initialize node variables - mesh.initialize_nodes(num_points); + // // initialize node variables + // mesh.initialize_nodes(num_points); - // - std::vector required_node_state = { node_state::coords }; - node.initialize(num_points, num_dim, required_node_state); - // populate the point data structures - for (int k = 0; k < num_points_k; k++){ - for (int j = 0; j < num_points_j; j++){ - for (int i = 0; i < num_points_i; i++){ + // // + // std::vector required_node_state = { node_state::coords }; + // node.initialize(num_points, num_dim, required_node_state); + // // populate the point data structures + // for (int k = 0; k < num_points_k; k++){ + // for (int j = 0; j < num_points_j; j++){ + // for (int i = 0; i < num_points_i; i++){ - // global id for the point - int node_gid = get_id(i, j, k, num_points_i, num_points_j); + // // global id for the point + // int node_gid = get_id(i, j, k, num_points_i, num_points_j); - // store 
the point coordinates - node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; - node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; - node.coords.host(node_gid, 2) = origin[2] + (double)k * dz; + // // store the point coordinates + // node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; + // node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; + // node.coords.host(node_gid, 2) = origin[2] + (double)k * dz; - } // end for k - } // end for i - } // end for j + // } // end for k + // } // end for i + // } // end for j - node.coords.update_device(); + // node.coords.update_device(); - // initialize elem variables - mesh.initialize_elems(num_elems, num_dim); + // // initialize elem variables + // mesh.initialize_elems(num_elems, num_dim); - // --- Build elems --- + // // --- Build elems --- - // populate the elem center data structures accounting for Pn - for (int k=0; k& state_node_coords, + const DistributedDCArray& state_node_coords, DCArrayKokkos& mat_node_coords, DCArrayKokkos & mat_nodes_in_mat_elem, const DCArrayKokkos& MaterialToMeshMaps_elem, diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 26e6bfc22..73ac93df6 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -312,6 +312,9 @@ struct node_t case node_state::heat_transfer: if (q_transfer.size() == 0) this->q_transfer = DistributedDCArray(num_nodes, "node_q_transfer"); break; + case node_state::gradient_level_set: + if (gradient_level_set.size() == 0) this->gradient_level_set = DistributedDCArray(num_nodes, num_dims, "node_grad_levelset"); + break; default: std::cout<<"Desired node state not understood in node_t initialize"<q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); break; case node_state::gradient_level_set: - if (gradient_level_set.size() == 0) this->gradient_level_set = DCArrayKokkos(num_nodes, num_dims, 
"node_grad_levelset"); - break; - case node_state::gradient_level_set: - if (gradient_level_set.size() == 0) this->gradient_level_set = DCArrayKokkos(num_nodes, num_dims, "node_grad_levelset"); + if (gradient_level_set.size() == 0) this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); break; default: std::cout<<"Desired node state not understood in node_t initialize"<& B_matrix, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { const size_t num_nodes = 8; @@ -269,7 +269,7 @@ void geometry::get_bmatrix(const ViewCArrayKokkos& B_matrix, KOKKOS_FUNCTION void geometry::get_vol_quad(const DCArrayKokkos& elem_vol, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { elem_vol(elem_gid) = 0.0; @@ -315,7 +315,7 @@ void geometry::get_vol_quad(const DCArrayKokkos& elem_vol, KOKKOS_FUNCTION void geometry::get_vol_hex(const DCArrayKokkos& elem_vol, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { const size_t num_nodes = 8; @@ -365,7 +365,7 @@ void geometry::get_vol_hex(const DCArrayKokkos& elem_vol, /// ///////////////////////////////////////////////////////////////////////////// void geometry::get_vol(const DCArrayKokkos& elem_vol, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const Mesh_t& mesh) { const size_t num_dims = mesh.num_dims; @@ -406,7 +406,7 @@ void geometry::get_vol(const DCArrayKokkos& elem_vol, KOKKOS_FUNCTION void geometry::get_bmatrix2D(const ViewCArrayKokkos& B_matrix, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { const size_t num_nodes = 4; @@ -481,7 +481,7 @@ void geometry::get_bmatrix2D(const ViewCArrayKokkos& B_matrix, 
///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION double geometry::get_area_quad(const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { double elem_area = 0.0; @@ -567,7 +567,7 @@ double geometry::heron(const double x1, KOKKOS_FUNCTION void geometry::get_area_weights2D(const ViewCArrayKokkos& corner_areas, const size_t elem_gid, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const ViewCArrayKokkos& elem_node_gids) { const size_t num_nodes = 4; @@ -631,7 +631,7 @@ size_t check_bdy(const size_t patch_gid, const double orig_y, const double orig_z, const Mesh_t& mesh, - const DCArrayKokkos& node_coords) + const DistributedDCArray& node_coords) { size_t num_dims = mesh.num_dims; @@ -714,7 +714,7 @@ size_t check_bdy(const size_t patch_gid, ///////////////////////////////////////////////////////////////////////////// void tag_bdys(const BoundaryCondition_t& boundary, Mesh_t& mesh, - const DCArrayKokkos& node_coords) + const DistributedDCArray& node_coords) { // create a temporary storage for the bdy patches in a set diff --git a/single-node-refactor/src/common/src/region_fill.cpp b/single-node-refactor/src/common/src/region_fill.cpp index 79e064921..539501c8f 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -123,7 +123,7 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, SimulationParameters.region_setups.region_fills_host, SimulationParameters.region_setups.fill_gauss_states, SimulationParameters.region_setups.fill_node_states, - num_mats_per_elem); + max_num_mats_per_elem); // note: the device and host side are updated in the above function diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index e75931307..da853af48 100644 --- a/single-node-refactor/src/driver.cpp +++ 
b/single-node-refactor/src/driver.cpp @@ -179,12 +179,12 @@ void Driver::initialize() solvers.push_back(sgtm_solver_3d); } // end if SGTM solver - else if (SimulationParamaters.solver_inputs[solver_id].method == solver_input::levelSet) { + else if (SimulationParameters.solver_inputs[solver_id].method == solver_input::levelSet) { std::cout << "Initializing level set solver" << std::endl; LevelSet* level_set_solver = new LevelSet(); - level_set_solver->initialize(SimulationParamaters, + level_set_solver->initialize(SimulationParameters, Materials, mesh, BoundaryConditions, @@ -333,7 +333,7 @@ void Driver::setup_solver_vars(T& a_solver, // the final time of the simulation - double time_final = this->SimulationParamaters.dynamic_options.time_final; + double time_final = this->SimulationParameters.dynamic_options.time_final; // save the solver_id a_solver->solver_id = solver_id; @@ -345,7 +345,7 @@ void Driver::setup_solver_vars(T& a_solver, // setting the ending times are tricky, requiring logic // set the start and ending times - double t_end = this->SimulationParamaters.solver_inputs[solver_id].time_end; // default is t=0 + double t_end = this->SimulationParameters.solver_inputs[solver_id].time_end; // default is t=0 if(solver_id==0){ a_solver->time_start = 0.0; From e7d73d16628f59c3906571102f70878210aa8470 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sat, 24 May 2025 16:55:33 -0600 Subject: [PATCH 10/66] WIP: MPI changes --- .../SGH_solver_3D/include/sgh_solver_3D.h | 68 ++++++++-------- .../Solvers/SGH_solver_3D/src/boundary.cpp | 6 +- .../Solvers/SGH_solver_3D/src/energy_sgh.cpp | 4 +- .../Solvers/SGH_solver_3D/src/force_sgh.cpp | 4 +- .../Solvers/SGH_solver_3D/src/momentum.cpp | 20 ++--- .../Solvers/SGH_solver_3D/src/position.cpp | 8 +- .../Solvers/SGH_solver_3D/src/properties.cpp | 8 +- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 10 +-- .../SGH_solver_3D/src/time_integration.cpp | 12 +-- 
.../SGH_solver_rz/include/sgh_solver_rz.h | 78 +++++++++---------- .../Solvers/SGH_solver_rz/src/boundary_rz.cpp | 4 +- .../SGH_solver_rz/src/energy_sgh_rz.cpp | 8 +- .../SGTM_solver_3D/include/sgtm_solver_3D.h | 56 ++++++------- .../src/common/src/region_fill.cpp | 4 +- 14 files changed, 145 insertions(+), 145 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index 7fd3b87ac..3aca4c8eb 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -234,7 +234,7 @@ class SGH3D : public Solver void boundary_stress(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, DCArrayKokkos& node_bdy_force, - DCArrayKokkos& node_coords, + DistributedDCArray& node_coords, const double time_value) const; // **** Functions defined in energy_sgh.cpp **** // @@ -242,8 +242,8 @@ class SGH3D : public Solver const double rk_alpha, const double dt, const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, @@ -260,8 +260,8 @@ class SGH3D : public Solver const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& GaussPoints_eroded, const DCArrayKokkos& corner_force, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_pres, @@ -285,34 +285,34 @@ class SGH3D : public Solver double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - const 
DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0) const; + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const; // **** Functions defined in momentum.cpp **** // void update_velocity( double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_mass, - const DCArrayKokkos& node_force, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_mass, + const DistributedDCArray& node_force, const DCArrayKokkos& corner_force) const; void get_velgrad( DCArrayKokkos& vel_grad, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& elem_vol) const; void get_divergence( DCArrayKokkos& GaussPoints_div, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vol) const; KOKKOS_FUNCTION @@ -325,8 +325,8 @@ class SGH3D : public Solver void update_state( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, @@ -354,8 +354,8 @@ class SGH3D : public Solver const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, @@ -379,10 
+379,10 @@ class SGH3D : public Solver // **** Functions defined in time_integration.cpp **** // // NOTE: Consider pulling up void rk_init( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, DCArrayKokkos& MaterialPoints_sie, DCArrayKokkos& MaterialPoints_sie_n0, DCArrayKokkos& MaterialPoints_stress, @@ -394,8 +394,8 @@ class SGH3D : public Solver void get_timestep( Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_eroded, @@ -436,8 +436,8 @@ class SGH3D : public Solver const double sie, const DCArrayKokkos& GaussPoints_vel_grad, const ViewCArrayKokkos& elem_node_gids, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const double vol, const double dt, const double rk_alpha); @@ -448,16 +448,16 @@ double sum_domain_internal_energy(const DCArrayKokkos& MaterialPoints_ma const size_t num_mat_points); double sum_domain_kinetic_energy(const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass); + const DistributedDCArray& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass); double sum_domain_material_mass(const DCArrayKokkos& MaterialPoints_mass, const size_t num_mat_points); double sum_domain_node_mass(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass); + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass); void set_corner_force_zero(const Mesh_t& mesh, const DCArrayKokkos& corner_force); diff --git 
a/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp index 2f3054a0c..62f07db73 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp @@ -50,7 +50,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ///////////////////////////////////////////////////////////////////////////// void SGH3D::boundary_velocity(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const { size_t num_vel_bdy_sets = BoundaryConditions.num_vel_bdy_sets_in_solver.host(this->solver_id); @@ -96,7 +96,7 @@ void SGH3D::boundary_velocity(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGH3D::boundary_contact(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const { return; @@ -119,7 +119,7 @@ void SGH3D::boundary_contact(const Mesh_t& mesh, void SGH3D::boundary_stress(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, DCArrayKokkos& node_bdy_force, - DCArrayKokkos& node_coords, + DistributedDCArray& node_coords, const double time_value) const { diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/energy_sgh.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/energy_sgh.cpp index 143694956..bed17b316 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/energy_sgh.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/energy_sgh.cpp @@ -55,8 +55,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void SGH3D::update_energy(const double rk_alpha, const double dt, const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/force_sgh.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/force_sgh.cpp index 9119ef4fd..aa213defa 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/force_sgh.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/force_sgh.cpp @@ -72,8 +72,8 @@ void SGH3D::get_force(const Material_t& Materials, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, const DCArrayKokkos& corner_force, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_pres, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp index d7eff013d..7455add18 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp @@ -52,11 +52,11 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void SGH3D::update_velocity(double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_mass, - const DCArrayKokkos& node_force, - const DCArrayKokkos& corner_force) const + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_mass, + const DistributedDCArray& node_force, + const DistributedDCArray& corner_force) const { const size_t num_dims = mesh.num_dims; @@ -100,9 +100,9 @@ void SGH3D::update_velocity(double rk_alpha, ///////////////////////////////////////////////////////////////////////////// void SGH3D::get_velgrad(DCArrayKokkos& vel_grad, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& elem_vol) const + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, + const DistributedDCArray& elem_vol) const { const size_t num_nodes_in_elem = 8; const size_t num_dims = 3; @@ -205,8 +205,8 @@ void SGH3D::get_velgrad(DCArrayKokkos& vel_grad, ///////////////////////////////////////////////////////////////////////////// void SGH3D::get_divergence(DCArrayKokkos& elem_div, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& elem_vol) const { // --- calculate the forces acting on the nodes from the element --- diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/position.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/position.cpp index e652a4bd8..2aa6b4413 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/position.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/position.cpp @@ -51,10 +51,10 @@ void SGH3D::update_position(double rk_alpha, double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - const DCArrayKokkos& node_vel, - 
const DCArrayKokkos& node_vel_n0) const + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const { // loop over all the nodes in the mesh FOR_ALL(node_gid, 0, num_nodes, { diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/properties.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/properties.cpp index 4de5c1f43..880069d79 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/properties.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/properties.cpp @@ -67,8 +67,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. void SGH3D::update_state( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, @@ -292,8 +292,8 @@ void SGH3D::update_stress( const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index 04b8676c7..591a56096 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -667,9 +667,9 @@ double sum_domain_internal_energy(const DCArrayKokkos& MaterialPoints_ma /// ///////////////////////////////////////////////////////////////////////////// double sum_domain_kinetic_energy(const Mesh_t& mesh, - const DCArrayKokkos& 
node_vel, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass) + const DistributedDCArray& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass) { // extensive KE double KE_sum = 0.0; @@ -722,8 +722,8 @@ double sum_domain_material_mass(const DCArrayKokkos& MaterialPoints_mass /// ///////////////////////////////////////////////////////////////////////////// double sum_domain_node_mass(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass) + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass) { double mass_domain = 0.0; double mass_loc_domain; diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/time_integration.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/time_integration.cpp index a8b16e0c3..433530eec 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/time_integration.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/time_integration.cpp @@ -51,10 +51,10 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// ///////////////////////////////////////////////////////////////////////////// void SGH3D::rk_init( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, DCArrayKokkos& MaterialPoints_sie, DCArrayKokkos& MaterialPoints_sie_n0, DCArrayKokkos& MaterialPoints_stress, @@ -107,8 +107,8 @@ void SGH3D::rk_init( /// ///////////////////////////////////////////////////////////////////////////// void SGH3D::get_timestep(Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_eroded, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h index 9a02856b4..fc3f09129 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h @@ -190,20 +190,20 @@ class SGHRZ : public Solver void init_corner_node_masses_zero_rz( const Mesh_t& mesh, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass) const; // **** Functions defined in boundary.cpp **** // void boundary_velocity_rz( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const; void boundary_contact_rz( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const; // **** Functions defined in energy_SGHRZ.cpp **** // @@ -211,10 +211,10 @@ class SGHRZ : public Solver const double rk_alpha, const double dt, const Mesh_t& mesh, - 
const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, @@ -231,8 +231,8 @@ class SGHRZ : public Solver const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, const DCArrayKokkos& corner_force, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_pres, @@ -257,26 +257,26 @@ class SGHRZ : public Solver double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0) const; + DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const; // **** Functions defined in momentum.cpp **** // void update_velocity_rz( double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_mass, + DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_force) const; void get_velgrad_rz( DCArrayKokkos& elem_vel_grad, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& elem_vol) const; KOKKOS_FUNCTION @@ -287,16 +287,16 @@ class SGHRZ : public Solver void 
get_divergence_rz( DCArrayKokkos& GaussPoints_div, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vol) const; // **** Functions defined in properties.cpp **** // void update_state_rz( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, @@ -324,8 +324,8 @@ class SGHRZ : public Solver const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, @@ -350,10 +350,10 @@ class SGHRZ : public Solver // **** Functions defined in time_integration.cpp **** // // NOTE: Consider pulling up void rk_init_rz( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, DCArrayKokkos& MaterialPoints_sie, DCArrayKokkos& MaterialPoints_sie_n0, DCArrayKokkos& MaterialPoints_stress, @@ -366,8 +366,8 @@ class SGHRZ : public Solver void get_timestep_rz( Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_eroded, @@ -388,27 +388,27 @@ class SGHRZ : public Solver void calc_corner_mass_rz(const Material_t& 
Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialToMeshMaps_elem, const size_t num_mat_elems); void calc_node_mass_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass); void calc_node_areal_mass_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, CArrayKokkos node_extensive_mass, double tiny); void calc_node_extensive_mass_rz(const CArrayKokkos& node_extensive_mass, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, double num_nodes); double sum_domain_internal_energy_rz(const DCArrayKokkos& MaterialPoints_mass, @@ -416,7 +416,7 @@ double sum_domain_internal_energy_rz(const DCArrayKokkos& MaterialPoints const size_t num_mat_points); double sum_domain_kinetic_energy_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const CArrayKokkos& node_extensive_mass); double sum_domain_material_mass_rz(const DCArrayKokkos& MaterialPoints_mass, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/boundary_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/boundary_rz.cpp index 2775e662e..7e8eab296 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/boundary_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/boundary_rz.cpp @@ -51,7 +51,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
///////////////////////////////////////////////////////////////////////////// void SGHRZ::boundary_velocity_rz(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const { @@ -102,7 +102,7 @@ void SGHRZ::boundary_velocity_rz(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGHRZ::boundary_contact_rz(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const { return; diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/energy_sgh_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/energy_sgh_rz.cpp index 49ea8f249..386cd9f88 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/energy_sgh_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/energy_sgh_rz.cpp @@ -56,10 +56,10 @@ void SGHRZ::update_energy_rz( const double rk_alpha, const double dt, const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h index 777ec4d22..2226050fd 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h @@ -230,24 +230,24 @@ class SGTM3D : public Solver void boundary_temperature( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - 
DCArrayKokkos& node_temp, + DistributedDCArray& node_temp, const double time_value) const; void boundary_convection( const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_flux, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_flux, + const DistributedDCArray& node_coords, const double time_value) const; void boundary_radiation( const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_flux, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_flux, + const DistributedDCArray& node_coords, const double time_value) const; void boundary_heat_flux( @@ -261,10 +261,10 @@ class SGTM3D : public Solver const double rk_alpha, const double dt, const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, @@ -289,8 +289,8 @@ class SGTM3D : public Solver const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_temp, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_temp, const DCArrayKokkos& MaterialPoints_q_flux, const DCArrayKokkos& corner_q_flux, const DCArrayKokkos& MaterialPoints_conductivity, @@ -309,7 +309,7 @@ class SGTM3D : public Solver const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const 
DCArrayKokkos& corner_q_flux, const DCArrayKokkos& sphere_position, const corners_in_mat_t corners_in_mat_elem, @@ -327,10 +327,10 @@ class SGTM3D : public Solver double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0) const; + DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const; // **** Functions defined in momentum.cpp **** // @@ -346,8 +346,8 @@ class SGTM3D : public Solver void update_state( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, const DCArrayKokkos& MaterialPoints_stress, @@ -366,12 +366,12 @@ class SGTM3D : public Solver // **** Functions defined in time_integration.cpp **** // // NOTE: Consider pulling up void rk_init( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, - DCArrayKokkos& node_temp, - DCArrayKokkos& node_temp_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, + DistributedDCArray& node_temp, + DistributedDCArray& node_temp_n0, DCArrayKokkos& MaterialPoints_q_flux, DCArrayKokkos& MaterialPoints_stress, const size_t num_dims, @@ -381,8 +381,8 @@ class SGTM3D : public Solver void get_timestep( Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_conductivity, diff --git a/single-node-refactor/src/common/src/region_fill.cpp 
b/single-node-refactor/src/common/src/region_fill.cpp index 539501c8f..68ea64a3c 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -227,8 +227,8 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, void fill_regions( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos & node_coords, - DCArrayKokkos & node_vel, + const DistributedDCArray & node_coords, + DistributedDCArray & node_vel, DCArrayKokkos & node_temp, DCArrayKokkos & gauss_den, DCArrayKokkos & gauss_sie, From 8aca6b50b68cf20f6ecae448afe8b54756b40c91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 25 May 2025 08:31:45 -0600 Subject: [PATCH 11/66] WIP: MPI work --- .../Solvers/SGH_solver_rz/src/force_sgh_rz.cpp | 4 ++-- .../Solvers/SGH_solver_rz/src/momentum_rz.cpp | 14 +++++++------- .../Solvers/SGH_solver_rz/src/position_rz.cpp | 8 ++++---- .../Solvers/SGH_solver_rz/src/properties_rz.cpp | 8 ++++---- .../Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp | 8 ++++---- .../Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp | 10 +++++----- .../SGH_solver_rz/src/time_integration_rz.cpp | 12 ++++++------ .../src/Solvers/SGTM_solver_3D/src/boundary.cpp | 14 +++++++------- .../Solvers/SGTM_solver_3D/src/energy_sgtm.cpp | 8 ++++---- .../src/Solvers/SGTM_solver_3D/src/heat_flux.cpp | 6 +++--- .../src/Solvers/SGTM_solver_3D/src/momentum.cpp | 4 ++-- .../src/Solvers/SGTM_solver_3D/src/position.cpp | 2 +- .../Solvers/SGTM_solver_3D/src/properties.cpp | 4 ++-- .../SGTM_solver_3D/src/temperature_sgtm.cpp | 8 ++++---- .../SGTM_solver_3D/src/time_integration.cpp | 16 ++++++++-------- 15 files changed, 63 insertions(+), 63 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/force_sgh_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/force_sgh_rz.cpp index 895cfc68f..8137fe7b6 100644 --- 
a/single-node-refactor/src/Solvers/SGH_solver_rz/src/force_sgh_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/force_sgh_rz.cpp @@ -71,8 +71,8 @@ void SGHRZ::get_force_rz(const Material_t& Materials, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, const DCArrayKokkos& corner_force, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_pres, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp index 32830f3bb..bd96217ab 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp @@ -53,9 +53,9 @@ void SGHRZ::update_velocity_rz( double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_mass, + DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_force) const { const size_t num_dims = 2; @@ -105,8 +105,8 @@ void SGHRZ::update_velocity_rz( void SGHRZ::get_velgrad_rz( DCArrayKokkos& elem_vel_grad, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& elem_vol) const { // --- calculate the forces acting on the nodes from the element --- @@ -248,8 +248,8 @@ void SGHRZ::get_velgrad_rz(ViewCArrayKokkos& vel_grad, ///////////////////////////////////////////////////////////////////////////// void SGHRZ::get_divergence_rz(DCArrayKokkos& elem_div, const Mesh_t mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + 
const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& elem_vol) const { // --- calculate the forces acting on the nodes from the element --- diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/position_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/position_rz.cpp index e60f620db..0d9427724 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/position_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/position_rz.cpp @@ -52,10 +52,10 @@ void SGHRZ::update_position_rz( double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0) const + DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const { // loop over all the nodes in the mesh FOR_ALL(node_gid, 0, num_nodes, { diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/properties_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/properties_rz.cpp index fd59edb33..430b7cc1e 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/properties_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/properties_rz.cpp @@ -67,8 +67,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void SGHRZ::update_state_rz( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GuassPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, @@ -307,8 +307,8 @@ void SGHRZ::update_state_rz( void SGHRZ::update_stress(const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& GuassPoints_vel_grad, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sie, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp index 434b83400..2460bf17e 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp @@ -627,8 +627,8 @@ double sum_domain_node_mass_rz(const CArrayKokkos& node_extensive_mass, /// ///////////////////////////////////////////////////////////////////////////// void calc_node_extensive_mass_rz(const CArrayKokkos& node_extensive_mass, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, double num_nodes) { // save the nodal mass @@ -656,8 +656,8 @@ void calc_node_extensive_mass_rz(const CArrayKokkos& node_extensive_mass /// ///////////////////////////////////////////////////////////////////////////// void calc_node_areal_mass_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const CArrayKokkos node_extensive_mass, double tiny) { 
diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp index 7e9c03ab3..70cc71de1 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp @@ -53,7 +53,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /// ///////////////////////////////////////////////////////////////////////////// void SGHRZ::init_corner_node_masses_zero_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass) const { @@ -157,8 +157,8 @@ void SGHRZ::setup(SimulationParameters_t& SimulationParameters, ///////////////////////////////////////////////////////////////////////////// void calc_corner_mass_rz(const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialToMeshMaps_elem, @@ -205,8 +205,8 @@ void calc_corner_mass_rz(const Material_t& Materials, /// ///////////////////////////////////////////////////////////////////////////// void calc_node_mass_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass) { diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/time_integration_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/time_integration_rz.cpp index 80facb9d6..3f88226c3 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/time_integration_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/time_integration_rz.cpp @@ -52,10 +52,10 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// ///////////////////////////////////////////////////////////////////////////// void SGHRZ::rk_init_rz( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, DCArrayKokkos& MaterialPoints_sie, DCArrayKokkos& MaterialPoints_sie_n0, DCArrayKokkos& MaterialPoints_stress, @@ -110,8 +110,8 @@ void SGHRZ::rk_init_rz( /// ///////////////////////////////////////////////////////////////////////////// void SGHRZ::get_timestep_rz(Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_eroded, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp index 0556a1dfc..02aa7e60c 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp @@ -100,9 +100,9 @@ void SGTM3D::boundary_temperature(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGTM3D::boundary_convection(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_flux, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_flux, + const DistributedDCArray& node_coords, const double time_value) const { // ---- Loop over boundary sets ---- // @@ -240,9 +240,9 @@ void SGTM3D::boundary_convection(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGTM3D::boundary_radiation(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - 
const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_flux, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_flux, + const DistributedDCArray& node_coords, const double time_value) const { // ---- Loop over boundary sets ---- // @@ -388,7 +388,7 @@ void SGTM3D::boundary_radiation(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGTM3D::boundary_heat_flux(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_temp, + DistributedDCArray& node_temp, const double time_value) const { // // Loop over boundary sets diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/energy_sgtm.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/energy_sgtm.cpp index 4af2592a8..850f5e177 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/energy_sgtm.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/energy_sgtm.cpp @@ -56,10 +56,10 @@ void SGTM3D::update_temperature( const double rk_alpha, const double dt, const Mesh_t& mesh, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_coords_n0, const DCArrayKokkos& MaterialPoints_sie, const DCArrayKokkos& MaterialPoints_sie_n0, const DCArrayKokkos& MaterialPoints_mass, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/heat_flux.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/heat_flux.cpp index 986a18699..942796003 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/heat_flux.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/heat_flux.cpp @@ -71,8 +71,8 @@ void SGTM3D::get_heat_flux( const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& 
GaussPoints_vol, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_temp, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_temp, const DCArrayKokkos& MaterialPoints_q_flux, const DCArrayKokkos& MaterialPoints_conductivity, const DCArrayKokkos& MaterialPoints_temp_grad, @@ -221,7 +221,7 @@ void SGTM3D::moving_flux( const Material_t& Materials, const Mesh_t& mesh, const DCArrayKokkos& GaussPoints_vol, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& corner_q_flux, const DCArrayKokkos& sphere_position, const corners_in_mat_t corners_in_mat_elem, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/momentum.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/momentum.cpp index 12879f960..17890246a 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/momentum.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/momentum.cpp @@ -52,8 +52,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void SGTM3D::update_velocity(double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - const DCArrayKokkos& node_mass, + DistributedDCArray& node_vel, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_force) const { // const size_t num_dims = mesh.num_dims; diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp index d6bf2d67b..4386350c0 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp @@ -52,7 +52,7 @@ void SGTM3D::update_position( double dt, const size_t num_dims, const size_t num_nodes, - DCArrayKokkos& node_coords, + DistributedDCArray& node_coords, const DCArrayKokkos& node_coords_n0, const DCArrayKokkos& node_vel, const DCArrayKokkos& node_vel_n0) const diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/properties.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/properties.cpp index 3ffa5dd6a..9230638eb 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/properties.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/properties.cpp @@ -63,8 +63,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void SGTM3D::update_state( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_pres, const DCArrayKokkos& MaterialPoints_stress, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/temperature_sgtm.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/temperature_sgtm.cpp index a5b9a1e2f..436b9992f 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/temperature_sgtm.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/temperature_sgtm.cpp @@ -54,10 +54,10 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. void SGTM3D::update_temperature( const Mesh_t& mesh, const DCArrayKokkos& corner_q_transfer, - const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_temp_n0, - const DCArrayKokkos& node_mass, - const DCArrayKokkos& node_q_transfer, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_temp_n0, + const DistributedDCArray& node_mass, + const DistributedDCArray& node_q_transfer, const DCArrayKokkos& mat_pt_sepcific_heat, const double rk_alpha, const double dt) const diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp index 5943c05f5..5df77eb15 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp @@ -51,12 +51,12 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// ///////////////////////////////////////////////////////////////////////////// void SGTM3D::rk_init( - DCArrayKokkos& node_coords, - DCArrayKokkos& node_coords_n0, - DCArrayKokkos& node_vel, - DCArrayKokkos& node_vel_n0, - DCArrayKokkos& node_temp, - DCArrayKokkos& node_temp_n0, + DistributedDCArray& node_coords, + DistributedDCArray& node_coords_n0, + DistributedDCArray& node_vel, + DistributedDCArray& node_vel_n0, + DistributedDCArray& node_temp, + DistributedDCArray& node_temp_n0, DCArrayKokkos& MaterialPoints_q_flux, DCArrayKokkos& MaterialPoints_stress, const size_t num_dims, @@ -100,8 +100,8 @@ std::cout << "done with rk_int \n "; /// ///////////////////////////////////////////////////////////////////////////// void SGTM3D::get_timestep(Mesh_t& mesh, - DCArrayKokkos& node_coords, - DCArrayKokkos& node_vel, + DistributedDCArray& node_coords, + DistributedDCArray& node_vel, DCArrayKokkos& GaussPoints_vol, DCArrayKokkos& MaterialPoints_sspd, DCArrayKokkos& MaterialPoints_conductivity, From e3e70b74ecd5edb0a58512b8ebcd504f97126815 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 25 May 2025 18:02:31 -0600 Subject: [PATCH 12/66] WIP: MPI work --- .../src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h | 4 ++-- .../src/Solvers/SGH_solver_3D/src/momentum.cpp | 4 ++-- .../level_set_solver/include/level_set_solver.h | 8 ++++---- .../Solvers/level_set_solver/src/solver_functions.cpp | 2 +- .../Solvers/level_set_solver/src/time_integration.cpp | 4 ++-- .../boundary_conditions/temperature/constant_temp_bc.h | 2 +- .../velocity/constant_velocity_bc.h | 2 +- .../src/boundary_conditions/velocity/no_velocity_bc.h | 2 +- .../boundary_conditions/velocity/piston_velocity_bc.h | 2 +- .../velocity/reflected_velocity_bc.h | 2 +- .../velocity/time_varying_velocity_bc.h | 2 +- .../velocity/user_defined_velocity_bc.h | 2 +- .../boundary_conditions/velocity/zero_velocity_bc.h | 2 +- 
.../src/common/include/boundary_conditions.h | 6 +++--- single-node-refactor/src/common/include/material.h | 8 +++++--- single-node-refactor/src/common/include/region_fill.h | 10 +++++----- single-node-refactor/src/common/src/region_fill.cpp | 2 +- .../src/input/parse_material_inputs.hpp | 3 ++- .../src/material_models/artificial_viscosity/mars.h | 8 ++++---- .../artificial_viscosity/no_dissipation.h | 2 +- .../src/material_models/strength/host_ann_strength.h | 4 ++-- .../strength/host_user_defined_strength.h | 4 ++-- .../src/material_models/strength/no_strength.h | 4 ++-- .../material_models/strength/user_defined_strength.h | 4 ++-- 24 files changed, 48 insertions(+), 45 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index 3aca4c8eb..90e4d4cca 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -222,13 +222,13 @@ class SGH3D : public Solver void boundary_velocity( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const; void boundary_contact( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - DCArrayKokkos& node_vel, + DistributedDCArray& node_vel, const double time_value) const; void boundary_stress(const Mesh_t& mesh, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp index 7455add18..c26ada2e4 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp @@ -56,7 +56,7 @@ void SGH3D::update_velocity(double rk_alpha, DistributedDCArray& node_vel_n0, const DistributedDCArray& node_mass, const DistributedDCArray& node_force, - const DistributedDCArray& 
corner_force) const + const DCArrayKokkos& corner_force) const { const size_t num_dims = mesh.num_dims; @@ -102,7 +102,7 @@ void SGH3D::get_velgrad(DCArrayKokkos& vel_grad, const Mesh_t mesh, const DistributedDCArray& node_coords, const DistributedDCArray& node_vel, - const DistributedDCArray& elem_vol) const + const DCArrayKokkos& elem_vol) const { const size_t num_nodes_in_elem = 8; const size_t num_dims = 3; diff --git a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h index e33c75a0d..ef9e63ef2 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h +++ b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h @@ -176,7 +176,7 @@ class LevelSet : public Solver // **** Functions defined in solver_functions.cpp **** // void nodal_gradient( const Mesh_t mesh, - const DCArrayKokkos& Node_coords, + const DistributedDCArray& Node_coords, const DCArrayKokkos& node_level_set_vel, const DCArrayKokkos& Node_grad_level_set, const DCArrayKokkos& Corner_normal, @@ -218,7 +218,7 @@ class LevelSet : public Solver void get_timestep( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& GaussPoints_vol, const DCArrayKokkos& MaterialToMeshMaps_elem, const size_t num_mat_elems, @@ -236,7 +236,7 @@ class LevelSet : public Solver void get_timestep_2D( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& GaussPoints_vol, const DCArrayKokkos& MaterialToMeshMaps_elem, const size_t num_mat_elems, @@ -257,7 +257,7 @@ class LevelSet : public Solver void boundary_velocity( const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_vel, + DCArrayKokkos& node_level_set_vel, const double time_value, const double small) 
const; diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp index 5ecf8c3ed..f14e0432e 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp @@ -53,7 +53,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ///////////////////////////////////////////////////////////////////////////// void LevelSet::nodal_gradient( const Mesh_t mesh, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& node_level_set_vel, const DCArrayKokkos& node_grad_level_set, const DCArrayKokkos& corner_normal, diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/time_integration.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/time_integration.cpp index 00ed17d63..6d973a56b 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/time_integration.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/time_integration.cpp @@ -93,7 +93,7 @@ void LevelSet::rk_init( void LevelSet::get_timestep( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& GaussPoints_vol, const DCArrayKokkos& MaterialToMeshMaps_elem, const size_t num_mat_elems, @@ -220,7 +220,7 @@ void LevelSet::get_timestep( void LevelSet::get_timestep_2D( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_coords, + const DistributedDCArray& node_coords, const DCArrayKokkos& GaussPoints_vol, const DCArrayKokkos& MaterialToMeshMaps_elem, const size_t num_mat_elems, diff --git a/single-node-refactor/src/boundary_conditions/temperature/constant_temp_bc.h b/single-node-refactor/src/boundary_conditions/temperature/constant_temp_bc.h index bcf390daa..3453ddb2a 100644 --- 
a/single-node-refactor/src/boundary_conditions/temperature/constant_temp_bc.h +++ b/single-node-refactor/src/boundary_conditions/temperature/constant_temp_bc.h @@ -63,7 +63,7 @@ static void temperature(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& temp_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_temp, + const DistributedDCArray& node_temp, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/constant_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/constant_velocity_bc.h index a1d487c61..063a47225 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/constant_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/constant_velocity_bc.h @@ -62,7 +62,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/no_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/no_velocity_bc.h index 9a0a4e1c7..023e9bba2 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/no_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/no_velocity_bc.h @@ -62,7 +62,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/piston_velocity_bc.h 
b/single-node-refactor/src/boundary_conditions/velocity/piston_velocity_bc.h index e11303f1a..6d97b8de6 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/piston_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/piston_velocity_bc.h @@ -77,7 +77,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/reflected_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/reflected_velocity_bc.h index 3cb14a997..5bdb1089b 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/reflected_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/reflected_velocity_bc.h @@ -64,7 +64,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/time_varying_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/time_varying_velocity_bc.h index 09a78a216..bbc9e48b1 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/time_varying_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/time_varying_velocity_bc.h @@ -78,7 +78,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const 
size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/user_defined_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/user_defined_velocity_bc.h index 0024ef024..92b02aec5 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/user_defined_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/user_defined_velocity_bc.h @@ -64,7 +64,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/boundary_conditions/velocity/zero_velocity_bc.h b/single-node-refactor/src/boundary_conditions/velocity/zero_velocity_bc.h index 8334eb99c..4daacb273 100644 --- a/single-node-refactor/src/boundary_conditions/velocity/zero_velocity_bc.h +++ b/single-node-refactor/src/boundary_conditions/velocity/zero_velocity_bc.h @@ -64,7 +64,7 @@ static void velocity(const Mesh_t& mesh, const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/common/include/boundary_conditions.h b/single-node-refactor/src/common/include/boundary_conditions.h index beaaa6c13..eb9665975 100644 --- a/single-node-refactor/src/common/include/boundary_conditions.h +++ b/single-node-refactor/src/common/include/boundary_conditions.h @@ -208,7 +208,7 @@ struct BoundaryConditionFunctions_t const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& vel_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_vel, + const 
DistributedDCArray& node_vel, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, @@ -219,7 +219,7 @@ struct BoundaryConditionFunctions_t const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& temp_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_temp, + const DistributedDCArray& node_temp, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, @@ -230,7 +230,7 @@ struct BoundaryConditionFunctions_t const DCArrayKokkos& BoundaryConditionEnums, const RaggedRightArrayKokkos& heat_flux_bc_global_vars, const DCArrayKokkos& bc_state_vars, - const DCArrayKokkos& node_temp, + const DistributedDCArray& node_temp, const double time_value, const size_t rk_stage, const size_t bdy_node_gid, diff --git a/single-node-refactor/src/common/include/material.h b/single-node-refactor/src/common/include/material.h index 1cff051b4..3bb3341a4 100644 --- a/single-node-refactor/src/common/include/material.h +++ b/single-node-refactor/src/common/include/material.h @@ -40,6 +40,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
using namespace mtr; +template +using DistributedDCArray = TpetraDCArray; namespace model { // strength model types @@ -322,8 +324,8 @@ struct MaterialFunctions_t // Material strength model function pointers void (*calc_stress)( const DCArrayKokkos &GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, const DCArrayKokkos &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, @@ -381,7 +383,7 @@ struct MaterialFunctions_t const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, diff --git a/single-node-refactor/src/common/include/region_fill.h b/single-node-refactor/src/common/include/region_fill.h index da60ac0ce..0319e9790 100644 --- a/single-node-refactor/src/common/include/region_fill.h +++ b/single-node-refactor/src/common/include/region_fill.h @@ -69,9 +69,9 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, void fill_regions( const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos & node_coords, - DCArrayKokkos & node_vel, - DCArrayKokkos & node_temp, + const DistributedDCArray & node_coords, + DistributedDCArray & node_vel, + DistributedDCArray & node_temp, DCArrayKokkos & gauss_den, DCArrayKokkos & gauss_sie, DCArrayKokkos & gauss_use_sie, @@ -250,7 +250,7 @@ void paint_multi_scalar(const DCArrayKokkos& field_scalar, /// ///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION -void paint_scalar(const DCArrayKokkos& field_scalar, +void paint_scalar(const DistributedDCArray& field_scalar, const ViewCArrayKokkos mesh_coords, const double scalar, 
const double slope, @@ -275,7 +275,7 @@ void paint_scalar(const DCArrayKokkos& field_scalar, /// ///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION -void paint_vector(const DCArrayKokkos& vector_field, +void paint_vector(const DistributedDCArray& vector_field, const ViewCArrayKokkos & mesh_coords, const double u, const double v, diff --git a/single-node-refactor/src/common/src/region_fill.cpp b/single-node-refactor/src/common/src/region_fill.cpp index 68ea64a3c..f2433bd87 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -229,7 +229,7 @@ void fill_regions( const Mesh_t& mesh, const DistributedDCArray & node_coords, DistributedDCArray & node_vel, - DCArrayKokkos & node_temp, + DistributedDCArray & node_temp, DCArrayKokkos & gauss_den, DCArrayKokkos & gauss_sie, DCArrayKokkos & gauss_use_sie, diff --git a/single-node-refactor/src/input/parse_material_inputs.hpp b/single-node-refactor/src/input/parse_material_inputs.hpp index 694f9a988..f17bd01d6 100644 --- a/single-node-refactor/src/input/parse_material_inputs.hpp +++ b/single-node-refactor/src/input/parse_material_inputs.hpp @@ -53,7 +53,8 @@ struct MaterialFunctions_t; struct MaterialEnums_t; using namespace mtr; - +template +using DistributedDCArray = TpetraDCArray; // parse the material text void parse_materials(Yaml::Node& root, Material_t& Materials, const size_t num_dims); diff --git a/single-node-refactor/src/material_models/artificial_viscosity/mars.h b/single-node-refactor/src/material_models/artificial_viscosity/mars.h index 941372702..a8b3f4fb9 100644 --- a/single-node-refactor/src/material_models/artificial_viscosity/mars.h +++ b/single-node-refactor/src/material_models/artificial_viscosity/mars.h @@ -48,7 +48,7 @@ namespace MARSDissipationModel { const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, 
- const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, @@ -364,7 +364,7 @@ namespace DirMARSDissipationModel { const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, @@ -659,7 +659,7 @@ namespace MARSRZDissipationModel { const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, @@ -932,7 +932,7 @@ namespace DirMARSRZDissipationModel { const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, diff --git a/single-node-refactor/src/material_models/artificial_viscosity/no_dissipation.h b/single-node-refactor/src/material_models/artificial_viscosity/no_dissipation.h index b5774c908..366a61626 100644 --- a/single-node-refactor/src/material_models/artificial_viscosity/no_dissipation.h +++ b/single-node-refactor/src/material_models/artificial_viscosity/no_dissipation.h @@ -45,7 +45,7 @@ namespace NoDissipationModel { const RaggedRightArrayKokkos & dissipation_global_vars, const DCArrayKokkos& GaussPoints_vel_grad, const DCArrayKokkos& MaterialPoints_eroded, - const DCArrayKokkos& node_vel, + const 
DistributedDCArray& node_vel, const DCArrayKokkos& MaterialPoints_den, const DCArrayKokkos& MaterialPoints_sspd, const ViewCArrayKokkos& disp_corner_forces, diff --git a/single-node-refactor/src/material_models/strength/host_ann_strength.h b/single-node-refactor/src/material_models/strength/host_ann_strength.h index 926707b46..2e4569164 100644 --- a/single-node-refactor/src/material_models/strength/host_ann_strength.h +++ b/single-node-refactor/src/material_models/strength/host_ann_strength.h @@ -219,8 +219,8 @@ namespace HostANNStrengthModel { // this model is launched from the CPU, coding inside is run on GPUS static void calc_stress( const DCArrayKokkos &GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, const DCArrayKokkos &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, diff --git a/single-node-refactor/src/material_models/strength/host_user_defined_strength.h b/single-node-refactor/src/material_models/strength/host_user_defined_strength.h index 76842a845..fc0d22a83 100644 --- a/single-node-refactor/src/material_models/strength/host_user_defined_strength.h +++ b/single-node-refactor/src/material_models/strength/host_user_defined_strength.h @@ -94,8 +94,8 @@ namespace HostUserDefinedStrengthModel { // function is accessed on the Host static void calc_stress( const DCArrayKokkos& GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, const ViewCArrayKokkos& elem_node_gids, const DCArrayKokkos& MaterialPoints_pres, const DCArrayKokkos& MaterialPoints_stress, diff --git a/single-node-refactor/src/material_models/strength/no_strength.h b/single-node-refactor/src/material_models/strength/no_strength.h index 00b34725a..c55cb0ce3 100644 --- 
a/single-node-refactor/src/material_models/strength/no_strength.h +++ b/single-node-refactor/src/material_models/strength/no_strength.h @@ -73,8 +73,8 @@ namespace NoStrengthModel { KOKKOS_FUNCTION static void calc_stress( const DCArrayKokkos &vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, const DCArrayKokkos &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, diff --git a/single-node-refactor/src/material_models/strength/user_defined_strength.h b/single-node-refactor/src/material_models/strength/user_defined_strength.h index d6ba656b9..fba1f286c 100644 --- a/single-node-refactor/src/material_models/strength/user_defined_strength.h +++ b/single-node-refactor/src/material_models/strength/user_defined_strength.h @@ -94,8 +94,8 @@ namespace UserDefinedStrengthModel { KOKKOS_FUNCTION static void calc_stress( const DCArrayKokkos &GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, const DCArrayKokkos &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, From c3a12ade09c241ebdf6f989b6dedc40ba23a4906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 25 May 2025 22:32:07 -0600 Subject: [PATCH 13/66] WIP: MPI work --- .../src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h | 2 +- .../src/Solvers/SGH_solver_3D/src/boundary.cpp | 2 +- .../src/Solvers/SGH_solver_3D/src/sgh_execute.cpp | 2 +- single-node-refactor/src/common/include/material.h | 2 +- single-node-refactor/src/common/include/region_fill.h | 10 +++++----- .../src/material_models/strength/host_ann_strength.h | 2 +- .../strength/host_user_defined_strength.h | 2 +- .../src/material_models/strength/no_strength.h | 2 +- 
.../material_models/strength/user_defined_strength.h | 4 ++-- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index 90e4d4cca..5d7fd2e7a 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -233,7 +233,7 @@ class SGH3D : public Solver void boundary_stress(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_bdy_force, + DistributedDCArray& node_bdy_force, DistributedDCArray& node_coords, const double time_value) const; diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp index 62f07db73..4ad35f2e3 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/boundary.cpp @@ -118,7 +118,7 @@ void SGH3D::boundary_contact(const Mesh_t& mesh, ///////////////////////////////////////////////////////////////////////////// void SGH3D::boundary_stress(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_bdy_force, + DistributedDCArray& node_bdy_force, DistributedDCArray& node_coords, const double time_value) const { diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index 591a56096..997b8b142 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -59,7 +59,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, { double fuzz = SimulationParameters.dynamic_options.fuzz; // 1.e-16 - double tiny = SimulationParamaters.dynamic_options.tiny; // 1.e-12 + double tiny = 
SimulationParameters.dynamic_options.tiny; // 1.e-12 double small = SimulationParameters.dynamic_options.small; // 1.e-8 double graphics_dt_ival = SimulationParameters.output_options.graphics_time_step; diff --git a/single-node-refactor/src/common/include/material.h b/single-node-refactor/src/common/include/material.h index 3bb3341a4..1056b487d 100644 --- a/single-node-refactor/src/common/include/material.h +++ b/single-node-refactor/src/common/include/material.h @@ -326,7 +326,7 @@ struct MaterialFunctions_t const DCArrayKokkos &GaussPoints_vel_grad, const DistributedDCArray &node_coords, const DistributedDCArray &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_stress_n0, diff --git a/single-node-refactor/src/common/include/region_fill.h b/single-node-refactor/src/common/include/region_fill.h index 0319e9790..3e176029a 100644 --- a/single-node-refactor/src/common/include/region_fill.h +++ b/single-node-refactor/src/common/include/region_fill.h @@ -386,8 +386,8 @@ void init_press_sspd_stress(const Material_t& Materials, ///////////////////////////////////////////////////////////////////////////// void calc_corner_mass(const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass, const DCArrayKokkos& MaterialPoints_mass, const DCArrayKokkos& MaterialToMeshMaps_elem, @@ -409,8 +409,8 @@ void calc_corner_mass(const Material_t& Materials, /// ///////////////////////////////////////////////////////////////////////////// void calc_node_mass(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass); 
@@ -418,7 +418,7 @@ void calc_node_mass(const Mesh_t& mesh, void init_corner_node_masses_zero( const Mesh_t& mesh, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass); #endif \ No newline at end of file diff --git a/single-node-refactor/src/material_models/strength/host_ann_strength.h b/single-node-refactor/src/material_models/strength/host_ann_strength.h index 2e4569164..6fab825d0 100644 --- a/single-node-refactor/src/material_models/strength/host_ann_strength.h +++ b/single-node-refactor/src/material_models/strength/host_ann_strength.h @@ -221,7 +221,7 @@ namespace HostANNStrengthModel { const DCArrayKokkos &GaussPoints_vel_grad, const DistributedDCArray &node_coords, const DistributedDCArray &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_stress_n0, diff --git a/single-node-refactor/src/material_models/strength/host_user_defined_strength.h b/single-node-refactor/src/material_models/strength/host_user_defined_strength.h index fc0d22a83..1ff1ed010 100644 --- a/single-node-refactor/src/material_models/strength/host_user_defined_strength.h +++ b/single-node-refactor/src/material_models/strength/host_user_defined_strength.h @@ -204,7 +204,7 @@ namespace HostNotionalStrengthModel { const DCArrayKokkos &vel_grad, const DCArrayKokkos &node_coords, const DCArrayKokkos &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_sspd, diff --git a/single-node-refactor/src/material_models/strength/no_strength.h b/single-node-refactor/src/material_models/strength/no_strength.h index c55cb0ce3..12f9986c9 100644 --- a/single-node-refactor/src/material_models/strength/no_strength.h +++ 
b/single-node-refactor/src/material_models/strength/no_strength.h @@ -75,7 +75,7 @@ namespace NoStrengthModel { const DCArrayKokkos &vel_grad, const DistributedDCArray &node_coords, const DistributedDCArray &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_stress_n0, diff --git a/single-node-refactor/src/material_models/strength/user_defined_strength.h b/single-node-refactor/src/material_models/strength/user_defined_strength.h index fba1f286c..e7a6f2b57 100644 --- a/single-node-refactor/src/material_models/strength/user_defined_strength.h +++ b/single-node-refactor/src/material_models/strength/user_defined_strength.h @@ -96,7 +96,7 @@ namespace UserDefinedStrengthModel { const DCArrayKokkos &GaussPoints_vel_grad, const DistributedDCArray &node_coords, const DistributedDCArray &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_stress_n0, @@ -201,7 +201,7 @@ namespace NotionalStrengthModel { const DCArrayKokkos &GaussPoints_vel_grad, const DCArrayKokkos &node_coords, const DCArrayKokkos &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &nodes_in_elem, const DCArrayKokkos &MaterialPoints_pres, const DCArrayKokkos &MaterialPoints_stress, const DCArrayKokkos &MaterialPoints_sspd, From 8d73e2f08a89126208914d0930bc649840f05f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 27 May 2025 16:52:28 -0600 Subject: [PATCH 14/66] WIP: MPI work --- single-node-refactor/CMakeLists.txt | 1 + single-node-refactor/src/CMakeLists.txt | 4 +--- .../SGTM_solver_3D/include/sgtm_solver_3D.h | 16 ++++++++-------- .../src/Solvers/SGTM_solver_3D/src/boundary.cpp | 2 +- 
.../src/Solvers/SGTM_solver_3D/src/position.cpp | 6 +++--- .../SGTM_solver_3D/src/time_integration.cpp | 2 +- .../level_set_solver/include/level_set_solver.h | 10 +++++----- .../level_set_solver/src/level_set_boundary.cpp | 2 +- .../level_set_solver/src/level_set_execute.cpp | 2 +- .../level_set_solver/src/solver_functions.cpp | 8 ++++---- 10 files changed, 26 insertions(+), 27 deletions(-) diff --git a/single-node-refactor/CMakeLists.txt b/single-node-refactor/CMakeLists.txt index 98a745760..c86ed4796 100755 --- a/single-node-refactor/CMakeLists.txt +++ b/single-node-refactor/CMakeLists.txt @@ -75,6 +75,7 @@ if (FIERRO_ENABLE_TRILINOS) else() find_package(Kokkos REQUIRED) list(APPEND LINKING_LIBRARIES Kokkos::kokkos) + add_definitions(-DHAVE_KOKKOS=1) endif() find_package(Matar REQUIRED) diff --git a/single-node-refactor/src/CMakeLists.txt b/single-node-refactor/src/CMakeLists.txt index 84f284c27..b75ac1d85 100755 --- a/single-node-refactor/src/CMakeLists.txt +++ b/single-node-refactor/src/CMakeLists.txt @@ -38,8 +38,6 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${VECTOR_CXX_FLAGS}") message("\n ****** ADDING FIERRO EXECUTABLE ******** \n ") -add_definitions(-DHAVE_KOKKOS=1) - if (CUDA) add_definitions(-DHAVE_CUDA=1) elseif (HIP) @@ -71,4 +69,4 @@ include_directories(Solvers/level_set_solver/include) add_subdirectory(Solvers/level_set_solver) add_executable(Fierro main.cpp driver.cpp solver.cpp ${COMMON_Files} ${EQUILIBRATION_Files} ${YAML_SRC_Files} ${SGH_3D_SRC_Files} ${SGH_RZ_SRC_Files} ${SGTM_3D_SRC_Files} ${LVLSET_SRC_Files}) -target_link_libraries(Fierro PRIVATE matar Kokkos::kokkos) +target_link_libraries(Fierro ${LINKING_LIBRARIES}) diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h index 2226050fd..69613fb8e 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h +++ 
b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h @@ -253,7 +253,7 @@ class SGTM3D : public Solver void boundary_heat_flux( const Mesh_t& mesh, const BoundaryCondition_t& Boundary, - DCArrayKokkos& node_temp, + DistributedDCArray& node_temp, const double time_value) const; // **** Functions defined in energy_sgtm.cpp **** // @@ -276,10 +276,10 @@ class SGTM3D : public Solver void update_temperature( const Mesh_t& mesh, const DCArrayKokkos& corner_q_transfer, - const DCArrayKokkos& node_temp, - const DCArrayKokkos& node_temp_n0, - const DCArrayKokkos& node_mass, - const DCArrayKokkos& node_q_transfer, + const DistributedDCArray& node_temp, + const DistributedDCArray& node_temp_n0, + const DistributedDCArray& node_mass, + const DistributedDCArray& node_q_transfer, const DCArrayKokkos& mat_pt_sepcific_heat, const double rk_alpha, const double dt) const; @@ -338,8 +338,8 @@ class SGTM3D : public Solver double rk_alpha, double dt, const Mesh_t& mesh, - DCArrayKokkos& node_vel, - const DCArrayKokkos& node_mass, + DistributedDCArray& node_vel, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_force) const; // **** Functions defined in properties.cpp **** // @@ -372,7 +372,7 @@ class SGTM3D : public Solver DistributedDCArray& node_vel_n0, DistributedDCArray& node_temp, DistributedDCArray& node_temp_n0, - DCArrayKokkos& MaterialPoints_q_flux, + DistributedDCArray& MaterialPoints_q_flux, DCArrayKokkos& MaterialPoints_stress, const size_t num_dims, const size_t num_elems, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp index 02aa7e60c..c742191c6 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/boundary.cpp @@ -50,7 +50,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
///////////////////////////////////////////////////////////////////////////// void SGTM3D::boundary_temperature(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_temp, + DistributedDCArray& node_temp, const double time_value) const { // ---- Loop over boundary sets ---- // diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp index 4386350c0..bbbe715f1 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/position.cpp @@ -53,9 +53,9 @@ void SGTM3D::update_position( const size_t num_dims, const size_t num_nodes, DistributedDCArray& node_coords, - const DCArrayKokkos& node_coords_n0, - const DCArrayKokkos& node_vel, - const DCArrayKokkos& node_vel_n0) const + const DistributedDCArray& node_coords_n0, + const DistributedDCArray& node_vel, + const DistributedDCArray& node_vel_n0) const { // loop over all the nodes in the mesh FOR_ALL(node_gid, 0, num_nodes, { diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp index 5df77eb15..8611493d2 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/time_integration.cpp @@ -57,7 +57,7 @@ void SGTM3D::rk_init( DistributedDCArray& node_vel_n0, DistributedDCArray& node_temp, DistributedDCArray& node_temp_n0, - DCArrayKokkos& MaterialPoints_q_flux, + DistributedDCArray& MaterialPoints_q_flux, DCArrayKokkos& MaterialPoints_stress, const size_t num_dims, const size_t num_elems, diff --git a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h index ef9e63ef2..269a52a31 100644 --- 
a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h +++ b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h @@ -177,8 +177,8 @@ class LevelSet : public Solver void nodal_gradient( const Mesh_t mesh, const DistributedDCArray& Node_coords, - const DCArrayKokkos& node_level_set_vel, - const DCArrayKokkos& Node_grad_level_set, + const DistributedDCArray& node_level_set_vel, + const DistributedDCArray& Node_grad_level_set, const DCArrayKokkos& Corner_normal, const DCArrayKokkos& Corner_volume, const DCArrayKokkos& GaussPoints_level_set, @@ -189,8 +189,8 @@ class LevelSet : public Solver void update_level_set( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_level_set_vel, - const DCArrayKokkos& Node_grad_level_set, + const DistributedDCArray& node_level_set_vel, + const DistributedDCArray& Node_grad_level_set, const DCArrayKokkos& GaussPoints_level_set, const DCArrayKokkos& GaussPoints_level_set_n, const DCArrayKokkos& GaussPoints_vol, @@ -257,7 +257,7 @@ class LevelSet : public Solver void boundary_velocity( const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_level_set_vel, + DistributedDCArray& node_level_set_vel, const double time_value, const double small) const; diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_boundary.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_boundary.cpp index 9435bf1b0..36f893d01 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_boundary.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_boundary.cpp @@ -50,7 +50,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
///////////////////////////////////////////////////////////////////////////// void LevelSet::boundary_velocity(const Mesh_t& mesh, const BoundaryCondition_t& BoundaryConditions, - DCArrayKokkos& node_level_set_vel, + DistributedDCArray& node_level_set_vel, const double time_value, const double small) const { diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_execute.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_execute.cpp index 6d05219b6..c1cbef2fc 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_execute.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_execute.cpp @@ -57,7 +57,7 @@ void LevelSet::execute(SimulationParameters_t& SimulationParamaters, { // arrays local to this solver - DCArrayKokkos node_level_set_vel(mesh.num_nodes, mesh.num_dims); + DistributedDCArray node_level_set_vel(mesh.global_num_nodes, mesh.num_dims); double fuzz = SimulationParamaters.dynamic_options.fuzz; double tiny = SimulationParamaters.dynamic_options.tiny; diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp index f14e0432e..a15dc9d43 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/solver_functions.cpp @@ -54,8 +54,8 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
void LevelSet::nodal_gradient( const Mesh_t mesh, const DistributedDCArray& node_coords, - const DCArrayKokkos& node_level_set_vel, - const DCArrayKokkos& node_grad_level_set, + const DistributedDCArray& node_level_set_vel, + const DistributedDCArray& node_grad_level_set, const DCArrayKokkos& corner_normal, const DCArrayKokkos& corner_volume, const DCArrayKokkos& GaussPoints_level_set, @@ -201,8 +201,8 @@ void LevelSet::nodal_gradient( void LevelSet::update_level_set( const Mesh_t& mesh, const Material_t& Materials, - const DCArrayKokkos& node_level_set_vel, - const DCArrayKokkos& node_grad_level_set, + const DistributedDCArray& node_level_set_vel, + const DistributedDCArray& node_grad_level_set, const DCArrayKokkos& GaussPoints_level_set, const DCArrayKokkos& GaussPoints_level_set_n0, const DCArrayKokkos& GaussPoints_vol, From 5f6269f3c1e6ec750b988352ae579242accde4f4 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz Date: Thu, 29 May 2025 00:12:15 -0600 Subject: [PATCH 15/66] WIP: MPI compile work --- single-node-refactor/CMakeLists.txt | 5 +++-- single-node-refactor/scripts/build-fierro.sh | 1 - single-node-refactor/scripts/cmake_build.sh | 3 +-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/single-node-refactor/CMakeLists.txt b/single-node-refactor/CMakeLists.txt index c86ed4796..90e0a57b0 100755 --- a/single-node-refactor/CMakeLists.txt +++ b/single-node-refactor/CMakeLists.txt @@ -71,13 +71,14 @@ if (FIERRO_ENABLE_TRILINOS) include_directories(${Trilinos_INCLUDE_DIRS} ${Trilinos_TPL_INCLUDE_DIRS}) list(APPEND LINKING_LIBRARIES Trilinos::all_selected_libs) - add_definitions(-DTRILINOS_INTERFACE=1 -DHAVE_MPI=1) + add_definitions(-DTRILINOS_INTERFACE=1 -DHAVE_MPI=1 -DHAVE_KOKKOS=1) else() find_package(Kokkos REQUIRED) list(APPEND LINKING_LIBRARIES Kokkos::kokkos) add_definitions(-DHAVE_KOKKOS=1) endif() -find_package(Matar REQUIRED) +#find_package(Matar REQUIRED) +include_directories(${MATAR_SOURCE_DIR}/src/include) 
include_directories(src/material_models/artificial_viscosity) include_directories(src/material_models/eos) diff --git a/single-node-refactor/scripts/build-fierro.sh b/single-node-refactor/scripts/build-fierro.sh index 9dcaa18b7..ce4de3d53 100755 --- a/single-node-refactor/scripts/build-fierro.sh +++ b/single-node-refactor/scripts/build-fierro.sh @@ -207,7 +207,6 @@ if [ "$build_action" = "full-app" ]; then elif [ "$trilinos" = "enabled" ]; then source trilinos-install.sh ${kokkos_build_type} ${intel_mkl} ${debug} fi - source matar-install.sh ${kokkos_build_type} ${debug} ${trilinos} source cmake_build.sh ${solver} ${debug} ${trilinos} elif [ "$build_action" = "install-kokkos" ]; then source kokkos-install.sh ${kokkos_build_type} diff --git a/single-node-refactor/scripts/cmake_build.sh b/single-node-refactor/scripts/cmake_build.sh index 96061c8bb..1a3f3045a 100755 --- a/single-node-refactor/scripts/cmake_build.sh +++ b/single-node-refactor/scripts/cmake_build.sh @@ -16,7 +16,6 @@ if [ "$trilinos" = "enabled" ]; then Trilinos_DIR=${TRILINOS_INSTALL_DIR}/lib/cmake/Trilinos fi cmake_options+=( - -D CMAKE_PREFIX_PATH="${MATAR_INSTALL_DIR}" -D Trilinos_DIR="$Trilinos_DIR" -D FIERRO_ENABLE_TRILINOS=ON ) @@ -46,7 +45,7 @@ fi echo "CMake Options: ${cmake_options[@]}" # Configure SGH -cmake "${cmake_options[@]}" -B "${SGH_BUILD_DIR}" -S "${SGH_BASE_DIR}" +cmake "${cmake_options[@]}" -DCMAKE_VERBOSE_MAKEFILE=ON -B "${SGH_BUILD_DIR}" -S "${SGH_BASE_DIR}" # Build SGH make -C "${SGH_BUILD_DIR}" -j${SGH_BUILD_CORES} From 7cfe2374eb2ceff4df6bb39f09d95970ac5a02b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 29 May 2025 13:39:28 -0600 Subject: [PATCH 16/66] WIP: MPI work --- single-node-refactor/scripts/trilinos-install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/single-node-refactor/scripts/trilinos-install.sh b/single-node-refactor/scripts/trilinos-install.sh index 2d5508ad0..1589d0f56 100644 --- 
a/single-node-refactor/scripts/trilinos-install.sh +++ b/single-node-refactor/scripts/trilinos-install.sh @@ -117,6 +117,7 @@ ${ADDITIONS[@]} -D Trilinos_ENABLE_Ifpack2=OFF -D Trilinos_ENABLE_Zoltan2=ON -D Trilinos_ENABLE_Anasazi=OFF +-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION=OFF -D MueLu_ENABLE_TESTS=OFF -D Trilinos_ENABLE_ALL_PACKAGES=OFF -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES=OFF From 1bae191e5ff08371b015b0c5a0a33d933d8f501b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 30 May 2025 09:20:13 -0600 Subject: [PATCH 17/66] WIP: MPI work (it compiles!) --- single-node-refactor/CMakeLists.txt | 24 ++++++++++--------- single-node-refactor/scripts/cmake_build.sh | 2 +- .../SGH_solver_rz/src/sgh_execute_rz.cpp | 2 +- .../src/common/src/region_fill.cpp | 14 +++++------ 4 files changed, 22 insertions(+), 20 deletions(-) diff --git a/single-node-refactor/CMakeLists.txt b/single-node-refactor/CMakeLists.txt index 90e0a57b0..6c2e41896 100755 --- a/single-node-refactor/CMakeLists.txt +++ b/single-node-refactor/CMakeLists.txt @@ -29,8 +29,8 @@ endif() if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE RelWithDebInfo) - # set(CMAKE_BUILD_TYPE Release) + # set(CMAKE_BUILD_TYPE RelWithDebInfo) + set(CMAKE_BUILD_TYPE Release) endif(NOT CMAKE_BUILD_TYPE) include_directories(../lib/Elements/matar) @@ -40,20 +40,22 @@ add_subdirectory(../lib/Elements/matar cbin) # include_directories(Mesh-Builder) # add_subdirectory(Mesh-Builder) +find_package(MPI REQUIRED) +add_definitions(-DHAVE_MPI=1) +set(LINKING_LIBRARIES MPI::MPI_CXX) +if (GPU_AWARE_MPI) + add_definitions(-DHAVE_GPU_AWARE_MPI) +# elseif (GPU_SHARED_MEM) +# add_definitions(-DHAVE_GPU_SHARED_MEM) +endif() if (FIERRO_ENABLE_TRILINOS) + find_package(Trilinos REQUIRED) #new # Assume if the CXX compiler exists, the rest do too. 
- if (EXISTS ${Trilinos_CXX_COMPILER}) set(CMAKE_CXX_COMPILER ${Trilinos_CXX_COMPILER}) set(CMAKE_C_COMPILER ${Trilinos_C_COMPILER}) set(CMAKE_Fortran_COMPILER ${Trilinos_Fortran_COMPILER}) - endif() - if(NOT DISTRIBUTION) - # Make sure to use same compilers and flags as Trilinos - set(CMAKE_CXX_FLAGS "${Trilinos_CXX_COMPILER_FLAGS} ${CMAKE_CXX_FLAGS}") - set(CMAKE_C_FLAGS "${Trilinos_C_COMPILER_FLAGS} ${CMAKE_C_FLAGS}") - set(CMAKE_Fortran_FLAGS "${Trilinos_Fortran_COMPILER_FLAGS} ${CMAKE_Fortran_FLAGS}") - endif() + message("\nFound Trilinos! Here are the details: ") message(" Trilinos_DIR = ${Trilinos_DIR}") @@ -71,7 +73,7 @@ if (FIERRO_ENABLE_TRILINOS) include_directories(${Trilinos_INCLUDE_DIRS} ${Trilinos_TPL_INCLUDE_DIRS}) list(APPEND LINKING_LIBRARIES Trilinos::all_selected_libs) - add_definitions(-DTRILINOS_INTERFACE=1 -DHAVE_MPI=1 -DHAVE_KOKKOS=1) + add_definitions(-DTRILINOS_INTERFACE=1 -DHAVE_KOKKOS=1) else() find_package(Kokkos REQUIRED) list(APPEND LINKING_LIBRARIES Kokkos::kokkos) diff --git a/single-node-refactor/scripts/cmake_build.sh b/single-node-refactor/scripts/cmake_build.sh index 1a3f3045a..c85b6e2e8 100755 --- a/single-node-refactor/scripts/cmake_build.sh +++ b/single-node-refactor/scripts/cmake_build.sh @@ -45,7 +45,7 @@ fi echo "CMake Options: ${cmake_options[@]}" # Configure SGH -cmake "${cmake_options[@]}" -DCMAKE_VERBOSE_MAKEFILE=ON -B "${SGH_BUILD_DIR}" -S "${SGH_BASE_DIR}" +cmake "${cmake_options[@]}" -B "${SGH_BUILD_DIR}" -S "${SGH_BASE_DIR}" # Build SGH make -C "${SGH_BUILD_DIR}" -j${SGH_BUILD_CORES} diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp index 2460bf17e..0575e90f0 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp @@ -551,7 +551,7 @@ double sum_domain_internal_energy_rz(const DCArrayKokkos& MaterialPoints } // 
end function double sum_domain_kinetic_energy_rz(const Mesh_t& mesh, - const DCArrayKokkos& node_vel, + const DistributedDCArray& node_vel, const CArrayKokkos& node_extensive_mass) { // extensive KE diff --git a/single-node-refactor/src/common/src/region_fill.cpp b/single-node-refactor/src/common/src/region_fill.cpp index f2433bd87..73bcf2f17 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -1683,7 +1683,7 @@ void paint_multi_scalar(const DCArrayKokkos& field_scalar, /// ///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION -void paint_scalar(const DCArrayKokkos& field_scalar, +void paint_scalar(const DistributedDCArray& field_scalar, const ViewCArrayKokkos mesh_coords, const double scalar, const double slope, @@ -1813,7 +1813,7 @@ void paint_scalar(const DCArrayKokkos& field_scalar, /// ///////////////////////////////////////////////////////////////////////////// KOKKOS_FUNCTION -void paint_vector(const DCArrayKokkos& vector_field, +void paint_vector(const DistributedDCArray& vector_field, const ViewCArrayKokkos & mesh_coords, const double u, const double v, @@ -2240,8 +2240,8 @@ void init_press_sspd_stress(const Material_t& Materials, ///////////////////////////////////////////////////////////////////////////// void calc_corner_mass(const Material_t& Materials, const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass, const DCArrayKokkos& MaterialPoints_mass, const DCArrayKokkos& MaterialToMeshMaps_elem, @@ -2285,8 +2285,8 @@ void calc_corner_mass(const Material_t& Materials, /// ///////////////////////////////////////////////////////////////////////////// void calc_node_mass(const Mesh_t& mesh, - const DCArrayKokkos& node_coords, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_coords, 
+ const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass) { @@ -2315,7 +2315,7 @@ void calc_node_mass(const Mesh_t& mesh, /// ///////////////////////////////////////////////////////////////////////////// void init_corner_node_masses_zero(const Mesh_t& mesh, - const DCArrayKokkos& node_mass, + const DistributedDCArray& node_mass, const DCArrayKokkos& corner_mass) { // calculate the nodal mass From 043f5a9551419fba653e75a1349892452487c292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 13 Jun 2025 21:34:03 -0600 Subject: [PATCH 18/66] WIP: MPI in refactor --- .../src/common/include/mesh.h | 369 ++++++++++++++++++ .../src/common/include/mesh_io.h | 4 +- .../src/common/include/state.h | 3 +- single-node-refactor/src/driver.cpp | 3 + 4 files changed, 376 insertions(+), 3 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 8486438b8..a26684937 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -383,6 +383,375 @@ struct Mesh_t return; }; // end method + /* ---------------------------------------------------------------------- + Initialize Ghost and Non-Overlapping Element Maps + ------------------------------------------------------------------------- */ + void init_maps() + { + int num_dim = simparam.num_dims; + int local_node_index, current_column_index; + int num_nodes_in_elem; + long long int node_gid; + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + + num_ghost_nodes=0; + DCArrayKokkos ghost_nodes; + if (num_elems >= 1) + { + // Construct set of ghost nodes; start with a buffer with upper limit + std::set ghost_node_set; + + for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) + { + // set nodes per element + num_nodes_in_elem = elem->num_nodes(); + for (int node_lid = 0; node_lid < 
num_nodes_in_elem; node_lid++) + { + node_gid = nodes_in_elem(cell_rid, node_lid); //nodes in elem still stores global indices + if (!node_map.isProcessGlobalIndex(node_gid)) + { + ghost_node_set.insert(node_gid); + } + } + } + + // by now the set contains, with no repeats, all the global node indices that are ghosts for this rank + // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices + + num_ghost_nodes = ghost_node_set.size(); + + // create a Map for ghost node indices + ghost_nodes = DCArrayKokkos(num_ghost_nodes, "ghost_nodes"); //pass this into map object + while (it != ghost_node_set.end()) { + ghost_nodes.host(ighost++) = *it; + it++; + } + ghost_nodes.update_device(); + + //Use the ranks to break ties in shared element assignment for a unique element map used in elem set reductions later + //this wont be that great at load balancing element counts but its simple and works for now + ghost_node_ranks = DCArrayKokkos(num_ghost_nodes, "ghost_nodes_ranks"); + int ighost = 0; + auto it = ghost_node_set.begin(); + + // debug print of ghost nodes + // std::cout << " GHOST NODE SET ON TASK " << myrank << std::endl; + // for(int i = 0; i < num_ghost_nodes; i++) + // std::cout << "{" << i + 1 << "," << ghost_nodes(i) + 1 << "}" << std::endl; + + // find which mpi rank each ghost node belongs to and store the information in a CArrayKokkos + // // allocate Teuchos Views since they are the only input available at the moment in the Tpetra map definitions + // Teuchos::ArrayView ghost_nodes_pass(ghost_nodes.h_view.data(), num_ghost_nodes); + + // Teuchos::ArrayView ghost_node_ranks_pass(ghost_node_ranks.h_view.data(), num_ghost_nodes); + + node_map.getRemoteIndexList(ghost_nodes, ghost_node_ranks); + ghost_node_ranks.update_device(); + } + + ghost_node_map = DistributedMap(ghost_nodes); + + // construct array for all indices (ghost + local) + num_nodes = num_local_nodes + num_ghost_nodes; + 
DCArrayKokkos all_nodes; + if(num_nodes){ + // CArrayKokkos all_node_indices(num_nodes, "all_node_indices"); + all_nodes = DCArrayKokkosDCArrayKokkos(num_nodes, "num_nodes"); + for (int i = 0; i < num_nodes; i++) + { + if (i < num_local_nodes) + { + all_nodes.host(i) = node_map.getGlobalIndex(i); + } + else + { + all_nodes.host(i) = ghost_nodes.host(i - num_local_nodes); + } + } + all_nodes.update_device(); + // debug print of node indices + // for(int inode=0; inode < index_counter; inode++) + // std::cout << " my_reduced_global_indices " << my_reduced_global_indices(inode) < Initial_Element_Global_Indices("Initial_Element_Global_Indices", num_elems); + + size_t nonoverlapping_count = 0; + int my_element_flag; + + // loop through local element set + if (num_dim == 2) + { + for (int ielem = 0; ielem < num_elems; ielem++) + { + element_select->choose_2Delem_type(Element_Types(ielem), elem2D); + num_nodes_in_elem = elem2D->num_nodes(); + + my_element_flag = 1; + for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) + { + node_gid = nodes_in_elem(ielem, lnode); + if (ghost_node_map->isNodeGlobalElement(node_gid)) + { + local_node_index = ghost_node_map->getLocalElement(node_gid); + if (ghost_node_ranks.h_view(local_node_index) < myrank) + { + my_element_flag = 0; + } + } + } + if (my_element_flag) + { + Initial_Element_Global_Indices.h_view(nonoverlapping_count++) = all_element_map->getGlobalElement(ielem); + } + } + } + + if (num_dim == 3) + { + for (int ielem = 0; ielem < num_elems; ielem++) + { + element_select->choose_3Delem_type(Element_Types(ielem), elem); + num_nodes_in_elem = elem->num_nodes(); + + my_element_flag = 1; + + for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) + { + node_gid = nodes_in_elem(ielem, lnode); + if (ghost_node_map->isNodeGlobalElement(node_gid)) + { + local_node_index = ghost_node_map->getLocalElement(node_gid); + if (ghost_node_ranks.h_view(local_node_index) < myrank) + { + my_element_flag = 0; + } + } + } + if 
(my_element_flag) + { + Initial_Element_Global_Indices.h_view(nonoverlapping_count++) = all_element_map->getGlobalElement(ielem); + } + } + } + + // copy over from buffer to compressed storage + Kokkos::DualView Element_Global_Indices("Element_Global_Indices", nonoverlapping_count); + for (int ibuffer = 0; ibuffer < nonoverlapping_count; ibuffer++) + { + Element_Global_Indices.h_view(ibuffer) = Initial_Element_Global_Indices.h_view(ibuffer); + } + nlocal_elem_non_overlapping = nonoverlapping_count; + Element_Global_Indices.modify_host(); + Element_Global_Indices.sync_device(); + // create nonoverlapping element map + element_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), Element_Global_Indices.d_view, 0, comm)); + + // sort element connectivity so nonoverlaps are sequentially found first + // define initial sorting of global indices + + // element_map->describe(*fos,Teuchos::VERB_EXTREME); + + for (int ielem = 0; ielem < num_elems; ielem++) + { + Initial_Element_Global_Indices.h_view(ielem) = all_element_map->getGlobalElement(ielem); + } + + // re-sort so local elements in the nonoverlapping map are first in storage + CArrayKokkos Temp_Nodes(max_nodes_per_element); + + GO temp_element_gid, current_element_gid; + int last_storage_index = num_elems - 1; + + for (int ielem = 0; ielem < nlocal_elem_non_overlapping; ielem++) + { + current_element_gid = Initial_Element_Global_Indices.h_view(ielem); + // if this element is not part of the non overlap list then send it to the end of the storage and swap the element at the end + if (!element_map->isNodeGlobalElement(current_element_gid)) + { + temp_element_gid = current_element_gid; + for (int lnode = 0; lnode < max_nodes_per_element; lnode++) + { + Temp_Nodes(lnode) = nodes_in_elem(ielem, lnode); + } + Initial_Element_Global_Indices.h_view(ielem) = Initial_Element_Global_Indices.h_view(last_storage_index); + Initial_Element_Global_Indices.h_view(last_storage_index) = temp_element_gid; + for (int 
lnode = 0; lnode < max_nodes_per_element; lnode++) + { + nodes_in_elem(ielem, lnode) = nodes_in_elem(last_storage_index, lnode); + nodes_in_elem(last_storage_index, lnode) = Temp_Nodes(lnode); + } + last_storage_index--; + + // test if swapped element is also not part of the non overlap map; if so lower loop counter to repeat the above + temp_element_gid = Initial_Element_Global_Indices.h_view(ielem); + if (!element_map->isNodeGlobalElement(temp_element_gid)) + { + ielem--; + } + } + } + // reset all element map to its re-sorted version + Initial_Element_Global_Indices.modify_host(); + Initial_Element_Global_Indices.sync_device(); + + all_element_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), Initial_Element_Global_Indices.d_view, 0, comm)); + // element_map->describe(*fos,Teuchos::VERB_EXTREME); + // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); + + // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); + // construct dof map that follows from the node map (used for distributed matrix and vector objects later) + Kokkos::DualView local_dof_indices("local_dof_indices", num_local_nodes * num_dim); + for (int i = 0; i < num_local_nodes; i++) + { + for (int j = 0; j < num_dim; j++) + { + local_dof_indices.h_view(i * num_dim + j) = map->getGlobalElement(i) * num_dim + j; + } + } + + local_dof_indices.modify_host(); + local_dof_indices.sync_device(); + local_dof_map = Teuchos::rcp(new Tpetra::Map(num_nodes * num_dim, local_dof_indices.d_view, 0, comm) ); + + // construct dof map that follows from the all_node map (used for distributed matrix and vector objects later) + Kokkos::DualView all_dof_indices("all_dof_indices", num_nodes * num_dim); + for (int i = 0; i < num_nodes; i++) + { + for (int j = 0; j < num_dim; j++) + { + all_dof_indices.h_view(i * num_dim + j) = all_node_map->getGlobalElement(i) * num_dim + j; + } + } + + all_dof_indices.modify_host(); + all_dof_indices.sync_device(); + // pass invalid global count so the map reduces 
the global count automatically + all_dof_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), all_dof_indices.d_view, 0, comm) ); + + // debug print of map + // debug print + + std::ostream& out = std::cout; + + Teuchos::RCP fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); + // if(myrank==0) + // *fos << "Ghost Node Map :" << std::endl; + // all_node_map->describe(*fos,Teuchos::VERB_EXTREME); + // *fos << std::endl; + // std::fflush(stdout); + + // Count how many elements connect to each local node + node_nconn_distributed = Teuchos::rcp(new MCONN(map, 1)); + // active view scope + { + host_elem_conn_array node_nconn = node_nconn_distributed->getLocalView(Tpetra::Access::ReadWrite); + for (int inode = 0; inode < num_local_nodes; inode++) + { + node_nconn(inode, 0) = 0; + } + + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { + node_gid = nodes_in_elem(ielem, inode); + if (map->isNodeGlobalElement(node_gid)) + { + node_nconn(map->getLocalElement(node_gid), 0)++; + } + } + } + } + + // create distributed multivector of the local node data and all (local + ghost) node storage + + all_node_coords_distributed = Teuchos::rcp(new MV(all_node_map, num_dim)); + ghost_node_coords_distributed = Teuchos::rcp(new MV(ghost_node_map, num_dim)); + + // create import object using local node indices map and all indices map + comm_importer_setup(); + + // create export objects for reverse comms + comm_exporter_setup(); + + // comms to get ghosts + all_node_coords_distributed->doImport(*node_coords_distributed, *importer, Tpetra::INSERT); + // all_node_nconn_distributed->doImport(*node_nconn_distributed, importer, Tpetra::INSERT); + + dual_nodes_in_elem.sync_device(); + dual_nodes_in_elem.modify_device(); + // construct distributed element connectivity multivector + global_nodes_in_elem_distributed = Teuchos::rcp(new MCONN(all_element_map, dual_nodes_in_elem)); + + // construct map of nodes that 
belong to the non-overlapping element set (contained by ghost + local node set but not all of them) + std::set nonoverlap_elem_node_set; + if (nlocal_elem_non_overlapping) + { + // search through local elements for global node indices not owned by this MPI rank + if (num_dim == 2) + { + for (int cell_rid = 0; cell_rid < nlocal_elem_non_overlapping; cell_rid++) + { + // set nodes per element + element_select->choose_2Delem_type(Element_Types(cell_rid), elem2D); + num_nodes_in_elem = elem2D->num_nodes(); + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) + { + node_gid = nodes_in_elem(cell_rid, node_lid); + nonoverlap_elem_node_set.insert(node_gid); + } + } + } + + if (num_dim == 3) + { + for (int cell_rid = 0; cell_rid < nlocal_elem_non_overlapping; cell_rid++) + { + // set nodes per element + element_select->choose_3Delem_type(Element_Types(cell_rid), elem); + num_nodes_in_elem = elem->num_nodes(); + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) + { + node_gid = nodes_in_elem(cell_rid, node_lid); + nonoverlap_elem_node_set.insert(node_gid); + } + } + } + } + + // by now the set contains, with no repeats, all the global node indices belonging to the non overlapping element list on this MPI rank + // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices + nnonoverlap_elem_nodes = nonoverlap_elem_node_set.size(); + nonoverlap_elem_nodes = Kokkos::DualView("nonoverlap_elem_nodes", nnonoverlap_elem_nodes); + if(nnonoverlap_elem_nodes){ + int inonoverlap_elem_node = 0; + auto it = nonoverlap_elem_node_set.begin(); + while (it != nonoverlap_elem_node_set.end()) { + nonoverlap_elem_nodes.h_view(inonoverlap_elem_node++) = *it; + it++; + } + nonoverlap_elem_nodes.modify_host(); + nonoverlap_elem_nodes.sync_device(); + } + + // create a Map for node indices belonging to the non-overlapping set of elements + nonoverlap_element_node_map = Teuchos::rcp(new 
Tpetra::Map(Teuchos::OrdinalTraits::invalid(), nonoverlap_elem_nodes.d_view, 0, comm)); + + // std::cout << "number of patches = " << mesh->num_patches() << std::endl; + if (myrank == 0) + { + std::cout << "End of map setup " << std::endl; + } + } + // build the corner mesh connectivity arrays void build_corner_connectivity() { diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 138b81afd..4114c235e 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1140,7 +1140,7 @@ class MeshReader //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope std::vector required_node_state = { node_state::coords }; map = node_coords_distributed.pmap; - node.post_repartition_initialize(map, num_dims, required_node_state); + node.initialize(map, num_dims, required_node_state); } //initialize some mesh data @@ -1398,7 +1398,7 @@ class MeshReader for (int ielem = 0; ielem < num_elems; ielem++) { for (int inode = 0; inode < elem_words_per_line; inode++) - { + { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) nodes_in_elem.host(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; } } diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 73ac93df6..a492644fc 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -322,7 +322,8 @@ struct node_t } }; // end method - void post_repartition_initialize(DistributedMap partitioned_map, size_t num_dims, std::vector node_states) + //initialize overload with a partitioned map + void initialize(DistributedMap partitioned_map, size_t num_dims, std::vector node_states) { for (auto field : node_states){ switch(field){ diff --git 
a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index da853af48..56455d153 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -106,6 +106,9 @@ void Driver::initialize() exit(0); } + //build relevant partition maps for ghost nodes, elements, etc. + mesh.init_maps(); + // Build boundary conditions const int num_bcs = BoundaryConditions.num_bcs; From 7b37dae27169e00f615d9dfdd70baa2e540b7908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 15 Jun 2025 22:34:42 -0600 Subject: [PATCH 19/66] WIP: init partition maps and comm plans --- .../src/common/include/mesh.h | 216 ++++-------------- 1 file changed, 50 insertions(+), 166 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index a26684937..1db497376 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -231,7 +231,7 @@ struct Mesh_t // ---- Element Data Definitions ---- // size_t global_num_elems; ///< Global number of elements in the mesh size_t num_elems; ///< number of local+shared elements on this process (forces usually employ this) - size_t nlocal_elems; ///< number of local elements on this process (output and reductions for energy usually employ this) + size_t num_local_elems; ///< number of local elements on this process (output and reductions for energy usually employ this) size_t num_nodes_in_elem; ///< Number of nodes in an element size_t num_patches_in_elem; ///< Number of patches in an element size_t num_surfs_in_elem; ///< Number of surfaces in an element @@ -481,78 +481,43 @@ struct Mesh_t // remove elements from the local set so that each rank has a unique set of global ids // local elements belonging to the non-overlapping element distribution to each rank with buffer - Kokkos::DualView 
Initial_Element_Global_Indices("Initial_Element_Global_Indices", num_elems); + DCArrayKokkos Initial_Element_Global_Indices(num_elems, "Initial_Element_Global_Indices"); size_t nonoverlapping_count = 0; int my_element_flag; - - // loop through local element set - if (num_dim == 2) + for (int ielem = 0; ielem < num_elems; ielem++) { - for (int ielem = 0; ielem < num_elems; ielem++) - { - element_select->choose_2Delem_type(Element_Types(ielem), elem2D); - num_nodes_in_elem = elem2D->num_nodes(); + my_element_flag = 1; - my_element_flag = 1; - for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) + for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) + { + node_gid = nodes_in_elem(ielem, lnode); + if (ghost_node_map.isProcessGlobalIndex(node_gid)) { - node_gid = nodes_in_elem(ielem, lnode); - if (ghost_node_map->isNodeGlobalElement(node_gid)) + local_node_index = ghost_node_map.getLocalIndex(node_gid); + if (ghost_node_ranks.host(local_node_index) < myrank) { - local_node_index = ghost_node_map->getLocalElement(node_gid); - if (ghost_node_ranks.h_view(local_node_index) < myrank) - { - my_element_flag = 0; - } + my_element_flag = 0; } } - if (my_element_flag) - { - Initial_Element_Global_Indices.h_view(nonoverlapping_count++) = all_element_map->getGlobalElement(ielem); - } } - } - - if (num_dim == 3) - { - for (int ielem = 0; ielem < num_elems; ielem++) + if (my_element_flag) { - element_select->choose_3Delem_type(Element_Types(ielem), elem); - num_nodes_in_elem = elem->num_nodes(); - - my_element_flag = 1; - - for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) - { - node_gid = nodes_in_elem(ielem, lnode); - if (ghost_node_map->isNodeGlobalElement(node_gid)) - { - local_node_index = ghost_node_map->getLocalElement(node_gid); - if (ghost_node_ranks.h_view(local_node_index) < myrank) - { - my_element_flag = 0; - } - } - } - if (my_element_flag) - { - Initial_Element_Global_Indices.h_view(nonoverlapping_count++) = all_element_map->getGlobalElement(ielem); - } + 
Initial_Element_Global_Indices.host(nonoverlapping_count++) = all_element_map.getGlobalIndex(ielem); } } // copy over from buffer to compressed storage - Kokkos::DualView Element_Global_Indices("Element_Global_Indices", nonoverlapping_count); + DCArrayKokkos Element_Global_Indices(nonoverlapping_count, "Element_Global_Indices"); for (int ibuffer = 0; ibuffer < nonoverlapping_count; ibuffer++) { - Element_Global_Indices.h_view(ibuffer) = Initial_Element_Global_Indices.h_view(ibuffer); + Element_Global_Indices.host(ibuffer) = Initial_Element_Global_Indices.host(ibuffer); } - nlocal_elem_non_overlapping = nonoverlapping_count; - Element_Global_Indices.modify_host(); - Element_Global_Indices.sync_device(); + num_local_elems = nonoverlapping_count; + Element_Global_Indices.update_device(); + // create nonoverlapping element map - element_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), Element_Global_Indices.d_view, 0, comm)); + element_map = DistributedMap(Element_Global_Indices); // sort element connectivity so nonoverlaps are sequentially found first // define initial sorting of global indices @@ -561,116 +526,55 @@ struct Mesh_t for (int ielem = 0; ielem < num_elems; ielem++) { - Initial_Element_Global_Indices.h_view(ielem) = all_element_map->getGlobalElement(ielem); + Initial_Element_Global_Indices.host(ielem) = all_element_map.getGlobalIndex(ielem); } // re-sort so local elements in the nonoverlapping map are first in storage - CArrayKokkos Temp_Nodes(max_nodes_per_element); + CArrayKokkos Temp_Nodes(num_nodes_in_elem); - GO temp_element_gid, current_element_gid; + long long int temp_element_gid, current_element_gid; int last_storage_index = num_elems - 1; - for (int ielem = 0; ielem < nlocal_elem_non_overlapping; ielem++) + for (int ielem = 0; ielem < num_local_elems; ielem++) { - current_element_gid = Initial_Element_Global_Indices.h_view(ielem); + current_element_gid = Initial_Element_Global_Indices.host(ielem); // if this element is not 
part of the non overlap list then send it to the end of the storage and swap the element at the end - if (!element_map->isNodeGlobalElement(current_element_gid)) + if (!element_map.isProcessGlobalIndex(current_element_gid)) { temp_element_gid = current_element_gid; - for (int lnode = 0; lnode < max_nodes_per_element; lnode++) + for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) { - Temp_Nodes(lnode) = nodes_in_elem(ielem, lnode); + Temp_Nodes(lnode) = nodes_in_elem.host(ielem, lnode); } - Initial_Element_Global_Indices.h_view(ielem) = Initial_Element_Global_Indices.h_view(last_storage_index); - Initial_Element_Global_Indices.h_view(last_storage_index) = temp_element_gid; - for (int lnode = 0; lnode < max_nodes_per_element; lnode++) + Initial_Element_Global_Indices.host(ielem) = Initial_Element_Global_Indices.host(last_storage_index); + Initial_Element_Global_Indices.host(last_storage_index) = temp_element_gid; + for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) { - nodes_in_elem(ielem, lnode) = nodes_in_elem(last_storage_index, lnode); - nodes_in_elem(last_storage_index, lnode) = Temp_Nodes(lnode); + nodes_in_elem.host(ielem, lnode) = nodes_in_elem.host(last_storage_index, lnode); + nodes_in_elem.host(last_storage_index, lnode) = Temp_Nodes(lnode); } last_storage_index--; // test if swapped element is also not part of the non overlap map; if so lower loop counter to repeat the above - temp_element_gid = Initial_Element_Global_Indices.h_view(ielem); - if (!element_map->isNodeGlobalElement(temp_element_gid)) + temp_element_gid = Initial_Element_Global_Indices.host(ielem); + if (!element_map.isProcessGlobalIndex(temp_element_gid)) { ielem--; } } } // reset all element map to its re-sorted version - Initial_Element_Global_Indices.modify_host(); - Initial_Element_Global_Indices.sync_device(); + Initial_Element_Global_Indices.update_device(); + nodes_in_elem.update_device(); - all_element_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), 
Initial_Element_Global_Indices.d_view, 0, comm)); - // element_map->describe(*fos,Teuchos::VERB_EXTREME); - // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); + all_element_map = DistributedMap(Initial_Element_Global_Indices); + //redefine nodes_in_elem so partition map of the distributed array is synchronized with permuted dual view contents + DistributedDCArray nodes_in_elem_temp(all_element_map, num_nodes_in_elem); + nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); + nodes_in_elem = nodes_in_elem_temp; + // element_map->describe(*fos,Teuchos::VERB_EXTREME); // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); - // construct dof map that follows from the node map (used for distributed matrix and vector objects later) - Kokkos::DualView local_dof_indices("local_dof_indices", num_local_nodes * num_dim); - for (int i = 0; i < num_local_nodes; i++) - { - for (int j = 0; j < num_dim; j++) - { - local_dof_indices.h_view(i * num_dim + j) = map->getGlobalElement(i) * num_dim + j; - } - } - - local_dof_indices.modify_host(); - local_dof_indices.sync_device(); - local_dof_map = Teuchos::rcp(new Tpetra::Map(num_nodes * num_dim, local_dof_indices.d_view, 0, comm) ); - - // construct dof map that follows from the all_node map (used for distributed matrix and vector objects later) - Kokkos::DualView all_dof_indices("all_dof_indices", num_nodes * num_dim); - for (int i = 0; i < num_nodes; i++) - { - for (int j = 0; j < num_dim; j++) - { - all_dof_indices.h_view(i * num_dim + j) = all_node_map->getGlobalElement(i) * num_dim + j; - } - } - - all_dof_indices.modify_host(); - all_dof_indices.sync_device(); - // pass invalid global count so the map reduces the global count automatically - all_dof_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), all_dof_indices.d_view, 0, comm) ); - - // debug print of map - // debug print - - std::ostream& out = std::cout; - - Teuchos::RCP fos = 
Teuchos::fancyOStream(Teuchos::rcpFromRef(out)); - // if(myrank==0) - // *fos << "Ghost Node Map :" << std::endl; - // all_node_map->describe(*fos,Teuchos::VERB_EXTREME); - // *fos << std::endl; - // std::fflush(stdout); - - // Count how many elements connect to each local node - node_nconn_distributed = Teuchos::rcp(new MCONN(map, 1)); - // active view scope - { - host_elem_conn_array node_nconn = node_nconn_distributed->getLocalView(Tpetra::Access::ReadWrite); - for (int inode = 0; inode < num_local_nodes; inode++) - { - node_nconn(inode, 0) = 0; - } - - for (int ielem = 0; ielem < num_elems; ielem++) - { - for (int inode = 0; inode < num_nodes_in_elem; inode++) - { - node_gid = nodes_in_elem(ielem, inode); - if (map->isNodeGlobalElement(node_gid)) - { - node_nconn(map->getLocalElement(node_gid), 0)++; - } - } - } - } - // create distributed multivector of the local node data and all (local + ghost) node storage all_node_coords_distributed = Teuchos::rcp(new MV(all_node_map, num_dim)); @@ -684,7 +588,6 @@ struct Mesh_t // comms to get ghosts all_node_coords_distributed->doImport(*node_coords_distributed, *importer, Tpetra::INSERT); - // all_node_nconn_distributed->doImport(*node_nconn_distributed, importer, Tpetra::INSERT); dual_nodes_in_elem.sync_device(); dual_nodes_in_elem.modify_device(); @@ -693,36 +596,17 @@ struct Mesh_t // construct map of nodes that belong to the non-overlapping element set (contained by ghost + local node set but not all of them) std::set nonoverlap_elem_node_set; - if (nlocal_elem_non_overlapping) + if (num_local_elems) { - // search through local elements for global node indices not owned by this MPI rank - if (num_dim == 2) - { - for (int cell_rid = 0; cell_rid < nlocal_elem_non_overlapping; cell_rid++) - { - // set nodes per element - element_select->choose_2Delem_type(Element_Types(cell_rid), elem2D); - num_nodes_in_elem = elem2D->num_nodes(); - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) - { - node_gid = 
nodes_in_elem(cell_rid, node_lid); - nonoverlap_elem_node_set.insert(node_gid); - } - } - } - - if (num_dim == 3) + for (int cell_rid = 0; cell_rid < num_local_elems; cell_rid++) { - for (int cell_rid = 0; cell_rid < nlocal_elem_non_overlapping; cell_rid++) + // set nodes per element + element_select->choose_3Delem_type(Element_Types(cell_rid), elem); + num_nodes_in_elem = elem->num_nodes(); + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - // set nodes per element - element_select->choose_3Delem_type(Element_Types(cell_rid), elem); - num_nodes_in_elem = elem->num_nodes(); - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) - { - node_gid = nodes_in_elem(cell_rid, node_lid); - nonoverlap_elem_node_set.insert(node_gid); - } + node_gid = nodes_in_elem(cell_rid, node_lid); + nonoverlap_elem_node_set.insert(node_gid); } } } From 33b6fa8e36034c23431ca82367891e16a4820257 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 17 Jun 2025 13:39:11 -0600 Subject: [PATCH 20/66] WIP: MPI refactor --- .../src/common/include/mesh.h | 22 +-- .../src/common/include/state.h | 155 +++++++++++++----- 2 files changed, 120 insertions(+), 57 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 1db497376..54524ebaa 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -589,23 +589,16 @@ struct Mesh_t // comms to get ghosts all_node_coords_distributed->doImport(*node_coords_distributed, *importer, Tpetra::INSERT); - dual_nodes_in_elem.sync_device(); - dual_nodes_in_elem.modify_device(); - // construct distributed element connectivity multivector - global_nodes_in_elem_distributed = Teuchos::rcp(new MCONN(all_element_map, dual_nodes_in_elem)); - // construct map of nodes that belong to the non-overlapping element set (contained by ghost + local node set but not 
all of them) - std::set nonoverlap_elem_node_set; + std::set nonoverlap_elem_node_set; if (num_local_elems) { for (int cell_rid = 0; cell_rid < num_local_elems; cell_rid++) { // set nodes per element - element_select->choose_3Delem_type(Element_Types(cell_rid), elem); - num_nodes_in_elem = elem->num_nodes(); for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - node_gid = nodes_in_elem(cell_rid, node_lid); + node_gid = nodes_in_elem.host(cell_rid, node_lid); nonoverlap_elem_node_set.insert(node_gid); } } @@ -613,21 +606,20 @@ struct Mesh_t // by now the set contains, with no repeats, all the global node indices belonging to the non overlapping element list on this MPI rank // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices - nnonoverlap_elem_nodes = nonoverlap_elem_node_set.size(); - nonoverlap_elem_nodes = Kokkos::DualView("nonoverlap_elem_nodes", nnonoverlap_elem_nodes); + size_t nnonoverlap_elem_nodes = nonoverlap_elem_node_set.size(); + DCArrayKokkosw nonoverlap_elem_nodes(nnonoverlap_elem_nodes, "nonoverlap_elem_nodes"); if(nnonoverlap_elem_nodes){ int inonoverlap_elem_node = 0; auto it = nonoverlap_elem_node_set.begin(); while (it != nonoverlap_elem_node_set.end()) { - nonoverlap_elem_nodes.h_view(inonoverlap_elem_node++) = *it; + nonoverlap_elem_nodes.host(inonoverlap_elem_node++) = *it; it++; } - nonoverlap_elem_nodes.modify_host(); - nonoverlap_elem_nodes.sync_device(); + nonoverlap_elem_nodes.update_device(); } // create a Map for node indices belonging to the non-overlapping set of elements - nonoverlap_element_node_map = Teuchos::rcp(new Tpetra::Map(Teuchos::OrdinalTraits::invalid(), nonoverlap_elem_nodes.d_view, 0, comm)); + nonoverlap_element_node_map = DistributedMap(nonoverlap_elem_nodes); // std::cout << "number of patches = " << mesh->num_patches() << std::endl; if (myrank == 0) diff --git a/single-node-refactor/src/common/include/state.h 
b/single-node-refactor/src/common/include/state.h index a492644fc..16b8b4564 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -66,6 +66,13 @@ using DistributedDFArray = TpetraDFArray; template using DistributedDCArray = TpetraDCArray; + +template +void super_vector_initialization(DistributedDCArray super_vector, DistributedDCArray sub_vector, size_t extent); + +template +void super_vector_initialization(DistributedDFArray super_vector, DistributedDFArray sub_vector, size_t extent); + ///////////////////////////////////////////////////////////////////////////// /// /// \struct fillGaussState_t @@ -275,16 +282,16 @@ enum class node_state ///////////////////////////////////////////////////////////////////////////// struct node_t { - DistributedDCArray coords; ///< Nodal coordinates - DistributedDCArray coords_n0; ///< Nodal coordinates at tn=0 of time integration - DistributedDCArray vel; ///< Nodal velocity - DistributedDCArray vel_n0; ///< Nodal velocity at tn=0 of time integration - DistributedDCArray mass; ///< Nodal mass - DistributedDCArray force; ///< Nodal force - DistributedDCArray temp; ///< Nodal temperature - DistributedDCArray temp_n0; ///< Nodal temperature at tn=0 of time integration - DistributedDCArray q_transfer; ///< Nodal heat flux - DistributedDCArray gradient_level_set; ///< Nodal gradient of the level set function + DistributedDCArray coords, local_coords, ghost_coords; ///< Nodal coordinates + DistributedDCArray coords_n0, local_coords_n0, ghost_coords_n0; ///< Nodal coordinates at tn=0 of time integration + DistributedDCArray vel, local_vel, ghost_vel; ///< Nodal velocity + DistributedDCArray vel_n0, local_vel_n0, ghost_vel_n0; ///< Nodal velocity at tn=0 of time integration + DistributedDCArray mass, local_mass, ghost_mass; ///< Nodal mass + DistributedDCArray force, local_force, ghost_force; ///< Nodal force + DistributedDCArray temp, local_temp, ghost_temp; ///< Nodal 
temperature + DistributedDCArray temp_n0, local_temp_n0, ghost_temp_n0; ///< Nodal temperature at tn=0 of time integration + DistributedDCArray q_transfer, local_q_transfer, ghost_q_transfer; ///< Nodal heat flux + DistributedDCArray gradient_level_set, local_gradient_level_set, ghost_gradient_level_set; ///< Nodal gradient of the level set function // initialization method (num_nodes, num_dims, state to allocate) void initialize(size_t num_nodes, size_t num_dims, std::vector node_states) @@ -322,38 +329,86 @@ struct node_t } }; // end method - //initialize overload with a partitioned map - void initialize(DistributedMap partitioned_map, size_t num_dims, std::vector node_states) - { - for (auto field : node_states){ - switch(field){ - case node_state::coords: - if (coords.size() == 0) this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); - if (coords_n0.size() == 0) this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); - break; - case node_state::velocity: - if (vel.size() == 0) this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); - if (vel_n0.size() == 0) this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); - break; - case node_state::force: - if (force.size() == 0) this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); - break; - case node_state::mass: - if (mass.size() == 0) this->mass = DistributedDCArray(partitioned_map, "node_mass"); - break; - case node_state::temp: - if (temp.size() == 0) this->temp = DistributedDCArray(partitioned_map, "node_temp"); - if (temp_n0.size() == 0) this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); - break; - case node_state::heat_transfer: - if (q_transfer.size() == 0) this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); - break; - case node_state::gradient_level_set: - if (gradient_level_set.size() == 0) this->gradient_level_set = 
DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); - break; - default: - std::cout<<"Desired node state not understood in node_t initialize"< node_states, DistributedMap subview_map = DistributedMap(), size_t offset=0) + { + if(subview_map.size() == 0){ + for (auto field : node_states){ + switch(field){ + case node_state::coords: + if (coords.size() == 0) this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); + if (coords_n0.size() == 0) this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); + break; + case node_state::velocity: + if (vel.size() == 0) this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); + if (vel_n0.size() == 0) this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); + break; + case node_state::force: + if (force.size() == 0) this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); + break; + case node_state::mass: + if (mass.size() == 0) this->mass = DistributedDCArray(partitioned_map, "node_mass"); + break; + case node_state::temp: + if (temp.size() == 0) this->temp = DistributedDCArray(partitioned_map, "node_temp"); + if (temp_n0.size() == 0) this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); + break; + case node_state::heat_transfer: + if (q_transfer.size() == 0) this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + break; + case node_state::gradient_level_set: + if (gradient_level_set.size() == 0) this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); + break; + default: + std::cout<<"Desired node state not understood in node_t initialize"<local_coords = this->coords; + this->local_coords_n0 = this->coords_n0; + //storage for nlocal+nghost + this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); + this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); + //assign 
local data to new storage + super_vector_initialization(this->coords, this->local_coords, subview_map.size()); + super_vector_initialization(this->coords_n0, this->local_coords_n0, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_coords = DistributedDCArray(this->coords,subview_map); + this->local_coords_n0 = DistributedDCArray(this->coords_n0,subview_map); + break; + case node_state::velocity: + this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); + this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); + break; + case node_state::force: + this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); + break; + case node_state::mass: + this->mass = DistributedDCArray(partitioned_map, "node_mass"); + break; + case node_state::temp: + this->temp = DistributedDCArray(partitioned_map, "node_temp"); + this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); + break; + case node_state::heat_transfer: + this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + break; + case node_state::gradient_level_set: + this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); + break; + default: + std::cout<<"Desired node state not understood in node_t initialize"< MaterialZones; ///< access as MaterialZones(mat_id).var(mat_zone), only used with arbitrary-order FE }; // end state_t +template +void super_vector_initialization(DistributedDCArray super_vector, DistributedDCArray sub_vector, size_t extent){ + FOR_ALL(i, 0, extent, { + for (size_t dim = 0; dim < super_vector.component_length(); dim++) { + super_vector(i, dim) = sub_vector(i, dim); + } + }); // end parallel for corners +} +template +void super_vector_initialization(DistributedDFArray super_vector, DistributedDFArray sub_vector, size_t extent){ + FOR_ALL(i, 0, extent, { + for (size_t dim = 0; dim < 
super_vector.component_length(); dim++) { + super_vector(i, dim) = sub_vector(i, dim); + } + }); // end parallel for corners +} From 3acb0ce4512a176fcbe7fc758f5d668014a3916b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Wed, 18 Jun 2025 09:33:03 -0600 Subject: [PATCH 21/66] WIP: MPI refactor work --- .../src/common/include/state.h | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 16b8b4564..31e8c1a0d 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -386,24 +386,72 @@ struct node_t this->local_coords_n0 = DistributedDCArray(this->coords_n0,subview_map); break; case node_state::velocity: + //store local data with existing managed view made by mesh read for now + this->local_vel = this->vel; + this->local_vel_n0 = this->vel_n0; + //storage for nlocal+nghost this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); + //assign local data to new storage + super_vector_initialization(this->vel, this->local_vel, subview_map.size()); + super_vector_initialization(this->vel_n0, this->local_vel_n0, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_vel = DistributedDCArray(this->vel,subview_map); + this->local_vel_n0 = DistributedDCArray(this->vel_n0,subview_map); break; case node_state::force: + //store local data with existing managed view made by mesh read for now + this->local_force = this->force; + //storage for nlocal+nghost this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); + //assign local data to new storage + super_vector_initialization(this->force, this->local_force, subview_map.size()); + //replace 
local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_force = DistributedDCArray(this->force,subview_map); break; case node_state::mass: + //store local data with existing managed view made by mesh read for now + this->local_mass = this->mass; + //storage for nlocal+nghost this->mass = DistributedDCArray(partitioned_map, "node_mass"); + //assign local data to new storage + super_vector_initialization(this->mass, this->local_mass, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_mass = DistributedDCArray(this->mass,subview_map); break; case node_state::temp: + //store local data with existing managed view made by mesh read for now + this->local_temp = this->temp; + this->local_temp_n0 = this->temp_n0; + //storage for nlocal+nghost this->temp = DistributedDCArray(partitioned_map, "node_temp"); this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); + //assign local data to new storage + super_vector_initialization(this->temp, this->local_temp, subview_map.size()); + super_vector_initialization(this->temp_n0, this->local_temp_n0, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_temp = DistributedDCArray(this->temp,subview_map); + this->local_temp_n0 = DistributedDCArray(this->temp_n0,subview_map); break; case node_state::heat_transfer: + //store local data with existing managed view made by mesh read for now + this->local_q_transfer = this->q_transfer; + //storage for nlocal+nghost this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + //assign local data to new storage + super_vector_initialization(this->q_transfer, this->local_q_transfer, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + 
this->local_q_transfer = DistributedDCArray(this->q_transfer,subview_map); break; case node_state::gradient_level_set: + //store local data with existing managed view made by mesh read for now + this->local_gradient_level_set = this->gradient_level_set; + //storage for nlocal+nghost this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); + //assign local data to new storage + super_vector_initialization(this->gradient_level_set, this->local_gradient_level_set, subview_map.size()); + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_gradient_level_set = DistributedDCArray(this->gradient_level_set,subview_map); break; default: std::cout<<"Desired node state not understood in node_t initialize"< Date: Sun, 22 Jun 2025 23:23:16 -0600 Subject: [PATCH 22/66] WIP: MPI refactor --- .../src/common/include/mesh.h | 50 +++++++++++++++---- .../src/common/include/state.h | 2 + 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 54524ebaa..6b7415483 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -268,6 +268,9 @@ struct Mesh_t DistributedMap element_map; ///< partition of uniquely owned + shared elements (stores global node IDs on each process) DistributedMap nonoverlap_element_node_map; // map of node indices belonging to unique element map + //communication plans + CommunicationPlan node_coords_comms; + RaggedRightArrayKokkos corners_in_node; ///< Corners connected to a node CArrayKokkos num_corners_in_node; ///< Number of corners connected to a node RaggedRightArrayKokkos elems_in_node; ///< Elements connected to a given node @@ -576,18 +579,16 @@ struct Mesh_t // element_map->describe(*fos,Teuchos::VERB_EXTREME); // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); // create 
distributed multivector of the local node data and all (local + ghost) node storage + std::vector required_node_state = { node_state::coords }; + //constructs local + ghost coords array with local coords as a subview for first nlocal entrie + node.initialize(all_node_map, num_dims, required_node_state, node_map); - all_node_coords_distributed = Teuchos::rcp(new MV(all_node_map, num_dim)); - ghost_node_coords_distributed = Teuchos::rcp(new MV(ghost_node_map, num_dim)); - - // create import object using local node indices map and all indices map - comm_importer_setup(); + /* create forward comms objects; setup for new map pairs should only be done here, construct using these existing comm plans + for any new pair of vectors requiring the same map pairs and comm mode afterwards*/ + forward_comms_setup(); - // create export objects for reverse comms - comm_exporter_setup(); - - // comms to get ghosts - all_node_coords_distributed->doImport(*node_coords_distributed, *importer, Tpetra::INSERT); + // create reverse comms + //reverse_comms_setup(); // construct map of nodes that belong to the non-overlapping element set (contained by ghost + local node set but not all of them) std::set nonoverlap_elem_node_set; @@ -628,6 +629,35 @@ struct Mesh_t } } + /* ---------------------------------------------------------------------- + Setup Tpetra importers for comms + ------------------------------------------------------------------------- */ + + void forward_comms_setup() + { + // create import object using local node indices map and ghost indices map + node_coords_comms = CommunicationPlan(node.coords, node.local_coords); + + // output map and importers + //sorted_map = Teuchos::rcp(new Tpetra::Map(num_nodes, 0, comm)); + //node_sorting_importer = Teuchos::rcp(new Tpetra::Import(map, sorted_map)); + // sorted element mapping + //sorted_element_map = Teuchos::rcp(new Tpetra::Map(num_elem, 0, comm)); + //element_sorting_importer = Teuchos::rcp(new Tpetra::Import(all_element_map, 
sorted_element_map));; + } + + /* ---------------------------------------------------------------------- + Setup Tpetra exporters for reverse comms + ------------------------------------------------------------------------- */ + + void reverse_comms_setup() + { + //currently don't use anything like force tallies from ghost nodes + //only use in TO solver was a BC flag + // create import object using local node indices map and ghost indices map + //exporter = Teuchos::rcp(new Tpetra::Export(all_node_map, map)); + } + // build the corner mesh connectivity arrays void build_corner_connectivity() { diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 31e8c1a0d..d02588620 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -65,6 +65,8 @@ template using DistributedDFArray = TpetraDFArray; template using DistributedDCArray = TpetraDCArray; +template +using CommunicationPlan = TpetraLRCommunicationPlan; template From deded92e26b30c9af96bdffc1b8bbabbae46b13d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Wed, 25 Jun 2025 22:19:04 -0600 Subject: [PATCH 23/66] WIP: MPI refactor work --- single-node-refactor/CMakeLists.txt | 5 -- .../src/common/include/mesh.h | 38 ++++---- .../src/common/include/state.h | 87 ++++++++++++++----- single-node-refactor/src/driver.cpp | 4 +- .../strength/decoupled_plasticity.h | 12 +-- 5 files changed, 91 insertions(+), 55 deletions(-) diff --git a/single-node-refactor/CMakeLists.txt b/single-node-refactor/CMakeLists.txt index 6c2e41896..fae8c911d 100755 --- a/single-node-refactor/CMakeLists.txt +++ b/single-node-refactor/CMakeLists.txt @@ -100,13 +100,8 @@ include_directories(src/common) include_directories(src/input) add_subdirectory(src/input) - - add_subdirectory(src) - - - # Add uninstall target if(NOT TARGET uninstall) configure_file( diff --git 
a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 6b7415483..b8bc209d6 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -269,7 +269,7 @@ struct Mesh_t DistributedMap nonoverlap_element_node_map; // map of node indices belonging to unique element map //communication plans - CommunicationPlan node_coords_comms; + CommPlan node_coords_comms; RaggedRightArrayKokkos corners_in_node; ///< Corners connected to a node CArrayKokkos num_corners_in_node; ///< Number of corners connected to a node @@ -389,11 +389,9 @@ struct Mesh_t /* ---------------------------------------------------------------------- Initialize Ghost and Non-Overlapping Element Maps ------------------------------------------------------------------------- */ - void init_maps() + void init_maps(node_t& node) { - int num_dim = simparam.num_dims; int local_node_index, current_column_index; - int num_nodes_in_elem; long long int node_gid; int myrank, nranks; MPI_Comm_rank(MPI_COMM_WORLD,&myrank); @@ -401,6 +399,7 @@ struct Mesh_t num_ghost_nodes=0; DCArrayKokkos ghost_nodes; + DCArrayKokkos ghost_node_ranks; if (num_elems >= 1) { // Construct set of ghost nodes; start with a buffer with upper limit @@ -409,7 +408,6 @@ struct Mesh_t for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) { // set nodes per element - num_nodes_in_elem = elem->num_nodes(); for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { node_gid = nodes_in_elem(cell_rid, node_lid); //nodes in elem still stores global indices @@ -424,6 +422,8 @@ struct Mesh_t // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices num_ghost_nodes = ghost_node_set.size(); + int ighost = 0; + auto it = ghost_node_set.begin(); // create a Map for ghost node indices ghost_nodes = DCArrayKokkos(num_ghost_nodes, "ghost_nodes"); //pass this into map object 
@@ -436,8 +436,6 @@ struct Mesh_t //Use the ranks to break ties in shared element assignment for a unique element map used in elem set reductions later //this wont be that great at load balancing element counts but its simple and works for now ghost_node_ranks = DCArrayKokkos(num_ghost_nodes, "ghost_nodes_ranks"); - int ighost = 0; - auto it = ghost_node_set.begin(); // debug print of ghost nodes // std::cout << " GHOST NODE SET ON TASK " << myrank << std::endl; @@ -461,7 +459,7 @@ struct Mesh_t DCArrayKokkos all_nodes; if(num_nodes){ // CArrayKokkos all_node_indices(num_nodes, "all_node_indices"); - all_nodes = DCArrayKokkosDCArrayKokkos(num_nodes, "num_nodes"); + all_nodes = DCArrayKokkos(num_nodes, "num_nodes"); for (int i = 0; i < num_nodes; i++) { if (i < num_local_nodes) @@ -506,7 +504,7 @@ struct Mesh_t } if (my_element_flag) { - Initial_Element_Global_Indices.host(nonoverlapping_count++) = all_element_map.getGlobalIndex(ielem); + Initial_Element_Global_Indices.host(nonoverlapping_count++) = element_map.getGlobalIndex(ielem); } } @@ -529,7 +527,7 @@ struct Mesh_t for (int ielem = 0; ielem < num_elems; ielem++) { - Initial_Element_Global_Indices.host(ielem) = all_element_map.getGlobalIndex(ielem); + Initial_Element_Global_Indices.host(ielem) = element_map.getGlobalIndex(ielem); } // re-sort so local elements in the nonoverlapping map are first in storage @@ -570,14 +568,14 @@ struct Mesh_t Initial_Element_Global_Indices.update_device(); nodes_in_elem.update_device(); - all_element_map = DistributedMap(Initial_Element_Global_Indices); + element_map = DistributedMap(Initial_Element_Global_Indices); //redefine nodes_in_elem so partition map of the distributed array is synchronized with permuted dual view contents - DistributedDCArray nodes_in_elem_temp(all_element_map, num_nodes_in_elem); + DistributedDCArray nodes_in_elem_temp(element_map, num_nodes_in_elem); nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); nodes_in_elem = 
nodes_in_elem_temp; // element_map->describe(*fos,Teuchos::VERB_EXTREME); - // all_element_map->describe(*fos,Teuchos::VERB_EXTREME); + // element_map->describe(*fos,Teuchos::VERB_EXTREME); // create distributed multivector of the local node data and all (local + ghost) node storage std::vector required_node_state = { node_state::coords }; //constructs local + ghost coords array with local coords as a subview for first nlocal entrie @@ -585,10 +583,10 @@ struct Mesh_t /* create forward comms objects; setup for new map pairs should only be done here, construct using these existing comm plans for any new pair of vectors requiring the same map pairs and comm mode afterwards*/ - forward_comms_setup(); + forward_comms_setup(node); // create reverse comms - //reverse_comms_setup(); + //reverse_comms_setup(node); // construct map of nodes that belong to the non-overlapping element set (contained by ghost + local node set but not all of them) std::set nonoverlap_elem_node_set; @@ -608,7 +606,7 @@ struct Mesh_t // by now the set contains, with no repeats, all the global node indices belonging to the non overlapping element list on this MPI rank // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices size_t nnonoverlap_elem_nodes = nonoverlap_elem_node_set.size(); - DCArrayKokkosw nonoverlap_elem_nodes(nnonoverlap_elem_nodes, "nonoverlap_elem_nodes"); + DCArrayKokkos nonoverlap_elem_nodes(nnonoverlap_elem_nodes, "nonoverlap_elem_nodes"); if(nnonoverlap_elem_nodes){ int inonoverlap_elem_node = 0; auto it = nonoverlap_elem_node_set.begin(); @@ -633,24 +631,24 @@ struct Mesh_t Setup Tpetra importers for comms ------------------------------------------------------------------------- */ - void forward_comms_setup() + void forward_comms_setup(node_t& node) { // create import object using local node indices map and ghost indices map - node_coords_comms = CommunicationPlan(node.coords, node.local_coords); + 
node_coords_comms = CommPlan(node.coords, node.local_coords); // output map and importers //sorted_map = Teuchos::rcp(new Tpetra::Map(num_nodes, 0, comm)); //node_sorting_importer = Teuchos::rcp(new Tpetra::Import(map, sorted_map)); // sorted element mapping //sorted_element_map = Teuchos::rcp(new Tpetra::Map(num_elem, 0, comm)); - //element_sorting_importer = Teuchos::rcp(new Tpetra::Import(all_element_map, sorted_element_map));; + //element_sorting_importer = Teuchos::rcp(new Tpetra::Import(element_map, sorted_element_map));; } /* ---------------------------------------------------------------------- Setup Tpetra exporters for reverse comms ------------------------------------------------------------------------- */ - void reverse_comms_setup() + void reverse_comms_setup(node_t& node) { //currently don't use anything like force tallies from ghost nodes //only use in TO solver was a BC flag diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 4b2dcb787..aa332aabd 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -66,7 +66,7 @@ using DistributedDFArray = TpetraDFArray; template using DistributedDCArray = TpetraDCArray; template -using CommunicationPlan = TpetraLRCommunicationPlan; +using CommPlan = TpetraLRCommunicationPlan; template @@ -369,89 +369,130 @@ struct node_t } } else{ - //first assign already partitioned local array to the local variable since up until now there was no local vs all distinction + //first assign already partitioned local array to the local variable (if allocated) since up until now there was no local vs all distinction //then create array storing all = local + ghost array using the corresponding local array as a subview to avoid duplicate storage for (auto field : node_states){ switch(field){ case node_state::coords: //store local data with existing managed view made by mesh read for now - this->local_coords 
= this->coords; - this->local_coords_n0 = this->coords_n0; + if(this->local_coords.size()==0&&this->coords.size()!=0){ + this->local_coords = this->coords; + } + if(this->local_coords_n0.size()==0&&this->coords_n0.size()!=0){ + this->local_coords_n0 = this->coords_n0; + } //storage for nlocal+nghost this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); - //assign local data to new storage - super_vector_initialization(this->coords, this->local_coords, subview_map.size()); - super_vector_initialization(this->coords_n0, this->local_coords_n0, subview_map.size()); + //assign local data to new storage if local data was allocated + if(this->local_coords.size()!=0){ + super_vector_initialization(this->coords, this->local_coords, subview_map.size()); + } + if(this->local_coords_n0.size()!=0){ + super_vector_initialization(this->coords_n0, this->local_coords_n0, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_coords = DistributedDCArray(this->coords,subview_map); this->local_coords_n0 = DistributedDCArray(this->coords_n0,subview_map); break; case node_state::velocity: //store local data with existing managed view made by mesh read for now - this->local_vel = this->vel; - this->local_vel_n0 = this->vel_n0; + if(this->local_vel.size()==0&&this->vel.size()!=0){ + this->local_vel = this->vel; + } + if(this->local_vel_n0.size()==0&&this->vel_n0.size()!=0){ + this->local_vel_n0 = this->vel_n0; + } //storage for nlocal+nghost this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); //assign local data to new storage - super_vector_initialization(this->vel, this->local_vel, subview_map.size()); - super_vector_initialization(this->vel_n0, this->local_vel_n0, 
subview_map.size()); + if(this->local_vel.size()!=0){ + super_vector_initialization(this->vel, this->local_vel, subview_map.size()); + } + if(this->local_vel_n0.size()!=0){ + super_vector_initialization(this->vel_n0, this->local_vel_n0, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_vel = DistributedDCArray(this->vel,subview_map); this->local_vel_n0 = DistributedDCArray(this->vel_n0,subview_map); break; case node_state::force: //store local data with existing managed view made by mesh read for now - this->local_force = this->force; + if(this->local_force.size()==0&&this->force.size()!=0){ + this->local_force = this->force; + } //storage for nlocal+nghost this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); //assign local data to new storage - super_vector_initialization(this->force, this->local_force, subview_map.size()); + if(this->local_force.size()!=0){ + super_vector_initialization(this->force, this->local_force, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_force = DistributedDCArray(this->force,subview_map); break; case node_state::mass: //store local data with existing managed view made by mesh read for now - this->local_mass = this->mass; + if(this->local_mass.size()==0&&this->mass.size()!=0){ + this->local_mass = this->mass; + } //storage for nlocal+nghost this->mass = DistributedDCArray(partitioned_map, "node_mass"); //assign local data to new storage - super_vector_initialization(this->mass, this->local_mass, subview_map.size()); + if(this->local_mass.size()!=0){ + super_vector_initialization(this->mass, this->local_mass, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_mass = DistributedDCArray(this->mass,subview_map); break; case 
node_state::temp: //store local data with existing managed view made by mesh read for now - this->local_temp = this->temp; - this->local_temp_n0 = this->temp_n0; + if(this->local_temp.size()==0&&this->temp.size()!=0){ + this->local_temp = this->temp; + } + if(this->local_temp_n0.size()==0&&this->temp_n0.size()!=0){ + this->local_temp_n0 = this->temp_n0; + } //storage for nlocal+nghost this->temp = DistributedDCArray(partitioned_map, "node_temp"); this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); //assign local data to new storage - super_vector_initialization(this->temp, this->local_temp, subview_map.size()); - super_vector_initialization(this->temp_n0, this->local_temp_n0, subview_map.size()); + if(this->local_temp.size()!=0){ + super_vector_initialization(this->temp, this->local_temp, subview_map.size()); + } + if(this->local_temp_n0.size()!=0){ + super_vector_initialization(this->temp_n0, this->local_temp_n0, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_temp = DistributedDCArray(this->temp,subview_map); this->local_temp_n0 = DistributedDCArray(this->temp_n0,subview_map); break; case node_state::heat_transfer: //store local data with existing managed view made by mesh read for now - this->local_q_transfer = this->q_transfer; + if(this->local_q_transfer.size()==0&&this->q_transfer.size()!=0){ + this->local_q_transfer = this->q_transfer; + } //storage for nlocal+nghost this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + //assign local data to new storage - super_vector_initialization(this->q_transfer, this->local_q_transfer, subview_map.size()); + if(this->local_q_transfer.size()!=0){ + super_vector_initialization(this->q_transfer, this->local_q_transfer, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_q_transfer = 
DistributedDCArray(this->q_transfer,subview_map); break; case node_state::gradient_level_set: //store local data with existing managed view made by mesh read for now - this->local_gradient_level_set = this->gradient_level_set; + if(this->local_gradient_level_set.size()==0&&this->gradient_level_set.size()!=0){ + this->local_gradient_level_set = this->gradient_level_set; + } //storage for nlocal+nghost this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); //assign local data to new storage - super_vector_initialization(this->gradient_level_set, this->local_gradient_level_set, subview_map.size()); + if(this->local_gradient_level_set.size()!=0){ + super_vector_initialization(this->gradient_level_set, this->local_gradient_level_set, subview_map.size()); + } //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here this->local_gradient_level_set = DistributedDCArray(this->gradient_level_set,subview_map); break; diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index 56455d153..2c3826572 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -107,14 +107,16 @@ void Driver::initialize() } //build relevant partition maps for ghost nodes, elements, etc. - mesh.init_maps(); + mesh.init_maps(State.node); // Build boundary conditions const int num_bcs = BoundaryConditions.num_bcs; + //make bcs MPI parallel? 
// --- calculate bdy sets ---// mesh.init_bdy_sets(num_bcs); tag_bdys(BoundaryConditions, mesh, State.node.coords); + build_boundry_node_sets(mesh); diff --git a/single-node-refactor/src/material_models/strength/decoupled_plasticity.h b/single-node-refactor/src/material_models/strength/decoupled_plasticity.h index b603cb876..0bebb3258 100644 --- a/single-node-refactor/src/material_models/strength/decoupled_plasticity.h +++ b/single-node-refactor/src/material_models/strength/decoupled_plasticity.h @@ -123,9 +123,9 @@ namespace HypoPlasticityModel { KOKKOS_FUNCTION static void calc_stress( const DCArrayKokkos &GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, + const DistributedDCArray &nodes_in_elem, const DRaggedRightArrayKokkos &MaterialPoints_pres, const DRaggedRightArrayKokkos &MaterialPoints_stress, const DRaggedRightArrayKokkos &MaterialPoints_stress_n0, @@ -416,9 +416,9 @@ namespace HypoPlasticityRZModel { KOKKOS_FUNCTION static void calc_stress( const DCArrayKokkos &GaussPoints_vel_grad, - const DCArrayKokkos &node_coords, - const DCArrayKokkos &node_vel, - const DCArrayKokkos &nodes_in_elem, + const DistributedDCArray &node_coords, + const DistributedDCArray &node_vel, + const DistributedDCArray &nodes_in_elem, const DRaggedRightArrayKokkos &MaterialPoints_pres, const DRaggedRightArrayKokkos &MaterialPoints_stress, const DRaggedRightArrayKokkos &MaterialPoints_stress_n0, From 6d61337e34068850fa551cf2739350791392a4db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 26 Jun 2025 23:48:17 -0600 Subject: [PATCH 24/66] WIP: MPI refactor --- .../SGH_solver_3D/include/sgh_solver_3D.h | 10 +++- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 46 +++++++++++++++---- .../SGH_solver_3D/src/sgh_initialize.cpp | 8 +++- 
.../SGH_solver_rz/include/sgh_solver_rz.h | 2 +- .../SGH_solver_rz/src/sgh_initialize_rz.cpp | 2 +- .../SGTM_solver_3D/include/sgtm_solver_3D.h | 2 +- .../SGTM_solver_3D/src/sgtm_initialize.cpp | 2 +- .../include/level_set_solver.h | 2 +- .../src/level_set_initialize.cpp | 2 +- .../src/common/include/mesh.h | 5 +- single-node-refactor/src/solver.h | 2 +- 11 files changed, 60 insertions(+), 23 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index 7bbcf1631..096c842b1 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -146,6 +146,10 @@ class SGH3D : public Solver ~SGH3D() = default; + //member variables + CommPlan node_velocity_comms; + CommPlan node_mass_comms; + ///////////////////////////////////////////////////////////////////////////// /// /// \fn Initialize @@ -157,7 +161,7 @@ class SGH3D : public Solver Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const override; + State_t& State) override; void initialize_material_state(SimulationParameters_t& SimulationParameters, @@ -449,8 +453,10 @@ class SGH3D : public Solver double sum_domain_internal_energy( const DRaggedRightArrayKokkos& MaterialPoints_mass, const DRaggedRightArrayKokkos& MaterialPoints_sie, + const MaterialToMeshMap_t& MaterialToMeshMaps, const size_t num_mat_points, - const size_t mat_id); + const size_t mat_id, + const size_t num_local_elems); double sum_domain_kinetic_energy( const Mesh_t& mesh, diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index ee72d4ef4..f4f45ca99 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -102,6 
+102,8 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, double IE_t0 = 0.0; double KE_t0 = 0.0; double TE_t0 = 0.0; + double local_IE_t0 = 0.0; + double local_KE_t0 = 0.0; double cached_pregraphics_dt = fuzz; @@ -111,17 +113,24 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, // extensive IE for (size_t mat_id = 0; mat_id < num_mats; mat_id++) { - IE_t0 += sum_domain_internal_energy(State.MaterialPoints.mass, + local_IE_t0 += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, + State.MaterialToMeshMaps, State.MaterialPoints.num_material_points.host(mat_id), - mat_id); + mat_id, + mesh.num_local_elems); } // end loop over mat_id // extensive KE - KE_t0 = sum_domain_kinetic_energy(mesh, + local_KE_t0 = sum_domain_kinetic_energy(mesh, State.node.vel, State.node.coords, State.node.mass); + + //collect KE and TE sums across all processes + MPI_Allreduce(&local_IE_t0, &IE_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_KE_t0, &KE_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + // extensive TE TE_t0 = IE_t0 + KE_t0; @@ -212,7 +221,8 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, min_dt_calc = fmin(dt_mat, min_dt_calc); } // end for loop over all mats - dt = min_dt_calc; // save this dt time step + //Find the minimum timestep across all MPI processes + MPI_Allreduce(&min_dt_calc, &dt, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); if (cycle == 0) { printf("cycle = %lu, time = %f, time step = %f \n", cycle, time_value, dt); @@ -351,6 +361,9 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, // ---- apply contact boundary conditions to the boundary patches---- boundary_contact(mesh, BoundaryConditions, State.node.vel, time_value); + //update node velocity on ghosts + node_velocity_comms.execute_comms(); + // mpi_coms(); for (size_t mat_id = 0; mat_id < num_mats; mat_id++) { @@ -508,21 +521,29 @@ void SGH3D::execute(SimulationParameters_t& 
SimulationParameters, double IE_tend = 0.0; double KE_tend = 0.0; double TE_tend = 0.0; + double local_IE_tend = 0.0; + double local_KE_tend = 0.0; // extensive IE for(size_t mat_id = 0; mat_id < num_mats; mat_id++){ - IE_tend += sum_domain_internal_energy(State.MaterialPoints.mass, + local_IE_tend += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, + State.MaterialToMeshMaps, State.MaterialPoints.num_material_points.host(mat_id), - mat_id); + mat_id, + mesh.num_local_elems); } // end loop over mat_id // extensive KE - KE_tend = sum_domain_kinetic_energy(mesh, + local_KE_tend = sum_domain_kinetic_energy(mesh, State.node.vel, State.node.coords, State.node.mass); + + MPI_Allreduce(&local_IE_tend, &IE_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_KE_tend, &KE_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + // extensive TE TE_tend = IE_tend + KE_tend; @@ -635,14 +656,17 @@ double max_Eigen3D(const ViewCArrayKokkos tensor) double sum_domain_internal_energy( const DRaggedRightArrayKokkos& MaterialPoints_mass, const DRaggedRightArrayKokkos& MaterialPoints_sie, + const MaterialToMeshMap_t& MaterialToMeshMaps, const size_t num_mat_points, - const size_t mat_id) + const size_t mat_id, + const size_t num_local_elems) { double IE_sum = 0.0; double IE_loc_sum; // loop over the material points and tally IE FOR_REDUCE_SUM(matpt_lid, 0, num_mat_points, IE_loc_sum, { + if(MaterialToMeshMaps.elem(mat_id, matpt_lid) < num_local_elems) IE_loc_sum += MaterialPoints_mass(mat_id,matpt_lid) * MaterialPoints_sie(mat_id,matpt_lid); }, IE_sum); Kokkos::fence(); @@ -675,11 +699,13 @@ double sum_domain_kinetic_energy( // extensive KE double KE_sum = 0.0; double KE_loc_sum; + int num_dims = mesh.num_dims; + int num_local_nodes = mesh.num_local_nodes; - FOR_REDUCE_SUM(node_gid, 0, mesh.num_nodes, KE_loc_sum, { + FOR_REDUCE_SUM(node_gid, 0, num_local_nodes, KE_loc_sum, { double ke = 0; - for (size_t dim = 0; dim < mesh.num_dims; dim++) { + for
(size_t dim = 0; dim < num_dims; dim++) { ke += node_vel(node_gid, dim) * node_vel(node_gid, dim); // 1/2 at end } // end for diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp index 563735895..5072f7f8b 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_initialize.cpp @@ -42,7 +42,7 @@ void SGH3D::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const + State_t& State) { const size_t num_nodes = mesh.num_nodes; const size_t num_gauss_pts = mesh.num_elems; @@ -55,10 +55,14 @@ void SGH3D::initialize(SimulationParameters_t& SimulationParameters, } // mesh state - State.node.initialize(num_nodes, num_dims, SGH3D_State::required_node_state); + State.node.initialize(mesh.all_node_map, num_dims, SGH3D_State::required_node_state, mesh.node_map); //allocate shared nlocal+nghost contiguous array State.GaussPoints.initialize(num_gauss_pts, num_dims, SGH3D_State::required_gauss_pt_state); State.corner.initialize(num_corners, num_dims, SGH3D_State::required_corner_state); + //comms objects + node_velocity_comms = CommPlan(State.node.vel, State.node.local_vel, mesh.node_coords_comms); //copies MPI setup from coordinate comms since the node maps are the same + node_mass_comms = CommPlan(State.node.mass, State.node.local_mass, mesh.node_coords_comms); //copies MPI setup from coordinate comms since the node maps are the same + // check that the fills specify the required nodal fields bool filled_nodal_state = check_fill_node_states(SGH3D_State::required_fill_node_state, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h index 8e351090c..97fe6e88f 100644 --- 
a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h @@ -129,7 +129,7 @@ class SGHRZ : public Solver Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const override; + State_t& State) override; void initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp index a97169c00..ad1158343 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_initialize_rz.cpp @@ -41,7 +41,7 @@ void SGHRZ::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const + State_t& State) { size_t num_nodes = mesh.num_nodes; size_t num_gauss_pts = mesh.num_elems; diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h index 4f1ca21b2..49c06a77b 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGTM_solver_3D/include/sgtm_solver_3D.h @@ -168,7 +168,7 @@ class SGTM3D : public Solver Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const override; + State_t& State) override; void initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, diff --git a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp index 633e829d0..db0c465bc 100644 --- a/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp +++ 
b/single-node-refactor/src/Solvers/SGTM_solver_3D/src/sgtm_initialize.cpp @@ -41,7 +41,7 @@ void SGTM3D::initialize(SimulationParameters_t& SimulationParameters, Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const + State_t& State) { int num_nodes = mesh.num_nodes; int num_gauss_pts = mesh.num_elems; diff --git a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h index 97d8a95ec..9705cf903 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h +++ b/single-node-refactor/src/Solvers/level_set_solver/include/level_set_solver.h @@ -115,7 +115,7 @@ class LevelSet : public Solver Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const override; + State_t& State) override; void initialize_material_state(SimulationParameters_t& SimulationParamaters, Material_t& Materials, diff --git a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_initialize.cpp b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_initialize.cpp index 84a39ff52..99def98c3 100644 --- a/single-node-refactor/src/Solvers/level_set_solver/src/level_set_initialize.cpp +++ b/single-node-refactor/src/Solvers/level_set_solver/src/level_set_initialize.cpp @@ -42,7 +42,7 @@ Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const + State_t& State) { const size_t num_nodes = mesh.num_nodes; const size_t num_gauss_pts = mesh.num_elems; diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index b8bc209d6..044c0ac59 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -518,7 +518,7 @@ struct Mesh_t Element_Global_Indices.update_device(); // create nonoverlapping element map - element_map = 
DistributedMap(Element_Global_Indices); + local_element_map = DistributedMap(Element_Global_Indices); // sort element connectivity so nonoverlaps are sequentially found first // define initial sorting of global indices @@ -540,7 +540,7 @@ struct Mesh_t { current_element_gid = Initial_Element_Global_Indices.host(ielem); // if this element is not part of the non overlap list then send it to the end of the storage and swap the element at the end - if (!element_map.isProcessGlobalIndex(current_element_gid)) + if (!local_element_map.isProcessGlobalIndex(current_element_gid)) { temp_element_gid = current_element_gid; for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) @@ -584,6 +584,7 @@ struct Mesh_t /* create forward comms objects; setup for new map pairs should only be done here, construct using these existing comm plans for any new pair of vectors requiring the same map pairs and comm mode afterwards*/ forward_comms_setup(node); + node_coords_comms.execute_comms(); // create reverse comms //reverse_comms_setup(node); diff --git a/single-node-refactor/src/solver.h b/single-node-refactor/src/solver.h index 1084f511d..760ffb424 100644 --- a/single-node-refactor/src/solver.h +++ b/single-node-refactor/src/solver.h @@ -63,7 +63,7 @@ class Solver Material_t& Materials, Mesh_t& mesh, BoundaryCondition_t& Boundary, - State_t& State) const = 0; + State_t& State) {} virtual void initialize_material_state(SimulationParameters_t& SimulationParameters, Material_t& Materials, From 19b801069206487fb70e2916b407ba88894d2b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Wed, 2 Jul 2025 00:29:44 -0600 Subject: [PATCH 25/66] WIP: MPI mesh builder --- .../src/common/include/mesh.h | 2 +- .../src/common/include/mesh_io.h | 246 ++++++++++-------- 2 files changed, 143 insertions(+), 105 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 
044c0ac59..d81adb486 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -323,7 +323,7 @@ struct Mesh_t // initialization methods void initialize_nodes(const size_t num_nodes_inp) { - num_nodes = num_nodes_inp; + global_num_nodes = num_nodes_inp; return; }; // end method diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index afb2f9355..77d3a2b95 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1828,7 +1828,7 @@ class MeshBuilder } } else if (SimulationParameters.mesh_input.num_dims == 3) { - //build_3d_box(mesh, GaussPoints, node, corner, SimulationParameters); + build_3d_box(mesh, GaussPoints, node, corner, SimulationParameters); } else{ throw std::runtime_error("**** ONLY 2D RZ OR 3D MESHES ARE SUPPORTED ****"); @@ -2109,125 +2109,163 @@ class MeshBuilder /// \param Simulation parameters /// ///////////////////////////////////////////////////////////////////////////// - // void build_3d_box(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // SimulationParameters_t& SimulationParameters) const - // { - // printf("Creating a 3D box mesh \n"); - - // const int num_dim = 3; - - // // SimulationParameters.mesh_input.length.update_host(); - // const double lx = SimulationParameters.mesh_input.length[0]; - // const double ly = SimulationParameters.mesh_input.length[1]; - // const double lz = SimulationParameters.mesh_input.length[2]; - - // // SimulationParameters.mesh_input.num_elems.update_host(); - // const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; - // const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; - // const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; - - // const int num_points_i = num_elems_i + 1; // num points in x - // const int num_points_j = num_elems_j + 
1; // num points in y - // const int num_points_k = num_elems_k + 1; // num points in y - - // const int num_nodes = num_points_i * num_points_j * num_points_k; - - // const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) - // const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) - // const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) - - // const int num_elems = num_elems_i * num_elems_j * num_elems_k; - - // std::vector origin(num_dim); - // // SimulationParameters.mesh_input.origin.update_host(); - // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - - // // --- 3D parameters --- - // // const int num_faces_in_elem = 6; // number of faces in elem - // // const int num_points_in_elem = 8; // number of points in elem - // // const int num_points_in_face = 4; // number of points in a face - // // const int num_edges_in_elem = 12; // number of edges in a elem - - - // // initialize mesh node variables - // mesh.initialize_nodes(num_nodes); + void build_3d_box(Mesh_t& mesh, + GaussPoint_t& GaussPoints, + node_t& node, + corner_t& corner, + SimulationParameters_t& SimulationParameters) const + { + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + /*currently we just build the global mesh data on rank 0 and then broadcast relevant data to each rank + before the global mesh data on rank 0 falls out of scope*/ + int global_num_nodes, global_num_elems; - // // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dim, required_node_state); + const int num_dims = 3; + size_t num_nodes_in_elem = 1; + for (int dim = 0; dim < num_dims; dim++) { + num_nodes_in_elem *= 2; + } + if(myrank==0){ + printf("Creating a 3D box mesh \n"); - // // --- Build nodes --- + // 
SimulationParameters.mesh_input.length.update_host(); + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; + const double lz = SimulationParameters.mesh_input.length[2]; - // // populate the point data structures - // for (int k = 0; k < num_points_k; k++) { - // for (int j = 0; j < num_points_j; j++) { - // for (int i = 0; i < num_points_i; i++) { - // // global id for the point - // int node_gid = get_id(i, j, k, num_points_i, num_points_j); + // SimulationParameters.mesh_input.num_elems.update_host(); + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; - // // store the point coordinates - // node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; - // node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; - // node.coords.host(node_gid, 2) = origin[2] + (double)k * dz; - // } // end for i - // } // end for j - // } // end for k + const int num_points_i = num_elems_i + 1; // num points in x + const int num_points_j = num_elems_j + 1; // num points in y + const int num_points_k = num_elems_k + 1; // num points in y + global_num_nodes = num_points_i * num_points_j * num_points_k; - // node.coords.update_device(); + const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) + const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) + const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) - // // initialize elem variables - // mesh.initialize_elems(num_elems, num_dim); + const int global_num_elems = num_elems_i * num_elems_j * num_elems_k; - // // --- Build elems --- + std::vector origin(num_dims); + // SimulationParameters.mesh_input.origin.update_host(); + for (int i = 0; i < num_dims; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // // populate the elem center data 
structures - // for (int k = 0; k < num_elems_k; k++) { - // for (int j = 0; j < num_elems_j; j++) { - // for (int i = 0; i < num_elems_i; i++) { - // // global id for the elem - // int elem_gid = get_id(i, j, k, num_elems_i, num_elems_j); + // --- 3D parameters --- + // const int num_faces_in_elem = 6; // number of faces in elem + // const int num_points_in_elem = 8; // number of points in elem + // const int num_points_in_face = 4; // number of points in a face + // const int num_edges_in_elem = 12; // number of edges in a elem + + // node coords data on rank 0 for all global nodes + DCArrayKokkos global_coords(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); + + // --- Build nodes --- + + // populate the point data structures + for (int k = 0; k < num_points_k; k++) { + for (int j = 0; j < num_points_j; j++) { + for (int i = 0; i < num_points_i; i++) { + // global id for the point + int node_gid = get_id(i, j, k, num_points_i, num_points_j); + + // store the point coordinates + global_coords.host(node_gid, 0) = origin[0] + (double)i * dx; + global_coords.host(node_gid, 1) = origin[1] + (double)j * dy; + global_coords.host(node_gid, 2) = origin[2] + (double)k * dz; + } // end for i + } // end for j + } // end for k + + + global_coords.update_device(); + + // initialize elem variables + DCArrayKokkos global_nodes_in_elem(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); + + // --- Build elems --- + + // populate the elem center data structures + for (int k = 0; k < num_elems_k; k++) { + for (int j = 0; j < num_elems_j; j++) { + for (int i = 0; i < num_elems_i; i++) { + // global id for the elem + int elem_gid = get_id(i, j, k, num_elems_i, num_elems_j); + + // store the point IDs for this elem where the range is + // (i:i+1, j:j+1, k:k+1) for a linear hexahedron + int this_point = 0; + for (int kcount = k; kcount <= k + 1; kcount++) { + for (int jcount = j; jcount <= j + 1; jcount++) { + for (int icount = i; icount <= i + 
1; icount++) { + // global id for the points + int node_gid = get_id(icount, jcount, kcount, + num_points_i, num_points_j); + + // convert this_point index to the FE index convention + int this_index = this_point; //convert_point_number_in_Hex(this_point); + + // store the points in this elem according the the finite + // element numbering convention + global_nodes_in_elem.host(elem_gid, this_index) = node_gid; + + // increment the point counting index + this_point = this_point + 1; + } // end for icount + } // end for jcount + } // end for kcount + } // end for i + } // end for j + } // end for k + } - // // store the point IDs for this elem where the range is - // // (i:i+1, j:j+1, k:k+1) for a linear hexahedron - // int this_point = 0; - // for (int kcount = k; kcount <= k + 1; kcount++) { - // for (int jcount = j; jcount <= j + 1; jcount++) { - // for (int icount = i; icount <= i + 1; icount++) { - // // global id for the points - // int node_gid = get_id(icount, jcount, kcount, - // num_points_i, num_points_j); + //distribute partitioned data from the global mesh build data on rank 0 + size_t num_local_nodes; + DistributedMap map; + { //scoped so temp FArray data is auto deleted to save memory + //allocate pre-partition node coords using contiguous decomposition + //FArray type used since CArray type still doesnt support zoltan2 decomposition + DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); - // // convert this_point index to the FE index convention - // int this_index = this_point; //convert_point_number_in_Hex(this_point); + // construct contiguous parallel row map now that we know the number of nodes + map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); - // // store the points in this elem according the the finite - // // element numbering convention - // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // set the vertices in the mesh read in + num_local_nodes = map.size(); + // end of 
coordinate readin + node_coords_distributed.update_device(); + // repartition node distribution + node_coords_distributed.repartition_vector(); + //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope + std::vector required_node_state = { node_state::coords }; + map = node_coords_distributed.pmap; + node.initialize(map, num_dims, required_node_state); + } - // // increment the point counting index - // this_point = this_point + 1; - // } // end for icount - // } // end for jcount - // } // end for kcount - // } // end for i - // } // end for j - // } // end for k + // initialize mesh node variables + mesh.initialize_nodes(global_num_nodes); + num_local_nodes = map.size(); + mesh.num_local_nodes = num_local_nodes; - // // update device side - // mesh.nodes_in_elem.update_device(); + // update device side + mesh.nodes_in_elem.update_device(); + + mesh.global_num_elems = global_num_elems; - // // initialize corner variables - // int num_corners = num_elems * mesh.num_nodes_in_elem; - // mesh.initialize_corners(num_corners); - // // corner.initialize(num_corners, num_dim); + // initialize corner variables + int num_corners = mesh.num_elems * mesh.num_nodes_in_elem; + mesh.initialize_corners(num_corners); + // corner.initialize(num_corners, num_dim); - // // Build connectivity - // mesh.build_connectivity(); - // } // end build_3d_box + // Build connectivity + mesh.build_connectivity(); + } // end build_3d_box ///////////////////////////////////////////////////////////////////////////// /// From 5e7f632fd5fb44c62f5152f5963672c72a38e2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 4 Jul 2025 00:06:15 -0600 Subject: [PATCH 26/66] WIP: MPI mesh build --- .../src/common/include/mesh_io.h | 402 +++++++++++++++++- 1 file changed, 388 insertions(+), 14 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h 
b/single-node-refactor/src/common/include/mesh_io.h index 77d3a2b95..4edbb25da 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -2122,6 +2122,16 @@ class MeshBuilder before the global mesh data on rank 0 falls out of scope*/ int global_num_nodes, global_num_elems; + int local_node_index, current_column_index; + int buffer_loop, buffer_iteration, buffer_iterations, dof_limit, scan_loop; + int negative_index_found = 0; + int global_negative_index_found = 0; + + size_t read_index_start, node_rid, elem_gid; + long long int node_gid; + real_t dof_value; + real_t unit_scaling = 1; + const int num_dims = 3; size_t num_nodes_in_elem = 1; for (int dim = 0; dim < num_dims; dim++) { @@ -2163,7 +2173,7 @@ class MeshBuilder // const int num_edges_in_elem = 12; // number of edges in a elem // node coords data on rank 0 for all global nodes - DCArrayKokkos global_coords(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); + CArrayKokkos global_coords(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); // --- Build nodes --- @@ -2175,18 +2185,16 @@ class MeshBuilder int node_gid = get_id(i, j, k, num_points_i, num_points_j); // store the point coordinates - global_coords.host(node_gid, 0) = origin[0] + (double)i * dx; - global_coords.host(node_gid, 1) = origin[1] + (double)j * dy; - global_coords.host(node_gid, 2) = origin[2] + (double)k * dz; + global_coords(node_gid, 0) = origin[0] + (double)i * dx; + global_coords(node_gid, 1) = origin[1] + (double)j * dy; + global_coords(node_gid, 2) = origin[2] + (double)k * dz; } // end for i } // end for j } // end for k - global_coords.update_device(); - // initialize elem variables - DCArrayKokkos global_nodes_in_elem(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); + DCArrayKokkos global_nodes_in_elem(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); // --- Build elems --- @@ -2212,7 +2220,7 
@@ class MeshBuilder // store the points in this elem according the the finite // element numbering convention - global_nodes_in_elem.host(elem_gid, this_index) = node_gid; + global_nodes_in_elem(elem_gid, this_index) = node_gid; // increment the point counting index this_point = this_point + 1; @@ -2227,6 +2235,18 @@ class MeshBuilder //distribute partitioned data from the global mesh build data on rank 0 size_t num_local_nodes; DistributedMap map; + // read coords + read_index_start = 0; + size_t num_local_nodes; + + buffer_iterations = global_num_nodes / BUFFER_LINES; + if (global_num_nodes % BUFFER_LINES != 0) + { + buffer_iterations++; + } + + read_buffer = CArrayKokkos(BUFFER_LINES, num_dims); + { //scoped so temp FArray data is auto deleted to save memory //allocate pre-partition node coords using contiguous decomposition //FArray type used since CArray type still doesnt support zoltan2 decomposition @@ -2238,6 +2258,75 @@ class MeshBuilder // set the vertices in the mesh read in num_local_nodes = map.size(); + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + + for (int idim = 0; idim < num_dims; idim++) + { + // debug print + // std::cout<<" "<< substring <getGlobalElement(inode) + 1 << " { "; + for (int istride = 0; istride < num_dims; istride++){ + std::cout << node_coords(inode,istride) << " , "; + } + std::cout << " }"<< std::endl; + } + */ + + // check that local assignments match global total + + // read in element info (ensight file format is organized in element type sections) + // loop over this later for several element type sections + + size_t global_num_elems = 0; + size_t num_elems = 0; + CArrayKokkos node_store(num_nodes_in_elem); + + // --- read the number of cells in the mesh --- + // --- Read the number of vertices in the mesh --- // + 
if (myrank == 0) + { + bool found = false; + while (found == false&&in.good()) { + std::getline(in, read_line); + line_parse.str(""); + line_parse.clear(); + line_parse << read_line; + line_parse >> substring; + + // looking for the following text: + // CELLS num_cells size + if (substring == "CELLS") + { + line_parse >> global_num_elems; + std::cout << "declared element count: " << global_num_elems << std::endl; + if (global_num_elems <= 0) + { + throw std::runtime_error("ERROR, NO ELEMENTS IN MESH"); + } + found = true; + } // end if + } // end while + + if (!found){ + throw std::runtime_error("ERROR: Failed to find CELLS"); + } // end if + } // end if(myrank==0) + + // broadcast number of elements + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //initialize num elem in mesh struct + + if (myrank == 0) + { + std::cout << "before mesh initialization" << std::endl; + } + + // read in element connectivity + // we're gonna reallocate for the words per line expected for the element connectivity + read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); + + // calculate buffer iterations to read number of lines + buffer_iterations = global_num_elems / BUFFER_LINES; + int assign_flag; + + // dynamic buffer used to store elements before we know how many this rank needs + std::vector element_temp(BUFFER_LINES * elem_words_per_line); + std::vector global_indices_temp(BUFFER_LINES); + size_t buffer_max = BUFFER_LINES * elem_words_per_line; + size_t indices_buffer_max = BUFFER_LINES; + + if (global_num_elems % BUFFER_LINES != 0) + { + buffer_iterations++; + } + read_index_start = 0; + // std::cout << "ELEMENT BUFFER ITERATIONS: " << buffer_iterations << std::endl; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + { + getline(in, 
read_line); + line_parse.clear(); + line_parse.str(read_line); + // disregard node count line since we're using one element type per mesh + line_parse >> substring; + for (int iword = 0; iword < elem_words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring; + // assign the substring variable as a word of the read buffer + strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + } + // std::cout <> substring; + for (int iword = 0; iword < elem_words_per_line; iword++) + { + // read portions of the line into the substring variable + line_parse >> substring; + // debug print + // std::cout<<" "<< substring; + // assign the substring variable as a word of the read buffer + strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + } + // std::cout <= buffer_max) + { + element_temp.resize((num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); + buffer_max = (num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; + } + element_temp[(num_elems - 1) * elem_words_per_line + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + } + // assign global element id to temporary list + if (num_elems - 1 >= indices_buffer_max) + { + global_indices_temp.resize(num_elems - 1 + BUFFER_LINES); + indices_buffer_max = num_elems - 1 + BUFFER_LINES; + } + global_indices_temp[num_elems - 1] = elem_gid; + } + } + read_index_start += BUFFER_LINES; + } + + //set global and local shared element counts mesh.global_num_elems = global_num_elems; + // construct partition mapping for shared elements on each process + DCArrayKokkos All_Element_Global_Indices(num_elems); + // copy temporary global indices storage to view storage + for (int ielem = 0; ielem < num_elems; ielem++) + { + 
All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; + if (global_indices_temp[ielem] < 0) + { + negative_index_found = 1; + } + } + + MPI_Allreduce(&negative_index_found, &global_negative_index_found, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + if (global_negative_index_found) + { + if (myrank == 0) + { + std::cout << "Node index less than or equal to zero detected; set \"zero_index_base: true\" under \"input_options\" in your yaml file if indices start at 0" << std::endl; + } + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + exit(0); + } + + // delete temporary element connectivity and index storage + std::vector().swap(element_temp); + std::vector().swap(global_indices_temp); + + All_Element_Global_Indices.update_device(); + + // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) + DistributedMap element_map = DistributedMap(All_Element_Global_Indices); + + //initialize elem data structures + mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); + + // copy temporary element storage to distributed storage + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; + + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < elem_words_per_line; inode++) + { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) + nodes_in_elem.host(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; + } + } + + // element type selection (subject to change) + // ---- Set Element Type ---- // + // allocate element type memory + // elements::elem_type_t* elem_choice; + + int NE = 1; // number of element types in problem + + // Convert ensight index system to the ijk finite element numbering convention + // for vertices in cell + CArrayKokkos convert_ensight_to_ijk(num_nodes_in_elem); + CArrayKokkos tmp_ijk_indx(num_nodes_in_elem); + convert_ensight_to_ijk(0) = 0; + convert_ensight_to_ijk(1) = 1; 
+ convert_ensight_to_ijk(2) = 3; + convert_ensight_to_ijk(3) = 2; + convert_ensight_to_ijk(4) = 4; + convert_ensight_to_ijk(5) = 5; + convert_ensight_to_ijk(6) = 7; + convert_ensight_to_ijk(7) = 6; + + for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) + { + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) + { + tmp_ijk_indx(node_lid) = nodes_in_elem.host(cell_rid, convert_ensight_to_ijk(node_lid)); + } + + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) + { + nodes_in_elem.host(cell_rid, node_lid) = tmp_ijk_indx(node_lid); + } + } + + + nodes_in_elem.update_device(); + // initialize corner variables - int num_corners = mesh.num_elems * mesh.num_nodes_in_elem; + size_t num_corners = num_elems * num_nodes_in_elem; mesh.initialize_corners(num_corners); - // corner.initialize(num_corners, num_dim); // Build connectivity mesh.build_connectivity(); From 60fa2cf34f270c3d915d4b86da8cbf4bcdfd4aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 4 Jul 2025 19:37:37 -0600 Subject: [PATCH 27/66] WIP: MPI refactor --- .../src/common/include/mesh.h | 9 + .../src/common/include/mesh_io.h | 229 +++--------------- single-node-refactor/src/driver.cpp | 3 + 3 files changed, 49 insertions(+), 192 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index d81adb486..7744453aa 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -574,6 +574,15 @@ struct Mesh_t nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); nodes_in_elem = nodes_in_elem_temp; + //convert global ids stored in nodes_in_elem to local node ids spanning 0:num_nodes on this process + for(int ielem= 0; ielem < num_elems; ielem++) { + for(int inode = 0; inode < num_nodes_in_elem; inode++){ + nodes_in_elem(ielem, inode) = 
all_node_map.getLocalIndex(nodes_in_elem(ielem, inode)); + } + } + + nodes_in_elem.update_device(); + // element_map->describe(*fos,Teuchos::VERB_EXTREME); // element_map->describe(*fos,Teuchos::VERB_EXTREME); // create distributed multivector of the local node data and all (local + ghost) node storage diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 4edbb25da..d9153d5fa 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -529,9 +529,6 @@ class MeshReader // // Close mesh input file // fclose(in); - // // Build connectivity - // mesh.build_connectivity(); - // return; // } // end read ensight mesh @@ -692,8 +689,6 @@ class MeshReader // mesh.initialize_corners(num_corners); // // State.corner.initialize(num_corners, num_dims); - // // Build connectivity - // mesh.build_connectivity(); // } // end read abaqus mesh @@ -882,10 +877,6 @@ class MeshReader // mesh.initialize_corners(num_corners); - // // Build connectivity - // mesh.build_connectivity(); - - // found=false; // printf("\n"); @@ -1380,10 +1371,6 @@ class MeshReader exit(0); } - // delete temporary element connectivity and index storage - std::vector().swap(element_temp); - std::vector().swap(global_indices_temp); - All_Element_Global_Indices.update_device(); // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) @@ -1397,18 +1384,15 @@ class MeshReader for (int ielem = 0; ielem < num_elems; ielem++) { - for (int inode = 0; inode < elem_words_per_line; inode++) + for (int inode = 0; inode < num_nodes_in_elem; inode++) { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) - nodes_in_elem.host(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; + nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + 
inode]; } } - // element type selection (subject to change) - // ---- Set Element Type ---- // - // allocate element type memory - // elements::elem_type_t* elem_choice; - - int NE = 1; // number of element types in problem + // delete temporary element connectivity and index storage + std::vector().swap(element_temp); + std::vector().swap(global_indices_temp); // Convert ensight index system to the ijk finite element numbering convention // for vertices in cell @@ -1443,9 +1427,6 @@ class MeshReader size_t num_corners = num_elems * num_nodes_in_elem; mesh.initialize_corners(num_corners); - // Build connectivity - mesh.build_connectivity(); - // Close mesh input file if (myrank == 0) { @@ -1759,10 +1740,6 @@ class MeshReader // mesh.initialize_corners(num_corners); - // // Build connectivity - // mesh.build_connectivity(); - - // in.close(); // } // end of VTMread function @@ -1957,8 +1934,6 @@ class MeshBuilder // mesh.initialize_corners(num_corners); // // corner.initialize(num_corners, num_dim); - // // Build connectivity - // mesh.build_connectivity(); // } // end build_2d_box ///////////////////////////////////////////////////////////////////////////// @@ -2092,8 +2067,6 @@ class MeshBuilder // mesh.initialize_corners(num_corners); // // corner.initialize(num_corners, num_dim); - // // Build connectivity - // mesh.build_connectivity(); // } // end build_2d_box ///////////////////////////////////////////////////////////////////////////// @@ -2121,6 +2094,9 @@ class MeshBuilder /*currently we just build the global mesh data on rank 0 and then broadcast relevant data to each rank before the global mesh data on rank 0 falls out of scope*/ int global_num_nodes, global_num_elems; + CArrayKokkos read_buffer; + CArrayKokkos global_coords; + CArrayKokkos global_nodes_in_elem; int local_node_index, current_column_index; int buffer_loop, buffer_iteration, buffer_iterations, dof_limit, scan_loop; @@ -2160,7 +2136,7 @@ class MeshBuilder const double dy = ly / 
((double)num_elems_j); // len/(num_elems_j) const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) - const int global_num_elems = num_elems_i * num_elems_j * num_elems_k; + global_num_elems = num_elems_i * num_elems_j * num_elems_k; std::vector origin(num_dims); // SimulationParameters.mesh_input.origin.update_host(); @@ -2173,7 +2149,7 @@ class MeshBuilder // const int num_edges_in_elem = 12; // number of edges in a elem // node coords data on rank 0 for all global nodes - CArrayKokkos global_coords(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); + global_coords = CArrayKokkos(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); // --- Build nodes --- @@ -2194,7 +2170,7 @@ class MeshBuilder // initialize elem variables - DCArrayKokkos global_nodes_in_elem(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); + global_nodes_in_elem = CArrayKokkos(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); // --- Build elems --- @@ -2237,7 +2213,6 @@ class MeshBuilder DistributedMap map; // read coords read_index_start = 0; - size_t num_local_nodes; buffer_iterations = global_num_nodes / BUFFER_LINES; if (global_num_nodes % BUFFER_LINES != 0) @@ -2360,42 +2335,8 @@ class MeshBuilder // read in element info (ensight file format is organized in element type sections) // loop over this later for several element type sections - - size_t global_num_elems = 0; - size_t num_elems = 0; CArrayKokkos node_store(num_nodes_in_elem); - // --- read the number of cells in the mesh --- - // --- Read the number of vertices in the mesh --- // - if (myrank == 0) - { - bool found = false; - while (found == false&&in.good()) { - std::getline(in, read_line); - line_parse.str(""); - line_parse.clear(); - line_parse << read_line; - line_parse >> substring; - - // looking for the following text: - // CELLS num_cells size - if (substring == "CELLS") - { - line_parse >> global_num_elems; - std::cout << "declared 
element count: " << global_num_elems << std::endl; - if (global_num_elems <= 0) - { - throw std::runtime_error("ERROR, NO ELEMENTS IN MESH"); - } - found = true; - } // end if - } // end while - - if (!found){ - throw std::runtime_error("ERROR: Failed to find CELLS"); - } // end if - } // end if(myrank==0) - // broadcast number of elements MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); @@ -2408,17 +2349,18 @@ class MeshBuilder // read in element connectivity // we're gonna reallocate for the words per line expected for the element connectivity - read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); + read_buffer = CArrayKokkos(BUFFER_LINES, num_nodes_in_elem); // calculate buffer iterations to read number of lines buffer_iterations = global_num_elems / BUFFER_LINES; int assign_flag; // dynamic buffer used to store elements before we know how many this rank needs - std::vector element_temp(BUFFER_LINES * elem_words_per_line); + std::vector element_temp(BUFFER_LINES * num_nodes_in_elem); std::vector global_indices_temp(BUFFER_LINES); - size_t buffer_max = BUFFER_LINES * elem_words_per_line; + size_t buffer_max = BUFFER_LINES * num_nodes_in_elem; size_t indices_buffer_max = BUFFER_LINES; + size_t num_elems = 0; if (global_num_elems % BUFFER_LINES != 0) { @@ -2433,19 +2375,9 @@ class MeshBuilder { for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) { - getline(in, read_line); - line_parse.clear(); - line_parse.str(read_line); - // disregard node count line since we're using one element type per mesh - line_parse >> substring; - for (int iword = 0; iword < elem_words_per_line; iword++) + for (int inode = 0; inode < num_nodes_in_elem; inode++) { - // read portions of the line into the substring variable - line_parse >> substring; - // debug print - // std::cout<<" "<< substring; - // assign the substring variable as a word of the read buffer - strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + 
read_buffer(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_LINES + buffer_loop, inode); } // std::cout <> substring; - for (int iword = 0; iword < elem_words_per_line; iword++) + for (int inode = 0; inode < num_nodes_in_elem; inode++) { - // read portions of the line into the substring variable - line_parse >> substring; - // debug print - // std::cout<<" "<< substring; - // assign the substring variable as a word of the read buffer - strcpy(&read_buffer(buffer_loop, iword, 0), substring.c_str()); + read_buffer(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_LINES + buffer_loop, inode); } // std::cout <= buffer_max) + if ((num_elems - 1) * num_nodes_in_elem + inode >= buffer_max) { - element_temp.resize((num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); - buffer_max = (num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; + element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_LINES * num_nodes_in_elem); + buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_LINES * num_nodes_in_elem; } - element_temp[(num_elems - 1) * elem_words_per_line + inode] = node_store(inode); + element_temp[(num_elems - 1) * num_nodes_in_elem + inode] = node_store(inode); // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; } // assign global element id to temporary list @@ -2556,28 +2457,8 @@ class MeshBuilder for (int ielem = 0; ielem < num_elems; ielem++) { All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; - if (global_indices_temp[ielem] < 0) - { - negative_index_found = 1; - } - } - - MPI_Allreduce(&negative_index_found, &global_negative_index_found, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); - if (global_negative_index_found) - { - if (myrank == 0) - { - std::cout << "Node index less than or equal to zero detected; set \"zero_index_base: 
true\" under \"input_options\" in your yaml file if indices start at 0" << std::endl; - } - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); - exit(0); } - // delete temporary element connectivity and index storage - std::vector().swap(element_temp); - std::vector().swap(global_indices_temp); - All_Element_Global_Indices.update_device(); // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) @@ -2591,54 +2472,21 @@ class MeshBuilder for (int ielem = 0; ielem < num_elems; ielem++) { - for (int inode = 0; inode < elem_words_per_line; inode++) + for (int inode = 0; inode < num_nodes_in_elem; inode++) { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) - nodes_in_elem.host(ielem, inode) = element_temp[ielem * elem_words_per_line + inode]; + nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + inode]; } } - // element type selection (subject to change) - // ---- Set Element Type ---- // - // allocate element type memory - // elements::elem_type_t* elem_choice; - - int NE = 1; // number of element types in problem - - // Convert ensight index system to the ijk finite element numbering convention - // for vertices in cell - CArrayKokkos convert_ensight_to_ijk(num_nodes_in_elem); - CArrayKokkos tmp_ijk_indx(num_nodes_in_elem); - convert_ensight_to_ijk(0) = 0; - convert_ensight_to_ijk(1) = 1; - convert_ensight_to_ijk(2) = 3; - convert_ensight_to_ijk(3) = 2; - convert_ensight_to_ijk(4) = 4; - convert_ensight_to_ijk(5) = 5; - convert_ensight_to_ijk(6) = 7; - convert_ensight_to_ijk(7) = 6; - - for (int cell_rid = 0; cell_rid < num_elems; cell_rid++) - { - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) - { - tmp_ijk_indx(node_lid) = nodes_in_elem.host(cell_rid, convert_ensight_to_ijk(node_lid)); - } - - for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) - { - nodes_in_elem.host(cell_rid, 
node_lid) = tmp_ijk_indx(node_lid); - } - } - - nodes_in_elem.update_device(); + + // delete temporary element connectivity and index storage + std::vector().swap(element_temp); + std::vector().swap(global_indices_temp); // initialize corner variables size_t num_corners = num_elems * num_nodes_in_elem; mesh.initialize_corners(num_corners); - - // Build connectivity - mesh.build_connectivity(); } // end build_3d_box ///////////////////////////////////////////////////////////////////////////// @@ -2808,9 +2656,6 @@ class MeshBuilder // mesh.initialize_corners(num_corners); // // corner.initialize(num_corners, num_dim); - // // Build connectivity - // mesh.build_connectivity(); - // } }; diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index 2c3826572..8c3936945 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -109,6 +109,9 @@ void Driver::initialize() //build relevant partition maps for ghost nodes, elements, etc. 
mesh.init_maps(State.node); + // Build connectivity + mesh.build_connectivity(); + // Build boundary conditions const int num_bcs = BoundaryConditions.num_bcs; From 52b38457f9e4218f1c0edfb11fe4ad1471be2637 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz <46537160+Adrian-Diaz@users.noreply.github.com> Date: Mon, 7 Jul 2025 10:44:36 -0600 Subject: [PATCH 28/66] WIP: add node mass comms at setup --- .../src/Solvers/SGH_solver_3D/src/sgh_setup.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp index de49d03b1..f72f99af0 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp @@ -110,5 +110,7 @@ void SGH3D::setup(SimulationParameters_t& SimulationParameters, State.node.mass, State.corner.mass); + //communicate node masses to ghosts + node_mass_comms.execute_comms(); } // end SGH setup From 9ba625f6955959767259994d60eea120aa5f1c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 8 Jul 2025 20:30:04 -0600 Subject: [PATCH 29/66] WIP: MPI refactor --- .../src/common/include/mesh.h | 84 ++++++++++++------- .../src/common/include/mesh_io.h | 40 ++++----- 2 files changed, 75 insertions(+), 49 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 7744453aa..b4578cdc1 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -329,13 +329,16 @@ struct Mesh_t }; // end method // initialization methods - void initialize_elems(const size_t num_elems_inp, const size_t num_nodes_in_elem, const DistributedMap input_element_map) + void initialize_elems(const size_t num_elems_inp, const size_t input_num_nodes_in_elem, const DistributedMap input_element_map) { num_elems = 
num_elems_inp; element_map = input_element_map; nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); + //number of nodes per element + num_nodes_in_elem = input_num_nodes_in_elem; + // 1 Gauss point per element num_leg_gauss_in_elem = 1; @@ -367,6 +370,7 @@ struct Mesh_t num_surfs_in_elem = num_surfs_in_elem_inp; num_zones = num_zones_in_elem * num_elems; + element_map = input_element_map; nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); @@ -422,34 +426,44 @@ struct Mesh_t // now pass the contents of the set over to a CArrayKokkos, then create a map to find local ghost indices from global ghost indices num_ghost_nodes = ghost_node_set.size(); - int ighost = 0; - auto it = ghost_node_set.begin(); - - // create a Map for ghost node indices - ghost_nodes = DCArrayKokkos(num_ghost_nodes, "ghost_nodes"); //pass this into map object - while (it != ghost_node_set.end()) { - ghost_nodes.host(ighost++) = *it; - it++; - } - ghost_nodes.update_device(); - - //Use the ranks to break ties in shared element assignment for a unique element map used in elem set reductions later - //this wont be that great at load balancing element counts but its simple and works for now - ghost_node_ranks = DCArrayKokkos(num_ghost_nodes, "ghost_nodes_ranks"); - - // debug print of ghost nodes - // std::cout << " GHOST NODE SET ON TASK " << myrank << std::endl; - // for(int i = 0; i < num_ghost_nodes; i++) - // std::cout << "{" << i + 1 << "," << ghost_nodes(i) + 1 << "}" << std::endl; - - // find which mpi rank each ghost node belongs to and store the information in a CArrayKokkos - // // allocate Teuchos Views since they are the only input available at the moment in the Tpetra map definitions - // Teuchos::ArrayView 
ghost_nodes_pass(ghost_nodes.h_view.data(), num_ghost_nodes); + if(num_ghost_nodes){ + int ighost = 0; + auto it = ghost_node_set.begin(); + + // create a Map for ghost node indices + ghost_nodes = DCArrayKokkos(num_ghost_nodes, "ghost_nodes"); //pass this into map object + while (it != ghost_node_set.end()) { + ghost_nodes.host(ighost++) = *it; + it++; + } + ghost_nodes.update_device(); - // Teuchos::ArrayView ghost_node_ranks_pass(ghost_node_ranks.h_view.data(), num_ghost_nodes); + //Use the ranks to break ties in shared element assignment for a unique element map used in elem set reductions later + //this wont be that great at load balancing element counts but its simple and works for now + ghost_node_ranks = DCArrayKokkos(num_ghost_nodes, "ghost_nodes_ranks"); + } + else{ + //ensure a size of at least 1 with bogus index to prevent segfault + ghost_nodes = DCArrayKokkos(1, "ghost_nodes"); //pass this into map object + ghost_node_ranks = DCArrayKokkos(1, "ghost_nodes_ranks"); - node_map.getRemoteIndexList(ghost_nodes, ghost_node_ranks); - ghost_node_ranks.update_device(); + } + // debug print of ghost nodes + // std::cout << " GHOST NODE SET ON TASK " << myrank << std::endl; + // for(int i = 0; i < num_ghost_nodes; i++) + // std::cout << "{" << i + 1 << "," << ghost_nodes(i) + 1 << "}" << std::endl; + + // find which mpi rank each ghost node belongs to and store the information in a CArrayKokkos + // // allocate Teuchos Views since they are the only input available at the moment in the Tpetra map definitions + // Teuchos::ArrayView ghost_nodes_pass(ghost_nodes.h_view.data(), num_ghost_nodes); + + // Teuchos::ArrayView ghost_node_ranks_pass(ghost_node_ranks.h_view.data(), num_ghost_nodes); + //node_map.print(); + node_map.getRemoteIndexList(ghost_nodes, ghost_node_ranks); + if(num_ghost_nodes){ + ghost_node_ranks.update_device(); + } + } ghost_node_map = DistributedMap(ghost_nodes); @@ -535,6 +549,7 @@ struct Mesh_t long long int temp_element_gid, 
current_element_gid; int last_storage_index = num_elems - 1; + for (int ielem = 0; ielem < num_local_elems; ielem++) { @@ -567,21 +582,30 @@ struct Mesh_t // reset all element map to its re-sorted version Initial_Element_Global_Indices.update_device(); nodes_in_elem.update_device(); - + element_map = DistributedMap(Initial_Element_Global_Indices); //redefine nodes_in_elem so partition map of the distributed array is synchronized with permuted dual view contents DistributedDCArray nodes_in_elem_temp(element_map, num_nodes_in_elem); - nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); + //nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); + //nodes_in_elem.print(); + std::cout << "NUM ELEMS " << num_elems << " NUM NODES IN ELEM " << num_nodes_in_elem << std::endl; + for(int ielem= 0; ielem < num_elems; ielem++) { + for(int inode = 0; inode < num_nodes_in_elem; inode++){ + nodes_in_elem_temp.host(ielem, inode) = nodes_in_elem.host(ielem, inode); + } + } + //nodes_in_elem_temp.update_device(); nodes_in_elem = nodes_in_elem_temp; //convert global ids stored in nodes_in_elem to local node ids spanning 0:num_nodes on this process for(int ielem= 0; ielem < num_elems; ielem++) { for(int inode = 0; inode < num_nodes_in_elem; inode++){ - nodes_in_elem(ielem, inode) = all_node_map.getLocalIndex(nodes_in_elem(ielem, inode)); + nodes_in_elem.host(ielem, inode) = all_node_map.getLocalIndex(nodes_in_elem(ielem, inode)); } } nodes_in_elem.update_device(); + nodes_in_elem.print(); // element_map->describe(*fos,Teuchos::VERB_EXTREME); // element_map->describe(*fos,Teuchos::VERB_EXTREME); diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index d9153d5fa..32565d9a5 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1033,18 +1033,18 @@ class MeshReader // read coords read_index_start = 0; 
size_t num_local_nodes; - DistributedMap map; + DistributedMap node_map; { //scoped so temp FArray data is auto deleted to save memory //allocate pre-partition node coords using contiguous decomposition //FArray type used since CArray type still doesnt support zoltan2 decomposition DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); // construct contiguous parallel row map now that we know the number of nodes - map = node_coords_distributed.pmap; + node_map = node_coords_distributed.pmap; // map->describe(*fos,Teuchos::VERB_EXTREME); // set the vertices in the mesh read in - num_local_nodes = map.size(); + num_local_nodes = node_map.size(); for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) { // pack buffer on rank 0 @@ -1105,10 +1105,10 @@ class MeshReader // set global node id (ensight specific order) node_gid = read_index_start + scan_loop; // let map decide if this node id belongs locally; if yes store data - if (map.isProcessGlobalIndex(node_gid)) + if (node_map.isProcessGlobalIndex(node_gid)) { // set local node index in this mpi rank - node_rid = map.getLocalIndex(node_gid); + node_rid = node_map.getLocalIndex(node_gid); // extract nodal position from the read buffer // for tecplot format this is the three coords in the same line dof_value = atof(&read_buffer(scan_loop, 0, 0)); @@ -1130,14 +1130,15 @@ class MeshReader node_coords_distributed.repartition_vector(); //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope std::vector required_node_state = { node_state::coords }; - map = node_coords_distributed.pmap; - node.initialize(map, num_dims, required_node_state); + node_map = node_coords_distributed.pmap; + node.initialize(node_map, num_dims, required_node_state); } //initialize some mesh data mesh.initialize_nodes(global_num_nodes); - num_local_nodes = map.size(); + num_local_nodes = node_map.size(); mesh.num_local_nodes = num_local_nodes; 
+ mesh.node_map = node_map; // debug print of nodal data @@ -1304,7 +1305,7 @@ class MeshReader // first we add the elements to a dynamically allocated list if (zero_index_base) { - if (map.isProcessGlobalIndex(node_gid) && !assign_flag) + if (node_map.isProcessGlobalIndex(node_gid) && !assign_flag) { assign_flag = 1; num_elems++; @@ -1312,7 +1313,7 @@ class MeshReader } else { - if (map.isProcessGlobalIndex(node_gid - 1) && !assign_flag) + if (node_map.isProcessGlobalIndex(node_gid - 1) && !assign_flag) { assign_flag = 1; num_elems++; @@ -2210,7 +2211,7 @@ class MeshBuilder //distribute partitioned data from the global mesh build data on rank 0 size_t num_local_nodes; - DistributedMap map; + DistributedMap node_map; // read coords read_index_start = 0; @@ -2228,11 +2229,11 @@ class MeshBuilder DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); // construct contiguous parallel row map now that we know the number of nodes - map = node_coords_distributed.pmap; + node_map = node_coords_distributed.pmap; // map->describe(*fos,Teuchos::VERB_EXTREME); // set the vertices in the mesh read in - num_local_nodes = map.size(); + num_local_nodes = node_map.size(); for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) { // pack buffer on rank 0 @@ -2283,10 +2284,10 @@ class MeshBuilder // set global node id (ensight specific order) node_gid = read_index_start + scan_loop; // let map decide if this node id belongs locally; if yes store data - if (map.isProcessGlobalIndex(node_gid)) + if (node_map.isProcessGlobalIndex(node_gid)) { // set local node index in this mpi rank - node_rid = map.getLocalIndex(node_gid); + node_rid = node_map.getLocalIndex(node_gid); // extract nodal position from the read buffer // for tecplot format this is the three coords in the same line dof_value = read_buffer(scan_loop,0); @@ -2308,14 +2309,15 @@ class MeshBuilder node_coords_distributed.repartition_vector(); //get map from repartitioned Farray 
and feed it into distributed CArray type; FArray data will be discared after scope std::vector required_node_state = { node_state::coords }; - map = node_coords_distributed.pmap; - node.initialize(map, num_dims, required_node_state); + node_map = node_coords_distributed.pmap; + node.initialize(node_map, num_dims, required_node_state); } //initialize some mesh data mesh.initialize_nodes(global_num_nodes); - num_local_nodes = map.size(); + num_local_nodes = node_map.size(); mesh.num_local_nodes = num_local_nodes; + mesh.node_map = node_map; // debug print of nodal data @@ -2417,7 +2419,7 @@ class MeshBuilder node_gid = read_buffer(scan_loop, inode); node_store(inode) = node_gid; // subtract 1 since file index start is 1 but code expects 0 // first we add the elements to a dynamically allocated list - if (map.isProcessGlobalIndex(node_gid) && !assign_flag) + if (node_map.isProcessGlobalIndex(node_gid) && !assign_flag) { assign_flag = 1; num_elems++; From 32e528587ab7406cc560a3225d85de4ddd62ef56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 11 Jul 2025 00:50:53 -0600 Subject: [PATCH 30/66] WIP: MPI refactor --- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 99 +++++--- .../src/common/include/mesh.h | 6 +- .../src/common/include/mesh_io.h | 54 ++-- .../src/common/include/state.h | 237 ++++++++++-------- .../src/common/src/region_fill.cpp | 32 ++- 5 files changed, 268 insertions(+), 160 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index f4f45ca99..5f1a9cf3d 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -84,6 +84,10 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, CArrayKokkos GaussPoint_pres_denominator(mesh.num_elems*mesh.num_leg_gauss_in_elem); CArrayKokkos 
GaussPoint_volfrac_min(mesh.num_elems*mesh.num_leg_gauss_in_elem); CArrayKokkos GaussPoint_volfrac_limiter(mesh.num_elems*mesh.num_leg_gauss_in_elem); + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); // Create mesh writer @@ -94,8 +98,9 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, graphics_times(0) = this->time_start; // was zero double graphics_time = this->time_start; // the times for writing graphics dump, was started at 0.0 size_t output_id=0; // the id for the outputs written - - std::cout << "Applying initial boundary conditions" << std::endl; + if(myrank==0){ + std::cout << "Applying initial boundary conditions" << std::endl; + } boundary_velocity(mesh, BoundaryConditions, State.node.vel, time_value); // Time value = 0.0; // extensive energy tallies over the entire mesh @@ -116,7 +121,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, local_IE_t0 += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, State.MaterialToMeshMaps, - State.MaterialPoints.num_material_points.host(mat_id), + State.MaterialPoints.num_material_local_points.host(mat_id), mat_id, mesh.num_local_elems); } // end loop over mat_id @@ -137,23 +142,38 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, // domain mass for each material (they are at material points) double mass_domain_all_mats_t0 = 0.0; double mass_domain_nodes_t0 = 0.0; + double global_mass_domain_nodes_t0; + double global_mass_domain_all_mats_t0; - for (size_t mat_id = 0; mat_id < num_mats; mat_id++) { + //debug print of mass + // for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ + // std::cout << State.MaterialPoints.mass(0,ielem) << " " << std::endl; + // } + for (size_t mat_id = 0; mat_id < num_mats; mat_id++) { + double global_mass_domain_mat; + std::cout << " local element count for mass loop " << State.MaterialPoints.num_material_local_points.host(mat_id) << 
std::endl; double mass_domain_mat = sum_domain_material_mass(State.MaterialPoints.mass, - State.MaterialPoints.num_material_points.host(mat_id), + State.MaterialPoints.num_material_local_points.host(mat_id), mat_id); mass_domain_all_mats_t0 += mass_domain_mat; - printf("material %zu mass in domain = %f \n", mat_id, mass_domain_mat); + + MPI_Allreduce(&mass_domain_mat, &global_mass_domain_mat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + if(myrank==0){ + printf("material %zu mass in domain = %f \n", mat_id, global_mass_domain_mat); + } } // end for // node mass of the domain mass_domain_nodes_t0 = sum_domain_node_mass(mesh, State.node.coords, State.node.mass); - - printf("nodal mass domain = %f \n", mass_domain_nodes_t0); + MPI_Allreduce(&mass_domain_nodes_t0, &global_mass_domain_nodes_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&mass_domain_all_mats_t0, &global_mass_domain_all_mats_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + if(myrank==0){ + printf("nodal mass domain = %f \n", global_mass_domain_nodes_t0); + } // a flag to exit the calculation size_t stop_calc = 0; @@ -161,7 +181,9 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, auto time_1 = std::chrono::high_resolution_clock::now(); // Write initial state at t=0 - printf("Writing outputs to file at %f \n", graphics_time); + if(myrank==0){ + printf("Writing outputs to file at %f \n", graphics_time); + } mesh_writer.write_mesh( mesh, State, @@ -223,14 +245,15 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, //Find the minimum timestep across all MPI processes MPI_Allreduce(&min_dt_calc, &dt, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); - - if (cycle == 0) { - printf("cycle = %lu, time = %f, time step = %f \n", cycle, time_value, dt); + if(myrank==0){ + if (cycle == 0) { + printf("cycle = %lu, time = %.8f, time step = %.8f \n", cycle, time_value, dt); + } + // print time step every 10 cycles + else if (cycle % 20 == 0) { + printf("cycle = %lu, time = %.8f, time 
step = %.8f \n", cycle, time_value, dt); + } // end if } - // print time step every 10 cycles - else if (cycle % 20 == 0) { - printf("cycle = %lu, time = %f, time step = %f \n", cycle, time_value, dt); - } // end if // --------------------------------------------------------------------- @@ -489,7 +512,9 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, // write outputs if (write == 1) { - printf("Writing outputs to file at %f \n", graphics_time); + if(myrank==0){ + printf("Writing outputs to file at %f \n", graphics_time); + } mesh_writer.write_mesh(mesh, State, SimulationParameters, @@ -514,8 +539,9 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, auto time_2 = std::chrono::high_resolution_clock::now(); auto calc_time = std::chrono::duration_cast(time_2 - time_1).count(); - - printf("\nCalculation time in seconds: %f \n", calc_time * 1e-9); + if(myrank==0){ + printf("\nCalculation time in seconds: %f \n", calc_time * 1e-9); + } // ---- Calculate energy tallies ---- double IE_tend = 0.0; @@ -530,7 +556,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, local_IE_tend += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, State.MaterialToMeshMaps, - State.MaterialPoints.num_material_points.host(mat_id), + State.MaterialPoints.num_material_local_points.host(mat_id), mat_id, mesh.num_local_elems); } // end loop over mat_id @@ -541,37 +567,43 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, State.node.coords, State.node.mass); - MPI_Allreduce(&local_IE_t0, &IE_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&local_KE_t0, &KE_t0, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_IE_tend, &IE_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&local_KE_tend, &KE_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // extensive TE TE_tend = IE_tend + KE_tend; - - printf("Time=0: KE = %.14f, IE = %.14f, TE = %.14f \n", KE_t0, IE_t0, 
TE_t0); - printf("Time=End: KE = %.14f, IE = %.14f, TE = %.14f \n", KE_tend, IE_tend, TE_tend); - printf("total energy change = %.15e \n\n", TE_tend - TE_t0); + if(myrank==0){ + printf("Time=0: KE = %.14f, IE = %.14f, TE = %.14f \n", KE_t0, IE_t0, TE_t0); + printf("Time=End: KE = %.14f, IE = %.14f, TE = %.14f \n", KE_tend, IE_tend, TE_tend); + printf("total energy change = %.15e \n\n", TE_tend - TE_t0); + } // domain mass for each material (they are at material points) double mass_domain_all_mats_tend = 0.0; double mass_domain_nodes_tend = 0.0; + double global_mass_domain_all_mats_tend; + double global_mass_domain_nodes_tend; for(size_t mat_id = 0; mat_id < num_mats; mat_id++){ - double mass_domain_mat = sum_domain_material_mass(State.MaterialPoints.mass, - State.MaterialPoints.num_material_points.host(mat_id), + State.MaterialPoints.num_material_local_points.host(mat_id), mat_id); mass_domain_all_mats_tend += mass_domain_mat; } // end for + MPI_Allreduce(&mass_domain_all_mats_tend, &global_mass_domain_all_mats_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); // node mass of the domain mass_domain_nodes_tend = sum_domain_node_mass(mesh, State.node.coords, State.node.mass); - - printf("material mass conservation error = %f \n", mass_domain_all_mats_tend - mass_domain_all_mats_t0); - printf("nodal mass conservation error = %f \n", mass_domain_nodes_tend - mass_domain_nodes_t0); - printf("nodal and material mass error = %f \n\n", mass_domain_nodes_tend - mass_domain_all_mats_tend); + + MPI_Allreduce(&mass_domain_all_mats_tend, &global_mass_domain_all_mats_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + if(myrank==0){ + printf("material mass conservation error = %f \n", global_mass_domain_all_mats_tend - global_mass_domain_all_mats_t0); + printf("nodal mass conservation error = %f \n", global_mass_domain_nodes_tend - global_mass_domain_nodes_t0); + printf("nodal and material mass error = %f \n\n", global_mass_domain_nodes_tend - global_mass_domain_all_mats_tend); + } } 
// end of SGH execute ///////////////////////////////////////////////////////////////////////////// @@ -666,7 +698,6 @@ double sum_domain_internal_energy( // loop over the material points and tally IE FOR_REDUCE_SUM(matpt_lid, 0, num_mat_points, IE_loc_sum, { - if(MaterialToMeshMaps.elem(mat_id, matpt_lid) < num_local_elems) IE_loc_sum += MaterialPoints_mass(mat_id,matpt_lid) * MaterialPoints_sie(mat_id,matpt_lid); }, IE_sum); Kokkos::fence(); @@ -757,7 +788,7 @@ double sum_domain_node_mass(const Mesh_t& mesh, double mass_domain = 0.0; double mass_loc_domain; - FOR_REDUCE_SUM(node_gid, 0, mesh.num_nodes, mass_loc_domain, { + FOR_REDUCE_SUM(node_gid, 0, mesh.num_local_nodes, mass_loc_domain, { if (mesh.num_dims == 2) { mass_loc_domain += node_mass(node_gid) * node_coords(node_gid, 1); } diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index b4578cdc1..88962eb12 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -333,12 +333,13 @@ struct Mesh_t { num_elems = num_elems_inp; element_map = input_element_map; - nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); - corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); //number of nodes per element num_nodes_in_elem = input_num_nodes_in_elem; + nodes_in_elem = DistributedDCArray(element_map, num_nodes_in_elem, "mesh.nodes_in_elem"); + corners_in_elem = CArrayKokkos(num_elems, num_nodes_in_elem, "mesh.corners_in_elem"); + // 1 Gauss point per element num_leg_gauss_in_elem = 1; @@ -605,7 +606,6 @@ struct Mesh_t } nodes_in_elem.update_device(); - nodes_in_elem.print(); // element_map->describe(*fos,Teuchos::VERB_EXTREME); // element_map->describe(*fos,Teuchos::VERB_EXTREME); diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 32565d9a5..9b0e9d795 100644 --- 
a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1132,6 +1132,12 @@ class MeshReader std::vector required_node_state = { node_state::coords }; node_map = node_coords_distributed.pmap; node.initialize(node_map, num_dims, required_node_state); + //copy coordinate data from repartitioned FArray into CArray + FOR_ALL(node_id, 0, node_map.size(), { + for(int idim = 0; idim < num_dims; idim++){ + node.coords(node_id,idim) = node_coords_distributed(node_id,idim); + } + }); } //initialize some mesh data @@ -2116,29 +2122,31 @@ class MeshBuilder } if(myrank==0){ printf("Creating a 3D box mesh \n"); + } - // SimulationParameters.mesh_input.length.update_host(); - const double lx = SimulationParameters.mesh_input.length[0]; - const double ly = SimulationParameters.mesh_input.length[1]; - const double lz = SimulationParameters.mesh_input.length[2]; - - // SimulationParameters.mesh_input.num_elems.update_host(); - const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; - const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; - const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; + // SimulationParameters.mesh_input.length.update_host(); + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; + const double lz = SimulationParameters.mesh_input.length[2]; - const int num_points_i = num_elems_i + 1; // num points in x - const int num_points_j = num_elems_j + 1; // num points in y - const int num_points_k = num_elems_k + 1; // num points in y + // SimulationParameters.mesh_input.num_elems.update_host(); + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + const int num_elems_k = SimulationParameters.mesh_input.num_elems[2]; - global_num_nodes = num_points_i * num_points_j * num_points_k; + const int num_points_i = 
num_elems_i + 1; // num points in x + const int num_points_j = num_elems_j + 1; // num points in y + const int num_points_k = num_elems_k + 1; // num points in y - const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) - const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) - const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) + global_num_nodes = num_points_i * num_points_j * num_points_k; - global_num_elems = num_elems_i * num_elems_j * num_elems_k; + const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) + const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) + const double dz = lz / ((double)num_elems_k); // len/(num_elems_k) + global_num_elems = num_elems_i * num_elems_j * num_elems_k; + + if(myrank==0){ std::vector origin(num_dims); // SimulationParameters.mesh_input.origin.update_host(); for (int i = 0; i < num_dims; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } @@ -2311,6 +2319,12 @@ class MeshBuilder std::vector required_node_state = { node_state::coords }; node_map = node_coords_distributed.pmap; node.initialize(node_map, num_dims, required_node_state); + //copy coordinate data from repartitioned FArray into CArray + FOR_ALL(node_id, 0, node_map.size(), { + for(int idim = 0; idim < num_dims; idim++){ + node.coords(node_id,idim) = node_coords_distributed(node_id,idim); + } + }); } //initialize some mesh data @@ -2318,6 +2332,7 @@ class MeshBuilder num_local_nodes = node_map.size(); mesh.num_local_nodes = num_local_nodes; mesh.node_map = node_map; + //node.coords.print(); // debug print of nodal data @@ -2449,7 +2464,6 @@ class MeshBuilder } read_index_start += BUFFER_LINES; } - //set global and local shared element counts mesh.global_num_elems = global_num_elems; @@ -2483,8 +2497,8 @@ class MeshBuilder nodes_in_elem.update_device(); // delete temporary element connectivity and index storage - std::vector().swap(element_temp); - std::vector().swap(global_indices_temp); + 
//std::vector().swap(element_temp); + //std::vector().swap(global_indices_temp); // initialize corner variables size_t num_corners = num_elems * num_nodes_in_elem; diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index aa332aabd..ae57d984b 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -374,127 +374,148 @@ struct node_t for (auto field : node_states){ switch(field){ case node_state::coords: - //store local data with existing managed view made by mesh read for now - if(this->local_coords.size()==0&&this->coords.size()!=0){ - this->local_coords = this->coords; + //if both local and all vector were already allocated skip + if(this->coords.size()==0||this->local_coords.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_coords.size()==0&&this->coords.size()!=0){ + this->local_coords = this->coords; + } + if(this->local_coords_n0.size()==0&&this->coords_n0.size()!=0){ + this->local_coords_n0 = this->coords_n0; + } + //storage for nlocal+nghost + this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); + this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); + //assign local data to new storage if local data was allocated + if(this->local_coords.size()!=0){ + super_vector_initialization(this->coords, this->local_coords, subview_map.size()); + } + if(this->local_coords_n0.size()!=0){ + super_vector_initialization(this->coords_n0, this->local_coords_n0, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_coords = DistributedDCArray(this->coords,subview_map); + this->local_coords_n0 = DistributedDCArray(this->coords_n0,subview_map); } - if(this->local_coords_n0.size()==0&&this->coords_n0.size()!=0){ - this->local_coords_n0 = this->coords_n0; 
- } - //storage for nlocal+nghost - this->coords = DistributedDCArray(partitioned_map, num_dims, "node_coordinates"); - this->coords_n0 = DistributedDCArray(partitioned_map, num_dims, "node_coordinates_n0"); - //assign local data to new storage if local data was allocated - if(this->local_coords.size()!=0){ - super_vector_initialization(this->coords, this->local_coords, subview_map.size()); - } - if(this->local_coords_n0.size()!=0){ - super_vector_initialization(this->coords_n0, this->local_coords_n0, subview_map.size()); - } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_coords = DistributedDCArray(this->coords,subview_map); - this->local_coords_n0 = DistributedDCArray(this->coords_n0,subview_map); break; case node_state::velocity: - //store local data with existing managed view made by mesh read for now - if(this->local_vel.size()==0&&this->vel.size()!=0){ - this->local_vel = this->vel; - } - if(this->local_vel_n0.size()==0&&this->vel_n0.size()!=0){ - this->local_vel_n0 = this->vel_n0; - } - //storage for nlocal+nghost - this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); - this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); - //assign local data to new storage - if(this->local_vel.size()!=0){ - super_vector_initialization(this->vel, this->local_vel, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->vel.size()==0||this->local_vel.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_vel.size()==0&&this->vel.size()!=0){ + this->local_vel = this->vel; + } + if(this->local_vel_n0.size()==0&&this->vel_n0.size()!=0){ + this->local_vel_n0 = this->vel_n0; + } + //storage for nlocal+nghost + this->vel = DistributedDCArray(partitioned_map, num_dims, "node_velocity"); + this->vel_n0 = DistributedDCArray(partitioned_map, num_dims, "node_velocity_n0"); + 
//assign local data to new storage + if(this->local_vel.size()!=0){ + super_vector_initialization(this->vel, this->local_vel, subview_map.size()); + } + if(this->local_vel_n0.size()!=0){ + super_vector_initialization(this->vel_n0, this->local_vel_n0, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_vel = DistributedDCArray(this->vel,subview_map); + this->local_vel_n0 = DistributedDCArray(this->vel_n0,subview_map); } - if(this->local_vel_n0.size()!=0){ - super_vector_initialization(this->vel_n0, this->local_vel_n0, subview_map.size()); - } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_vel = DistributedDCArray(this->vel,subview_map); - this->local_vel_n0 = DistributedDCArray(this->vel_n0,subview_map); break; case node_state::force: - //store local data with existing managed view made by mesh read for now - if(this->local_force.size()==0&&this->force.size()!=0){ - this->local_force = this->force; - } - //storage for nlocal+nghost - this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); - //assign local data to new storage - if(this->local_force.size()!=0){ - super_vector_initialization(this->force, this->local_force, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->force.size()==0||this->local_force.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_force.size()==0&&this->force.size()!=0){ + this->local_force = this->force; + } + //storage for nlocal+nghost + this->force = DistributedDCArray(partitioned_map, num_dims, "node_force"); + //assign local data to new storage + if(this->local_force.size()!=0){ + super_vector_initialization(this->force, this->local_force, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should 
self-destruct here + this->local_force = DistributedDCArray(this->force,subview_map); } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_force = DistributedDCArray(this->force,subview_map); break; case node_state::mass: - //store local data with existing managed view made by mesh read for now - if(this->local_mass.size()==0&&this->mass.size()!=0){ - this->local_mass = this->mass; - } - //storage for nlocal+nghost - this->mass = DistributedDCArray(partitioned_map, "node_mass"); - //assign local data to new storage - if(this->local_mass.size()!=0){ - super_vector_initialization(this->mass, this->local_mass, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->mass.size()==0||this->local_mass.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_mass.size()==0&&this->mass.size()!=0){ + this->local_mass = this->mass; + } + //storage for nlocal+nghost + this->mass = DistributedDCArray(partitioned_map, "node_mass"); + //assign local data to new storage + if(this->local_mass.size()!=0){ + super_vector_initialization(this->mass, this->local_mass, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_mass = DistributedDCArray(this->mass,subview_map); } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_mass = DistributedDCArray(this->mass,subview_map); break; case node_state::temp: - //store local data with existing managed view made by mesh read for now - if(this->local_temp.size()==0&&this->temp.size()!=0){ - this->local_temp = this->temp; - } - if(this->local_temp_n0.size()==0&&this->temp_n0.size()!=0){ - this->local_temp_n0 = this->temp_n0; - } - //storage for nlocal+nghost - this->temp = DistributedDCArray(partitioned_map, "node_temp"); - 
this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); - //assign local data to new storage - if(this->local_temp.size()!=0){ - super_vector_initialization(this->temp, this->local_temp, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->temp.size()==0||this->local_temp.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_temp.size()==0&&this->temp.size()!=0){ + this->local_temp = this->temp; + } + if(this->local_temp_n0.size()==0&&this->temp_n0.size()!=0){ + this->local_temp_n0 = this->temp_n0; + } + //storage for nlocal+nghost + this->temp = DistributedDCArray(partitioned_map, "node_temp"); + this->temp_n0 = DistributedDCArray(partitioned_map, "node_temp_n0"); + //assign local data to new storage + if(this->local_temp.size()!=0){ + super_vector_initialization(this->temp, this->local_temp, subview_map.size()); + } + if(this->local_temp_n0.size()!=0){ + super_vector_initialization(this->temp_n0, this->local_temp_n0, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_temp = DistributedDCArray(this->temp,subview_map); + this->local_temp_n0 = DistributedDCArray(this->temp_n0,subview_map); } - if(this->local_temp_n0.size()!=0){ - super_vector_initialization(this->temp_n0, this->local_temp_n0, subview_map.size()); - } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_temp = DistributedDCArray(this->temp,subview_map); - this->local_temp_n0 = DistributedDCArray(this->temp_n0,subview_map); break; case node_state::heat_transfer: - //store local data with existing managed view made by mesh read for now - if(this->local_q_transfer.size()==0&&this->q_transfer.size()!=0){ - this->local_q_transfer = this->q_transfer; - } - //storage for nlocal+nghost - this->q_transfer = DistributedDCArray(partitioned_map, 
"node_q_transfer"); - - //assign local data to new storage - if(this->local_q_transfer.size()!=0){ - super_vector_initialization(this->q_transfer, this->local_q_transfer, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->q_transfer.size()==0||this->local_q_transfer.size()==0){ + //store local data with existing managed view made by mesh read for now + if(this->local_q_transfer.size()==0&&this->q_transfer.size()!=0){ + this->local_q_transfer = this->q_transfer; + } + //storage for nlocal+nghost + this->q_transfer = DistributedDCArray(partitioned_map, "node_q_transfer"); + + //assign local data to new storage + if(this->local_q_transfer.size()!=0){ + super_vector_initialization(this->q_transfer, this->local_q_transfer, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_q_transfer = DistributedDCArray(this->q_transfer,subview_map); } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_q_transfer = DistributedDCArray(this->q_transfer,subview_map); break; case node_state::gradient_level_set: - //store local data with existing managed view made by mesh read for now - if(this->local_gradient_level_set.size()==0&&this->gradient_level_set.size()!=0){ - this->local_gradient_level_set = this->gradient_level_set; - } - //storage for nlocal+nghost - this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); - //assign local data to new storage - if(this->gradient_level_set.size()!=0){ - super_vector_initialization(this->gradient_level_set, this->local_gradient_level_set, subview_map.size()); + //if both local and all vector were already allocated skip + if(this->gradient_level_set.size()==0||this->local_gradient_level_set.size()==0){ + //store local data with existing managed view made by mesh read for now + 
if(this->local_gradient_level_set.size()==0&&this->gradient_level_set.size()!=0){ + this->local_gradient_level_set = this->gradient_level_set; + } + //storage for nlocal+nghost + this->gradient_level_set = DistributedDCArray(partitioned_map, num_dims, "node_grad_levelset"); + //assign local data to new storage + if(this->gradient_level_set.size()!=0){ + super_vector_initialization(this->gradient_level_set, this->local_gradient_level_set, subview_map.size()); + } + //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here + this->local_gradient_level_set = DistributedDCArray(this->gradient_level_set,subview_map); } - //replace local data storage with subview of nlocal+nghost; previous managed view should self-destruct here - this->local_gradient_level_set = DistributedDCArray(this->gradient_level_set,subview_map); break; default: std::cout<<"Desired node state not understood in node_t initialize"< num_material_elems; ///< returns the exact number of matpts DCArrayKokkos num_material_elems_buffer; ///< returns the number of matpts plus buffer + DCArrayKokkos num_material_local_elems; ///< returns the exact number of matpts DRaggedRightArrayKokkos elem; ///< returns the elem for this material @@ -627,6 +649,12 @@ struct MaterialToMeshMap_t this->num_material_elems_buffer = DCArrayKokkos (num_mats, "num_material_elems_with_buffer"); } + // Note: num_material_elems is allocated in problem setup + if (num_material_local_elems.size() == 0){ + this->num_material_local_elems = DCArrayKokkos (num_mats, "num_material_local_elems"); + } + + }; // end method }; // end MaterialtoMeshMaps_t @@ -661,6 +689,7 @@ enum class material_pt_state struct MaterialPoint_t { DCArrayKokkos num_material_points; ///< the actual number of material points, omitting the buffer + DCArrayKokkos num_material_local_points; ///< the actual number of material points, omitting the buffer DCArrayKokkos num_material_points_buffer; ///< number of material 
points plus a buffer DRaggedRightArrayKokkos den; ///< MaterialPoint density @@ -708,6 +737,10 @@ struct MaterialPoint_t this->num_material_points_buffer = DCArrayKokkos (num_mats, "num_material_points_with_buffer"); } + if (num_material_local_points.size() == 0){ + this->num_material_local_points = DCArrayKokkos (num_mats, "num_material_local_points"); + } + }; // end method // initialization method (num_dims) diff --git a/single-node-refactor/src/common/src/region_fill.cpp b/single-node-refactor/src/common/src/region_fill.cpp index b28898e81..cc04f2f4b 100644 --- a/single-node-refactor/src/common/src/region_fill.cpp +++ b/single-node-refactor/src/common/src/region_fill.cpp @@ -59,11 +59,12 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, // the number of elems and nodes in the mesh const size_t num_dims = mesh.num_dims; const size_t num_elems = mesh.num_elems; + const size_t num_local_elems = mesh.num_local_elems; const size_t num_nodes = mesh.num_nodes; const size_t num_gauss_points = mesh.num_leg_gauss_in_elem*mesh.num_elems; const size_t num_mats = Materials.num_mats; // the number of materials on the mesh - + // Calculate element volume geometry::get_vol(State.GaussPoints.vol, State.node.coords, mesh); @@ -136,6 +137,7 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, // a counter for the Material index spaces DCArrayKokkos num_elems_saved_for_mat(num_mats, "num_elems_saved_for_mat"); + DCArrayKokkos num_local_elems_saved_for_mat(num_mats, "num_local_elems_saved_for_mat"); for (int mat_id = 0; mat_id < num_mats; mat_id++) { size_t sum_local; @@ -159,7 +161,30 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, num_elems_saved_for_mat.host(mat_id) = sum_total; } // end for + for (int mat_id = 0; mat_id < num_mats; mat_id++) { + size_t sum_local; + size_t sum_total; + + FOR_REDUCE_SUM(elem_gid, 0, num_local_elems, sum_local, { + + // loop over the materials in the element + for (size_t 
a_mat_in_elem=0; a_mat_in_elem < State.MeshtoMaterialMaps.num_mats_in_elem(elem_gid); a_mat_in_elem++){ + + // check to see if it is mat_id + if (State.MeshtoMaterialMaps.mat_id(elem_gid, a_mat_in_elem) == mat_id) { + // increment the number of elements the materials live in + sum_local++; + } // end if a_mat is equal to mat_id + + } // end loop over materials in elem + }, sum_total); + + // material index space size + num_local_elems_saved_for_mat.host(mat_id) = sum_total; + } // end for + num_elems_saved_for_mat.update_device(); + num_local_elems_saved_for_mat.update_device(); Kokkos::fence(); @@ -186,6 +211,8 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, // The exact size plus a buffer is for e.g., remap. The buffers are shortly below here. State.MaterialToMeshMaps.num_material_elems.host(mat_id) = num_elems_saved_for_mat.host(mat_id); State.MaterialPoints.num_material_points.host(mat_id) = num_elems_saved_for_mat.host(mat_id) * num_mat_pts_in_elem; + State.MaterialToMeshMaps.num_material_local_elems.host(mat_id) = num_local_elems_saved_for_mat.host(mat_id); + State.MaterialPoints.num_material_local_points.host(mat_id) = num_local_elems_saved_for_mat.host(mat_id) * num_mat_pts_in_elem; State.MaterialCorners.num_material_corners.host(mat_id) = num_elems_saved_for_mat.host(mat_id) * mesh.num_nodes_in_elem; State.MaterialZones.num_material_zones.host(mat_id) = num_elems_saved_for_mat.host(mat_id) * mesh.num_zones_in_elem; @@ -202,6 +229,8 @@ void simulation_setup(SimulationParameters_t& SimulationParameters, // copy to device the actual sizes State.MaterialToMeshMaps.num_material_elems.update_device(); State.MaterialPoints.num_material_points.update_device(); + State.MaterialToMeshMaps.num_material_local_elems.update_device(); + State.MaterialPoints.num_material_local_points.update_device(); State.MaterialCorners.num_material_corners.update_device(); State.MaterialZones.num_material_zones.update_device(); @@ -772,6 +801,7 @@ void 
material_state_setup(SimulationParameters_t& SimulationParameters, State.MaterialPoints.mass.host(mat_id,mat_point_lid) = fillGaussState.den.host(gauss_gid,a_mat_in_elem) * mat_vol; + } // --- set eroded flag to false --- From eb68370ba1ae861ee1e5b0456fe2fef6d2e1a3f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 11 Jul 2025 18:49:34 -0600 Subject: [PATCH 31/66] WIP: MPI refactor --- .../Solvers/SGH_solver_3D/src/momentum.cpp | 2 +- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 7 +++--- .../Solvers/SGH_solver_3D/src/sgh_setup.cpp | 3 +++ .../src/common/include/mesh_io.h | 22 ++++++++++++++++--- .../src/common/include/state.h | 3 +++ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp index c26ada2e4..a651d30ab 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/momentum.cpp @@ -61,7 +61,7 @@ void SGH3D::update_velocity(double rk_alpha, const size_t num_dims = mesh.num_dims; // walk over the nodes to update the velocity - FOR_ALL(node_gid, 0, mesh.num_nodes, { + FOR_ALL(node_gid, 0, mesh.num_local_nodes, { // loop over all corners around the node and calculate the nodal force for (size_t corner_lid = 0; corner_lid < mesh.num_corners_in_node(node_gid); corner_lid++) { diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index 5f1a9cf3d..8f9c35d6b 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -247,11 +247,11 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, MPI_Allreduce(&min_dt_calc, &dt, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); if(myrank==0){ if (cycle == 0) { 
- printf("cycle = %lu, time = %.8f, time step = %.8f \n", cycle, time_value, dt); + printf("cycle = %lu, time = %.12f, time step = %.12f \n", cycle, time_value, dt); } // print time step every 10 cycles else if (cycle % 20 == 0) { - printf("cycle = %lu, time = %.8f, time step = %.8f \n", cycle, time_value, dt); + printf("cycle = %lu, time = %.12f, time step = %.12f \n", cycle, time_value, dt); } // end if } @@ -597,7 +597,8 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, mass_domain_nodes_tend = sum_domain_node_mass(mesh, State.node.coords, State.node.mass); - + + MPI_Allreduce(&mass_domain_nodes_tend, &global_mass_domain_nodes_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&mass_domain_all_mats_tend, &global_mass_domain_all_mats_tend, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if(myrank==0){ printf("material mass conservation error = %f \n", global_mass_domain_all_mats_tend - global_mass_domain_all_mats_t0); diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp index f72f99af0..9c56369d2 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_setup.cpp @@ -59,6 +59,9 @@ void SGH3D::setup(SimulationParameters_t& SimulationParameters, { // add a flag on whether SGH was set up, if(SGH_setup_already==false) + //update node velocity on ghosts + node_velocity_comms.execute_comms(); + const size_t num_mats = Materials.num_mats; // the number of materials on the mesh // calculate pressure, sound speed, and stress for each material diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 9b0e9d795..93545814c 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -4181,8 +4181,16 @@ class MeshWriter const size_t num_elems = mesh.num_elems; 
const size_t num_dims = mesh.num_dims; + //host version of local element map for argument compatibility + HostDistributedMap host_local_element_map; + DCArrayKokkos global_indices_of_local_elements; + for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ + global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); + } + host_local_element_map = HostDistributedMap(global_indices_of_local_elements); + // save the cell state to an array for exporting to graphics files - auto elem_fields = CArray(num_elems, num_cell_scalar_vars); + auto elem_fields = DistributedCArray(host_local_element_map, num_cell_scalar_vars); int elem_switch = 1; DCArrayKokkos speed(num_elems, "speed"); @@ -4254,9 +4262,17 @@ class MeshWriter elem_switch *= -1; } // end for elem_gid + //host version of local element map for argument compatibility + HostDistributedMap host_node_map; + DCArrayKokkos global_indices_of_local_nodes; + for(int inode = 0; inode < mesh.num_local_nodes; inode++){ + global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); + } + host_node_map = HostDistributedMap(global_indices_of_local_nodes); + // save the vertex vector fields to an array for exporting to graphics files - CArray vec_fields(num_nodes, num_point_vec_vars, 3); - CArray point_scalar_fields(num_nodes, num_point_scalar_vars); + DistributedCArray vec_fields(host_node_map, num_point_vec_vars, 3); + DistributedCArray point_scalar_fields(host_node_map, num_point_scalar_vars); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { // position, var 0 diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index ae57d984b..c5cd1bbba 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -61,11 +61,14 @@ enum class fill_gauss_state //distributed vector type in use using DistributedMap = TpetraPartitionMap<>; +using HostDistributedMap = 
TpetraPartitionMap; template using DistributedDFArray = TpetraDFArray; template using DistributedDCArray = TpetraDCArray; template +using DistributedCArray = TpetraDCArray; +template using CommPlan = TpetraLRCommunicationPlan; From 7bc78f325fdc89ccc5a80e12580703d6283ce278 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sat, 12 Jul 2025 00:09:34 -0600 Subject: [PATCH 32/66] WIP: MPI refactor --- .../src/common/include/mesh_io.h | 55 ++++++++++++++++--- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 93545814c..cf78d6305 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3711,7 +3711,10 @@ class MeshWriter std::vector material_pt_states) { size_t num_mats = State.MaterialPoints.num_material_points.size(); - + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); // ---- Update host data ---- // material point values @@ -3759,12 +3762,20 @@ class MeshWriter const size_t num_dims = mesh.num_dims; // save the cell state to an array for exporting to graphics files - auto elem_fields = CArray(num_elems, num_scalar_vars); + + //host version of local element map for argument compatibility + HostDistributedMap host_local_element_map; + DCArrayKokkos global_indices_of_local_elements; + for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ + global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); + } + host_local_element_map = HostDistributedMap(global_indices_of_local_elements); + auto elem_fields = DistributedCArray(host_local_element_map, num_scalar_vars); int elem_switch = 1; DCArrayKokkos speed(num_elems, "speed"); - FOR_ALL(elem_gid, 0, num_elems, { + FOR_ALL(elem_gid, 0, mesh.num_local_elems, { double elem_vel[3]; // 
note:initialization with a list won't work elem_vel[0] = 0.0; elem_vel[1] = 0.0; @@ -3797,7 +3808,7 @@ class MeshWriter // export material centeric data to the elements for (int mat_id = 0; mat_id < num_mats; mat_id++) { - size_t num_mat_elems = State.MaterialToMeshMaps.num_material_elems.host(mat_id); + size_t num_mat_elems = State.MaterialToMeshMaps.num_material_local_elems.host(mat_id); for (size_t mat_elem_lid = 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // 1 material per element @@ -3828,8 +3839,22 @@ class MeshWriter elem_switch *= -1; } // end for elem_gid + //collective map has all indices on rank 0 and non on other ranks + HostDistributedMap collective_elem_map; + long long int num_collective_elem_indices = 0; + if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; + collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); + + //host version of local element map for argument compatibility + HostDistributedMap host_node_map; + DCArrayKokkos global_indices_of_local_nodes; + for(int inode = 0; inode < mesh.num_local_nodes; inode++){ + global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); + } + host_node_map = HostDistributedMap(global_indices_of_local_nodes); + // save the vertex vector fields to an array for exporting to graphics files - CArray vec_fields(num_nodes, num_vec_vars, 3); + DistributedCArray vec_fields(host_node_map, num_vec_vars, 3); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { // position, var 0 @@ -3865,6 +3890,11 @@ class MeshWriter } // end for loop over vertices + //collective map has all indices on rank 0 and non on other ranks + HostDistributedMap collective_node_map; + long long int num_collective_node_indices = 0; + if(myrank==0) num_collective_node_indices = mesh.global_num_nodes; + collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); // 
--------------------------------------------------------------------------- // Setup of file and directoring for exporting @@ -4122,7 +4152,10 @@ class MeshWriter std::vector gauss_pt_states, std::vector material_pt_states) { - + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); size_t num_mats = State.MaterialPoints.num_material_points.size(); // ---- Update host data ---- @@ -4228,7 +4261,7 @@ class MeshWriter // export material centeric data to the elements for (int mat_id = 0; mat_id < num_mats; mat_id++) { - size_t num_mat_elems = State.MaterialToMeshMaps.num_material_elems.host(mat_id); + size_t num_mat_elems = State.MaterialToMeshMaps.num_material_local_elems.host(mat_id); for (size_t mat_elem_lid = 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // 1 material per element @@ -4255,13 +4288,19 @@ class MeshWriter // export element centric data double e_switch = 1; - for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { + for (size_t elem_gid = 0; elem_gid < mesh.num_local_elems; elem_gid++) { elem_fields(elem_gid, 3) = State.GaussPoints.vol.host(elem_gid); elem_fields(elem_gid, 6) = speed.host(elem_gid); elem_fields(elem_gid, 8) = State.GaussPoints.div.host(elem_gid); elem_switch *= -1; } // end for elem_gid + //collective map has all indices on rank 0 and non on other ranks + HostDistributedMap collective_elem_map; + long long int num_collective_elem_indices = 0; + if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; + collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); + //host version of local element map for argument compatibility HostDistributedMap host_node_map; DCArrayKokkos global_indices_of_local_nodes; From 25f6ad100f6718193499c49884217e7e3ae38b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 15 Jul 2025 23:55:57 -0600 Subject: [PATCH 33/66] WIP: MPI refactor, 
collective ensight output on rank 0 --- .../src/common/include/mesh.h | 5 + .../src/common/include/mesh_io.h | 381 ++++++++++-------- .../src/common/include/state.h | 2 + 3 files changed, 211 insertions(+), 177 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 88962eb12..2d4d455e7 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -241,6 +241,7 @@ struct Mesh_t size_t num_lob_gauss_in_elem; ///< Number of Gauss Lobatto points in an element DistributedDCArray nodes_in_elem; ///< Nodes in an element + DistributedDCArray local_nodes_in_elem; ///< Nodes in uniquely distributed element (subview of above) CArrayKokkos corners_in_elem; ///< Corners in an element -- this can just be a functor RaggedRightArrayKokkos elems_in_elem; ///< Elements connected to an element @@ -607,6 +608,10 @@ struct Mesh_t nodes_in_elem.update_device(); + /*connectivity data for uniquely assigned elements (used mostly to simplify file output comms) + constructed as a subview of nodes_in_elem*/ + local_nodes_in_elem = DistributedDCArray(nodes_in_elem,local_element_map); + // element_map->describe(*fos,Teuchos::VERB_EXTREME); // element_map->describe(*fos,Teuchos::VERB_EXTREME); // create distributed multivector of the local node data and all (local + ghost) node storage diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index cf78d6305..bb9060676 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3765,7 +3765,7 @@ class MeshWriter //host version of local element map for argument compatibility HostDistributedMap host_local_element_map; - DCArrayKokkos global_indices_of_local_elements; + DCArrayKokkos global_indices_of_local_elements(mesh.num_local_elems); for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ 
global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); } @@ -3773,7 +3773,6 @@ class MeshWriter auto elem_fields = DistributedCArray(host_local_element_map, num_scalar_vars); int elem_switch = 1; - DCArrayKokkos speed(num_elems, "speed"); FOR_ALL(elem_gid, 0, mesh.num_local_elems, { double elem_vel[3]; // note:initialization with a list won't work @@ -3832,7 +3831,7 @@ class MeshWriter // export element centric data double e_switch = 1; - for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { + for (size_t elem_gid = 0; elem_gid < mesh.num_local_elems; elem_gid++) { elem_fields(elem_gid, 3) = State.GaussPoints.vol.host(elem_gid); elem_fields(elem_gid, 6) = speed.host(elem_gid); elem_fields(elem_gid, 8) = e_switch; @@ -3844,10 +3843,32 @@ class MeshWriter long long int num_collective_elem_indices = 0; if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); + + //collective vector and comms to the collective vector for elem fields + DistributedCArray collective_elem_fields(collective_elem_map, num_scalar_vars); + HostCommPlan collective_elem_comms(collective_elem_fields, elem_fields); + collective_elem_comms.execute_comms(); + + //host of node in elem for Trilinos template argument compatibility + DistributedCArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); + + //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + for (size_t elem_id = 0; elem_id < mesh.num_local_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); + } + } // end for elem_gid + + //collect nodes in elem with a conversion back to global node ids + DistributedCArray 
collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); + HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); + + nodes_in_elem_comms.execute_comms(); - //host version of local element map for argument compatibility + //NODE DATA COLLECTION + //host version of local node map for argument compatibility HostDistributedMap host_node_map; - DCArrayKokkos global_indices_of_local_nodes; + DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes); for(int inode = 0; inode < mesh.num_local_nodes; inode++){ global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); } @@ -3856,7 +3877,7 @@ class MeshWriter // save the vertex vector fields to an array for exporting to graphics files DistributedCArray vec_fields(host_node_map, num_vec_vars, 3); - for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { + for (size_t node_gid = 0; node_gid < mesh.num_local_nodes; node_gid++) { // position, var 0 vec_fields(node_gid, 0, 0) = State.node.coords.host(node_gid, 0); vec_fields(node_gid, 0, 1) = State.node.coords.host(node_gid, 1); @@ -3896,226 +3917,232 @@ class MeshWriter if(myrank==0) num_collective_node_indices = mesh.global_num_nodes; collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); - // --------------------------------------------------------------------------- - // Setup of file and directoring for exporting - // --------------------------------------------------------------------------- - FILE* out[20]; // the output files that are written to - char filename[128]; - int max_len = sizeof filename; - int str_output_len; - - struct stat st; + //collective vector and comms to the collective vector for node fields + DistributedCArray collective_vec_fields(collective_node_map, num_vec_vars, 3); + HostCommPlan collective_node_comms(collective_vec_fields, vec_fields); + collective_node_comms.execute_comms(); - if (stat("ensight", &st) != 0) { - system("mkdir 
ensight"); - } + if(myrank==0){ + // --------------------------------------------------------------------------- + // Setup of file and directoring for exporting + // --------------------------------------------------------------------------- + FILE* out[20]; // the output files that are written to + char filename[128]; + int max_len = sizeof filename; + int str_output_len; - if (stat("ensight/data", &st) != 0) { - system("mkdir ensight/data"); - } + struct stat st; - // --------------------------------------------------------------------------- - // Write the Geometry file - // --------------------------------------------------------------------------- - // sprintf(filename, "ensight/data/%s.%05d.geo", name, graphics_id); - str_output_len = snprintf(filename, max_len, "ensight/data/%s.%05d.geo", name, graphics_id); - // filename has the full string - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + if (stat("ensight", &st) != 0) { + system("mkdir ensight"); + } - out[0] = fopen(filename, "w"); + if (stat("ensight/data", &st) != 0) { + system("mkdir ensight/data"); + } - fprintf(out[0], "A graphics dump by Fierro \n"); + // --------------------------------------------------------------------------- + // Write the Geometry file + // --------------------------------------------------------------------------- + // sprintf(filename, "ensight/data/%s.%05d.geo", name, graphics_id); + str_output_len = snprintf(filename, max_len, "ensight/data/%s.%05d.geo", name, graphics_id); + // filename has the full string + if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - fprintf(out[0], "%s", "EnSight Gold geometry\n"); - fprintf(out[0], "%s", "node id assign\n"); - fprintf(out[0], "%s", "element id assign\n"); + out[0] = fopen(filename, "w"); - fprintf(out[0], "part\n"); - fprintf(out[0], "%10d\n", 1); - fprintf(out[0], "Mesh\n"); + fprintf(out[0], "A graphics dump by Fierro \n"); - // 
--- vertices --- - fprintf(out[0], "coordinates\n"); - fprintf(out[0], "%10lu\n", num_nodes); + fprintf(out[0], "%s", "EnSight Gold geometry\n"); + fprintf(out[0], "%s", "node id assign\n"); + fprintf(out[0], "%s", "element id assign\n"); - // write all components of the point coordinates - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], "%12.5e\n", State.node.coords.host(node_gid, 0)); - } + fprintf(out[0], "part\n"); + fprintf(out[0], "%10d\n", 1); + fprintf(out[0], "Mesh\n"); - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], "%12.5e\n", State.node.coords.host(node_gid, 1)); - } + // --- vertices --- + fprintf(out[0], "coordinates\n"); + fprintf(out[0], "%10lu\n", mesh.global_num_nodes); - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - if (num_dims == 3) { - fprintf(out[0], "%12.5e\n", State.node.coords.host(node_gid, 2)); - } - else{ - fprintf(out[0], "%12.5e\n", 0.0); + // write all components of the point coordinates + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, 0, 0)); } - } - // --- elements --- - if (num_dims == 3) { - fprintf(out[0], "hexa8\n"); - } - else{ - fprintf(out[0], "quad4\n"); - } - fprintf(out[0], "%10lu\n", num_elems); - - - int convert_ijk_to_ensight[8]; - if(mesh.num_dims==3){ - convert_ijk_to_ensight[0] = 0; - convert_ijk_to_ensight[1] = 1; - convert_ijk_to_ensight[2] = 3; - convert_ijk_to_ensight[3] = 2; - convert_ijk_to_ensight[4] = 4; - convert_ijk_to_ensight[5] = 5; - convert_ijk_to_ensight[6] = 7; - convert_ijk_to_ensight[7] = 6; - } - else{ - - convert_ijk_to_ensight[0] = 0; - convert_ijk_to_ensight[1] = 1; - convert_ijk_to_ensight[2] = 2; - convert_ijk_to_ensight[3] = 3; - convert_ijk_to_ensight[4] = 4; - convert_ijk_to_ensight[5] = 5; - convert_ijk_to_ensight[6] = 6; - convert_ijk_to_ensight[7] = 7; - } // end if - - - // write all global point numbers for this cell - for 
(int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - fprintf(out[0], "%10lu\t", mesh.nodes_in_elem.host(elem_gid, convert_ijk_to_ensight[node_lid]) + 1); // note: node_gid starts at 1 + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, 0, 1)); } - fprintf(out[0], "\n"); - } - fclose(out[0]); - - // --------------------------------------------------------------------------- - // Write the Scalar variable files - // --------------------------------------------------------------------------- - - // ensight_vars = (den, pres,...) - for (int var = 0; var < num_scalar_vars; var++) { - // write a scalar value - // sprintf(filename, "ensight/data/%s.%05d.%s", name, graphics_id, scalar_var_names[var]); - str_output_len = snprintf(filename, max_len, "ensight/data/%s.%05d.%s", name, graphics_id, scalar_var_names[var]); - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - - out[0] = fopen(filename, "w"); + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + if (num_dims == 3) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, 0, 2)); + } + else{ + fprintf(out[0], "%12.5e\n", 0.0); + } + } - fprintf(out[0], "Per_elem scalar values\n"); - fprintf(out[0], "part\n"); - fprintf(out[0], "%10d\n", 1); + // --- elements --- if (num_dims == 3) { fprintf(out[0], "hexa8\n"); } else{ fprintf(out[0], "quad4\n"); } + fprintf(out[0], "%10lu\n", mesh.global_num_elems); + + + int convert_ijk_to_ensight[8]; + if(mesh.num_dims==3){ + convert_ijk_to_ensight[0] = 0; + convert_ijk_to_ensight[1] = 1; + convert_ijk_to_ensight[2] = 3; + convert_ijk_to_ensight[3] = 2; + convert_ijk_to_ensight[4] = 4; + convert_ijk_to_ensight[5] = 5; + convert_ijk_to_ensight[6] = 7; + convert_ijk_to_ensight[7] = 6; + } + else{ + + convert_ijk_to_ensight[0] = 0; + 
convert_ijk_to_ensight[1] = 1; + convert_ijk_to_ensight[2] = 2; + convert_ijk_to_ensight[3] = 3; + convert_ijk_to_ensight[4] = 4; + convert_ijk_to_ensight[5] = 5; + convert_ijk_to_ensight[6] = 6; + convert_ijk_to_ensight[7] = 7; + } // end if + - for (int elem_id = 0; elem_id < num_elems; elem_id++) { - fprintf(out[0], "%12.5e\n", elem_fields(elem_id, var)); + // write all global point numbers for this cell + for (int elem_gid = 0; elem_gid < mesh.global_num_elems; elem_gid++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + fprintf(out[0], "%10lu\t", collective_nodes_in_elem(elem_gid, convert_ijk_to_ensight[node_lid]) + 1); // note: node_gid starts at 1 + } + fprintf(out[0], "\n"); } fclose(out[0]); - } // end for var - // --------------------------------------------------------------------------- - // Write the Vector variable files - // --------------------------------------------------------------------------- + // --------------------------------------------------------------------------- + // Write the Scalar variable files + // --------------------------------------------------------------------------- - // ensight vector vars = (position, velocity, force) - for (int var = 0; var < num_vec_vars; var++) { - // sprintf(filename, "ensight/data/%s.%05d.%s", name, graphics_id, vec_var_names[var]); - str_output_len = snprintf(filename, max_len, "ensight/data/%s.%05d.%s", name, graphics_id, vec_var_names[var]); - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + // ensight_vars = (den, pres,...) 
+ for (int var = 0; var < num_scalar_vars; var++) { + // write a scalar value + // sprintf(filename, "ensight/data/%s.%05d.%s", name, graphics_id, scalar_var_names[var]); + str_output_len = snprintf(filename, max_len, "ensight/data/%s.%05d.%s", name, graphics_id, scalar_var_names[var]); + if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - out[0] = fopen(filename, "w"); - // fprintf(out[0],"Per_node vector values\n"); - // fprintf(out[0],"part\n"); - // fprintf(out[0],"%10d \n",1); - // fprintf(out[0],"hexa8\n"); // WARNING, maybe bug here? + out[0] = fopen(filename, "w"); - fprintf(out[0], "Per_node vector values\n"); - fprintf(out[0], "part\n"); - fprintf(out[0], "%10d\n", 1); - fprintf(out[0], "block\n"); + fprintf(out[0], "Per_elem scalar values\n"); + fprintf(out[0], "part\n"); + fprintf(out[0], "%10d\n", 1); + if (num_dims == 3) { + fprintf(out[0], "hexa8\n"); + } + else{ + fprintf(out[0], "quad4\n"); + } - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], "%12.5e\n", vec_fields(node_gid, var, 0)); - } + for (int elem_id = 0; elem_id < mesh.global_num_elems; elem_id++) { + fprintf(out[0], "%12.5e\n", collective_elem_fields(elem_id, var)); + } - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], "%12.5e\n", vec_fields(node_gid, var, 1)); - } + fclose(out[0]); + } // end for var - for (int node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], "%12.5e\n", vec_fields(node_gid, var, 2)); - } + // --------------------------------------------------------------------------- + // Write the Vector variable files + // --------------------------------------------------------------------------- - fclose(out[0]); - } // end for var + // ensight vector vars = (position, velocity, force) + for (int var = 0; var < num_vec_vars; var++) { + // sprintf(filename, "ensight/data/%s.%05d.%s", name, graphics_id, vec_var_names[var]); + str_output_len = snprintf(filename, 
max_len, "ensight/data/%s.%05d.%s", name, graphics_id, vec_var_names[var]); + if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - // --------------------------------------------------------------------------- - // Write the case file - // --------------------------------------------------------------------------- + out[0] = fopen(filename, "w"); + // fprintf(out[0],"Per_node vector values\n"); + // fprintf(out[0],"part\n"); + // fprintf(out[0],"%10d \n",1); + // fprintf(out[0],"hexa8\n"); // WARNING, maybe bug here? - // sprintf(filename, "ensight/%s.case", name); - str_output_len = snprintf(filename, max_len, "ensight/%s.case", name); - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + fprintf(out[0], "Per_node vector values\n"); + fprintf(out[0], "part\n"); + fprintf(out[0], "%10d\n", 1); + fprintf(out[0], "block\n"); - out[0] = fopen(filename, "w"); + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, var, 0)); + } - fprintf(out[0], "FORMAT\n"); - fprintf(out[0], "type: ensight gold\n"); - fprintf(out[0], "GEOMETRY\n"); + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, var, 1)); + } - // sprintf(filename, "model: data/%s.*****.geo\n", name); - str_output_len = snprintf(filename, max_len, "model: data/%s.*****.geo\n", name); - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + for (int node_gid = 0; node_gid < mesh.global_num_nodes; node_gid++) { + fprintf(out[0], "%12.5e\n", collective_vec_fields(node_gid, var, 2)); + } + + fclose(out[0]); + } // end for var - fprintf(out[0], "%s", filename); - fprintf(out[0], "VARIABLE\n"); + // --------------------------------------------------------------------------- + // Write the case file + // 
--------------------------------------------------------------------------- - for (int var = 0; var < num_scalar_vars; var++) { - // sprintf(filename, "scalar per element: %s data/%s.*****.%s\n", scalar_var_names[var], name, scalar_var_names[var]); - str_output_len = snprintf(filename, max_len, "scalar per element: %s data/%s.*****.%s\n", scalar_var_names[var], name, scalar_var_names[var]); + // sprintf(filename, "ensight/%s.case", name); + str_output_len = snprintf(filename, max_len, "ensight/%s.case", name); if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - fprintf(out[0], "%s", filename); - } + out[0] = fopen(filename, "w"); + + fprintf(out[0], "FORMAT\n"); + fprintf(out[0], "type: ensight gold\n"); + fprintf(out[0], "GEOMETRY\n"); - for (int var = 0; var < num_vec_vars; var++) { - // sprintf(filename, "vector per node: %s data/%s.*****.%s\n", vec_var_names[var], name, vec_var_names[var]); - str_output_len = snprintf(filename, max_len, "vector per node: %s data/%s.*****.%s\n", vec_var_names[var], name, vec_var_names[var]); + // sprintf(filename, "model: data/%s.*****.geo\n", name); + str_output_len = snprintf(filename, max_len, "model: data/%s.*****.geo\n", name); if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + fprintf(out[0], "%s", filename); - } + fprintf(out[0], "VARIABLE\n"); - fprintf(out[0], "TIME\n"); - fprintf(out[0], "time set: 1\n"); - fprintf(out[0], "number of steps: %4d\n", graphics_id + 1); - fprintf(out[0], "filename start number: 0\n"); - fprintf(out[0], "filename increment: 1\n"); - fprintf(out[0], "time values: \n"); + for (int var = 0; var < num_scalar_vars; var++) { + // sprintf(filename, "scalar per element: %s data/%s.*****.%s\n", scalar_var_names[var], name, scalar_var_names[var]); + str_output_len = snprintf(filename, max_len, "scalar per element: %s data/%s.*****.%s\n", scalar_var_names[var], name, scalar_var_names[var]); + if 
(str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - graphics_times(graphics_id) = time_value; + fprintf(out[0], "%s", filename); + } - for (int i = 0; i <= graphics_id; i++) { - fprintf(out[0], "%12.5e\n", graphics_times(i)); - } - fclose(out[0]); + for (int var = 0; var < num_vec_vars; var++) { + // sprintf(filename, "vector per node: %s data/%s.*****.%s\n", vec_var_names[var], name, vec_var_names[var]); + str_output_len = snprintf(filename, max_len, "vector per node: %s data/%s.*****.%s\n", vec_var_names[var], name, vec_var_names[var]); + if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + fprintf(out[0], "%s", filename); + } + + fprintf(out[0], "TIME\n"); + fprintf(out[0], "time set: 1\n"); + fprintf(out[0], "number of steps: %4d\n", graphics_id + 1); + fprintf(out[0], "filename start number: 0\n"); + fprintf(out[0], "filename increment: 1\n"); + fprintf(out[0], "time values: \n"); + graphics_times(graphics_id) = time_value; + + for (int i = 0; i <= graphics_id; i++) { + fprintf(out[0], "%12.5e\n", graphics_times(i)); + } + fclose(out[0]); + } // --------------------------------------------------------------------------- // Done writing the graphics dump // --------------------------------------------------------------------------- @@ -4124,7 +4151,7 @@ class MeshWriter graphics_id++; delete[] name; - + return; } diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index c5cd1bbba..7e3d4a7b0 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -70,6 +70,8 @@ template using DistributedCArray = TpetraDCArray; template using CommPlan = TpetraLRCommunicationPlan; +template +using HostCommPlan = TpetraLRCommunicationPlan; template From 34cd90d58b99828fc6299b73c627cd63d0754341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= 
<“adriandiaz1117@gmail.com”> Date: Thu, 17 Jul 2025 10:37:24 -0600 Subject: [PATCH 34/66] WIP: MPI refactor; 3D sgh works with phi_min=1 --- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 2 +- .../src/common/include/mesh.h | 25 ++++++++++++++----- .../src/common/include/mesh_io.h | 13 +++++----- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index 8f9c35d6b..7881d06c7 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -227,7 +227,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, State.MaterialPoints.sspd, State.MaterialPoints.eroded, State.MaterialToMeshMaps.elem, - State.MaterialToMeshMaps.num_material_elems.host(mat_id), + State.MaterialToMeshMaps.num_material_local_elems.host(mat_id), time_value, graphics_time, time_final, diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 2d4d455e7..553d80d1c 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -525,16 +525,16 @@ struct Mesh_t } // copy over from buffer to compressed storage - DCArrayKokkos Element_Global_Indices(nonoverlapping_count, "Element_Global_Indices"); + DCArrayKokkos Local_Element_Global_Indices(nonoverlapping_count, "Local_Element_Global_Indices"); for (int ibuffer = 0; ibuffer < nonoverlapping_count; ibuffer++) { - Element_Global_Indices.host(ibuffer) = Initial_Element_Global_Indices.host(ibuffer); + Local_Element_Global_Indices.host(ibuffer) = Initial_Element_Global_Indices.host(ibuffer); } num_local_elems = nonoverlapping_count; - Element_Global_Indices.update_device(); + Local_Element_Global_Indices.update_device(); // create nonoverlapping element map - local_element_map = 
DistributedMap(Element_Global_Indices); + local_element_map = DistributedMap(Local_Element_Global_Indices); // sort element connectivity so nonoverlaps are sequentially found first // define initial sorting of global indices @@ -545,6 +545,7 @@ struct Mesh_t { Initial_Element_Global_Indices.host(ielem) = element_map.getGlobalIndex(ielem); } + Initial_Element_Global_Indices.update_device(); // re-sort so local elements in the nonoverlapping map are first in storage CArrayKokkos Temp_Nodes(num_nodes_in_elem); @@ -552,6 +553,7 @@ struct Mesh_t long long int temp_element_gid, current_element_gid; int last_storage_index = num_elems - 1; + //nodes_in_elem.print(); for (int ielem = 0; ielem < num_local_elems; ielem++) { @@ -575,7 +577,7 @@ struct Mesh_t // test if swapped element is also not part of the non overlap map; if so lower loop counter to repeat the above temp_element_gid = Initial_Element_Global_Indices.host(ielem); - if (!element_map.isProcessGlobalIndex(temp_element_gid)) + if (!local_element_map.isProcessGlobalIndex(temp_element_gid)) { ielem--; } @@ -598,16 +600,27 @@ struct Mesh_t } //nodes_in_elem_temp.update_device(); nodes_in_elem = nodes_in_elem_temp; + //nodes_in_elem.print(); //convert global ids stored in nodes_in_elem to local node ids spanning 0:num_nodes on this process for(int ielem= 0; ielem < num_elems; ielem++) { for(int inode = 0; inode < num_nodes_in_elem; inode++){ - nodes_in_elem.host(ielem, inode) = all_node_map.getLocalIndex(nodes_in_elem(ielem, inode)); + nodes_in_elem.host(ielem, inode) = all_node_map.getLocalIndex(nodes_in_elem.host(ielem, inode)); } } nodes_in_elem.update_device(); + //local element map may need resorting after above permuting; update the order + for (int ielem = 0; ielem < num_local_elems; ielem++) + { + Local_Element_Global_Indices.host(ielem) = element_map.getGlobalIndex(ielem); + } + Local_Element_Global_Indices.update_device(); + local_element_map = DistributedMap(Local_Element_Global_Indices); + + 
//nodes_in_elem.print(); + /*connectivity data for uniquely assigned elements (used mostly to simplify file output comms) constructed as a subview of nodes_in_elem*/ local_nodes_in_elem = DistributedDCArray(nodes_in_elem,local_element_map); diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index bb9060676..614714e28 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -2102,6 +2102,7 @@ class MeshBuilder before the global mesh data on rank 0 falls out of scope*/ int global_num_nodes, global_num_elems; CArrayKokkos read_buffer; + CArrayKokkos read_buffer_edof; CArrayKokkos global_coords; CArrayKokkos global_nodes_in_elem; @@ -2366,7 +2367,7 @@ class MeshBuilder // read in element connectivity // we're gonna reallocate for the words per line expected for the element connectivity - read_buffer = CArrayKokkos(BUFFER_LINES, num_nodes_in_elem); + read_buffer_edof = CArrayKokkos(BUFFER_LINES, num_nodes_in_elem); // calculate buffer iterations to read number of lines buffer_iterations = global_num_elems / BUFFER_LINES; @@ -2394,7 +2395,7 @@ class MeshBuilder { for (int inode = 0; inode < num_nodes_in_elem; inode++) { - read_buffer(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_LINES + buffer_loop, inode); + read_buffer_edof(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_LINES + buffer_loop, inode); } // std::cout < Date: Wed, 30 Jul 2025 17:39:46 -0600 Subject: [PATCH 35/66] WIP: collective vtu writer --- .../src/common/include/mesh_io.h | 624 +++++++++++++++--- .../src/common/include/state.h | 6 +- 2 files changed, 539 insertions(+), 91 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 614714e28..ac276f9ce 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ 
b/single-node-refactor/src/common/include/mesh_io.h @@ -3351,11 +3351,24 @@ class MeshWriter const size_t num_nodes_in_elem = mesh.num_nodes_in_elem; const int Pn_order = mesh.Pn; - // save the elem state to an array for exporting to graphics files - DCArrayKokkos elem_scalar_fields(num_elem_scalar_vars, num_elems, "elem_scalars"); - DCArrayKokkos elem_tensor_fields(num_elem_tensor_vars, num_elems, 3, 3, "elem_tensors"); + /* save the elem state to an array for exporting to graphics files*/ + + //host version of local element map for argument compatibility + HostDistributedMap host_local_element_map; + DCArrayKokkos global_indices_of_local_elements(mesh.num_local_elems); + for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ + global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); + } + host_local_element_map = HostDistributedMap(global_indices_of_local_elements); + DistributedDFArray elem_scalar_fields(host_local_element_map, num_elem_scalar_vars, "elem_scalars"); + DistributedDFArray elem_tensor_fields(host_local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); elem_scalar_fields.set_values(0.0); elem_tensor_fields.set_values(0.0); + //duplicate for now to allow compatibility with comm plan object when using Tpetra (src and dst device type must be equal) + //We dont want to make a dual view of the rank 0 collector since that will blow device memory constraints sooner than this duplicate + //one other option is to just do the concatenation ops on the host + DistributedFArray host_elem_scalar_fields(host_local_element_map, num_elem_scalar_vars, "elem_scalars"); + DistributedFArray host_elem_tensor_fields(host_local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); // ----------------------------------------------------------------------- @@ -3371,9 +3384,9 @@ class MeshWriter State.MaterialToMeshMaps.elem, SimulationParameters.output_options.output_elem_state, 
SimulationParameters.output_options.output_gauss_pt_state, - State.MaterialToMeshMaps.num_material_elems.host(mat_id), + State.MaterialToMeshMaps.num_material_local_elems.host(mat_id), mat_id, - num_elems, + num_local_elems, den_id, pres_id, sie_id, @@ -3392,29 +3405,70 @@ class MeshWriter if (sie_id>=0){ FOR_ALL(elem_gid, 0, num_elems, { // get sie by dividing by the mass - elem_scalar_fields(sie_id, elem_gid) /= (elem_scalar_fields(mass_id, elem_gid)+1.e-20); + elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(mass_id, elem_gid)+1.e-20); }); } // end if Kokkos::fence(); elem_scalar_fields.update_host(); elem_tensor_fields.update_host(); + + // ----------------------------------------------------------------------- + // copy the output fields to host side array compatible with Tpetra Comms + // ----------------------------------------------------------------------- + + for (int mat_id = 0; mat_id < num_mats; mat_id++) { + + // material point and guass point state are concatenated together + copy_elem_fields(elem_scalar_fields, + elem_tensor_fields, + host_elem_scalar_fields, + host_elem_tensor_fields, + State.MaterialToMeshMaps.elem, + SimulationParameters.output_options.output_elem_state, + SimulationParameters.output_options.output_gauss_pt_state, + State.MaterialToMeshMaps.num_material_local_elems.host(mat_id), + mat_id, + num_local_elems, + den_id, + pres_id, + sie_id, + sspd_id, + mass_id, + stress_id, + vol_id, + div_id, + level_set_id, + vel_grad_id, + conductivity_id, + specific_heat_id); + } // end for mats // ************************ // Build the nodal fields // ************************ + //host version of local node map for argument compatibility + HostDistributedMap host_node_map; + DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes); + for(int inode = 0; inode < mesh.num_local_nodes; inode++){ + global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); + } + host_node_map = 
HostDistributedMap(global_indices_of_local_nodes); + // save the nodal fields to an array for exporting to graphics files - DCArrayKokkos node_scalar_fields(num_node_scalar_vars, num_nodes, "node_scalars"); - DCArrayKokkos node_vector_fields(num_node_vector_vars, num_nodes, 3, "node_tenors"); + DistributedDFArray node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); + DistributedDFArray node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); + DistributedFArray host_node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); + DistributedFArray host_node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); concatenate_nodal_fields(State.node, node_scalar_fields, node_vector_fields, SimulationParameters.output_options.output_node_state, dt, - num_nodes, + num_local_nodes, num_dims, node_mass_id, node_vel_id, @@ -3428,50 +3482,118 @@ class MeshWriter node_scalar_fields.update_host(); node_vector_fields.update_host(); + copy_nodal_fields(host_node_scalar_fields, + host_node_vector_fields, + node_scalar_fields, + node_vector_fields, + SimulationParameters.output_options.output_node_state, + dt, + num_local_nodes, + num_dims, + node_mass_id, + node_vel_id, + node_accel_id, + node_coord_id, + node_grad_level_set_id, + node_temp_id); + + // ************************************************** + // Collective communications for node and elem data + // ************************************************** + + //elem data collective comms + //collective map has all indices on rank 0 and non on other ranks + HostDistributedMap collective_elem_map; + long long int num_collective_elem_indices = 0; + if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; + collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); + + //collective vector and comms to the collective vector for elem fields + DistributedFArray collective_elem_scalar_fields(collective_elem_map, 
num_elem_scalar_vars, "elem_scalars_collective"); + DistributedFArray collective_elem_tensor_fields(collective_elem_map, num_elem_tensor_vars, 3, 3, "elem_tensors_collective"); + HostCommPlan collective_elem_scalars_comms(collective_elem_scalar_fields, host_elem_scalar_fields); + HostCommPlan collective_elem_tensors_comms(collective_elem_tensor_fields, host_elem_tensor_fields, collective_elem_scalars_comms); + collective_elem_scalars_comms.execute_comms(); + collective_elem_tensors_comms.execute_comms(); + + //host of node in elem for Trilinos template argument compatibility + DistributedFArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); + + //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + for (size_t elem_id = 0; elem_id < mesh.num_local_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); + } + } // end for elem_gid + + //collect nodes in elem with a conversion back to global node ids + DistributedFArray collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); + HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem, collective_elem_scalars_comms); + + nodes_in_elem_comms.execute_comms(); + + //node data collective comms + //collective map has all indices on rank 0 and non on other ranks + HostDistributedMap collective_node_map; + long long int num_collective_node_indices = 0; + if(myrank==0) num_collective_node_indices = mesh.global_num_nodes; + collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); + + //collective vector and comms to the collective vector for node fields + DistributedFArray collective_node_scalar_fields(collective_node_map, num_node_scalar_vars); + DistributedFArray 
collective_node_vector_fields(collective_node_map, num_node_vector_vars); + HostCommPlan collective_node_scalars_comms(collective_node_scalar_fields, host_node_scalar_fields); + HostCommPlan collective_node_vectors_comms(collective_node_vector_fields, host_node_vector_fields, collective_node_scalars_comms); + collective_node_scalars_comms.execute_comms(); + collective_node_vectors_comms.execute_comms(); + // ******************************** - // Write the nodal and elem fields + // Write the collective nodal and elem fields // ******************************** + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); if (SimulationParameters.output_options.format == output_options::viz || SimulationParameters.output_options.format == output_options::viz_and_state) { + if(myrank==0){ + // create the folder structure if it does not exist + struct stat st; - // create the folder structure if it does not exist - struct stat st; - - if (stat("vtk", &st) != 0) { - int returnCode = system("mkdir vtk"); + if (stat("vtk", &st) != 0) { + int returnCode = system("mkdir vtk"); - if (returnCode == 1) { - std::cout << "Unable to make vtk directory" << std::endl; - } - } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files inside - int returnCode = system("rm vtk/Fierro*"); if (returnCode == 1) { - std::cout << "Unable to clear vtk/Fierro directory" << std::endl; + std::cout << "Unable to make vtk directory" << std::endl; } } - } - - if (stat("vtk/data", &st) != 0) { - int returnCode = system("mkdir vtk/data"); - if (returnCode == 1) { - std::cout << "Unable to make vtk/data directory" << std::endl; + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside + int returnCode = system("rm vtk/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/Fierro directory" << std::endl; + } + } } - } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files 
inside the folder - int returnCode = system("rm vtk/data/Fierro*"); + + if (stat("vtk/data", &st) != 0) { + int returnCode = system("mkdir vtk/data"); if (returnCode == 1) { - std::cout << "Unable to clear vtk/data directory" << std::endl; + std::cout << "Unable to make vtk/data directory" << std::endl; + } + } + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside the folder + int returnCode = system("rm vtk/data/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/data directory" << std::endl; + } } } } - // call the .vtu writer for element fields std::string elem_fields_name = "fields"; @@ -3492,8 +3614,8 @@ class MeshWriter node_vector_var_names, elem_fields_name, graphics_id, - num_nodes, - num_elems, + mesh.global_num_nodes, + mesh.global_num_elems, num_nodes_in_elem, Pn_order, num_dims, @@ -3847,7 +3969,7 @@ class MeshWriter //collective vector and comms to the collective vector for elem fields DistributedCArray collective_elem_fields(collective_elem_map, num_scalar_vars); - HostCommPlan collective_elem_comms(collective_elem_fields, elem_fields); + HostCommPlanLR collective_elem_comms(collective_elem_fields, elem_fields); collective_elem_comms.execute_comms(); //host of node in elem for Trilinos template argument compatibility @@ -3862,7 +3984,7 @@ class MeshWriter //collect nodes in elem with a conversion back to global node ids DistributedCArray collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); - HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); + HostCommPlanLR nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); nodes_in_elem_comms.execute_comms(); @@ -3920,7 +4042,7 @@ class MeshWriter //collective vector and comms to the collective vector for node fields DistributedCArray collective_vec_fields(collective_node_map, num_vec_vars, 3); - HostCommPlan collective_node_comms(collective_vec_fields, vec_fields); + HostCommPlanLR 
collective_node_comms(collective_vec_fields, vec_fields); collective_node_comms.execute_comms(); if(myrank==0){ @@ -4543,8 +4665,8 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void concatenate_elem_fields(const MaterialPoint_t& MaterialPoints, const GaussPoint_t& GaussPoints, - DCArrayKokkos& elem_scalar_fields, - DCArrayKokkos& elem_tensor_fields, + DistributedDFArray& elem_scalar_fields, + DistributedDFArray& elem_tensor_fields, const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, const std::vector& output_elem_state, const std::vector& output_gauss_pt_states, @@ -4577,7 +4699,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(den_id, elem_gid) += MaterialPoints.den(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, den_id) += MaterialPoints.den(mat_id, mat_elem_lid)* MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); }); @@ -4589,7 +4711,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(pres_id, elem_gid) += MaterialPoints.pres(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, pres_id) += MaterialPoints.pres(mat_id, mat_elem_lid)* MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); }); @@ -4602,7 +4724,7 @@ class MeshWriter // field // extensive ie here, but after this function, it will become specific ie - elem_scalar_fields(sie_id, elem_gid) += MaterialPoints.mass(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, sie_id) += MaterialPoints.mass(mat_id, mat_elem_lid)* MaterialPoints.sie(mat_id, mat_elem_lid); }); break; @@ -4613,7 +4735,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(sspd_id, elem_gid) += MaterialPoints.sspd(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, sspd_id) += 
MaterialPoints.sspd(mat_id, mat_elem_lid)* MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); }); @@ -4625,7 +4747,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(mass_id, elem_gid) += MaterialPoints.mass(mat_id, mat_elem_lid); + elem_scalar_fields(elem_gid, mass_id) += MaterialPoints.mass(mat_id, mat_elem_lid); }); break; // --------------- @@ -4644,7 +4766,7 @@ class MeshWriter for(size_t j=0; j<3; j++){ // stress tensor - elem_tensor_fields(stress_id, elem_gid, i, j) += + elem_tensor_fields(elem_gid, stress_id, i, j) += MaterialPoints.stress(mat_id, mat_elem_lid,i,j) * MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); @@ -4661,7 +4783,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(conductivity_id, elem_gid) += MaterialPoints.conductivity(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, conductivity_id) += MaterialPoints.conductivity(mat_id, mat_elem_lid)* MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); }); @@ -4674,7 +4796,7 @@ class MeshWriter size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - elem_scalar_fields(specific_heat_id, elem_gid) += MaterialPoints.specific_heat(mat_id, mat_elem_lid)* + elem_scalar_fields(elem_gid, specific_heat_id) += MaterialPoints.specific_heat(mat_id, mat_elem_lid)* MaterialPoints.volfrac(mat_id, mat_elem_lid)* MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); }); @@ -4709,14 +4831,14 @@ class MeshWriter case gauss_pt_state::volume: FOR_ALL(elem_gid, 0, num_elems, { - elem_scalar_fields(vol_id, elem_gid) = GaussPoints.vol(elem_gid); + elem_scalar_fields(elem_gid, vol_id) = GaussPoints.vol(elem_gid); }); break; case gauss_pt_state::divergence_velocity: FOR_ALL(elem_gid, 0, num_elems, { - elem_scalar_fields(div_id, elem_gid) = 
GaussPoints.div(elem_gid); + elem_scalar_fields(elem_gid, div_id) = GaussPoints.div(elem_gid); }); break; @@ -4724,7 +4846,7 @@ class MeshWriter case gauss_pt_state::level_set: FOR_ALL(elem_gid, 0, num_elems, { - elem_scalar_fields(level_set_id, elem_gid) = GaussPoints.level_set(elem_gid); + elem_scalar_fields(elem_gid, level_set_id) = GaussPoints.level_set(elem_gid); }); break; @@ -4735,7 +4857,7 @@ class MeshWriter FOR_ALL(elem_gid, 0, num_elems, { for (size_t i=0; i<3; i++){ for(size_t j=0; j<3; j++){ - elem_tensor_fields(vel_grad_id, elem_gid, i, j) = + elem_tensor_fields(elem_gid, vel_grad_id, i, j) = GaussPoints.vel_grad(elem_gid, i, j); } } // end for @@ -4753,6 +4875,196 @@ class MeshWriter } // end of function + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn copy_elem_fields + /// + /// \brief A function to assign dual values to host only array for tpetra compatibility + /// + /// + /// \param MaterialPoints a struct containing the material point state arrays + /// \param elem_scalar_fields the scalar fields + /// \param elem_tensor_fields the tensor fields + /// \param MaterialToMeshMaps_elem a listing of the element ids the material resides in + /// \param output_elem_state a std::vector of enums specifying the elem avg outputs + /// \param num_mat_elems the number of elements the material resides in + /// \param mat_id the index for the material + /// + ///////////////////////////////////////////////////////////////////////////// + void copy_elem_fields(DistributedDFArray& elem_scalar_fields, + DistributedDFArray& elem_tensor_fields, + DistributedFArray& elem_scalar_fields, + DistributedFArray& elem_tensor_fields, + const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, + const std::vector& output_elem_state, + const std::vector& output_gauss_pt_states, + const size_t num_mat_elems, + const size_t mat_id, + const size_t num_elems, + const int den_id, + const int pres_id, + const int sie_id, + const int 
sspd_id, + const int mass_id, + const int stress_id, + const int vol_id, + const int div_id, + const int level_set_id, + const int vel_grad_id, + const int conductivity_id, + const int specific_heat_id) + { + + // --- loop over the material point states + + for (auto field : output_elem_state){ + switch(field){ + // scalar vars + case material_pt_state::density: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, den_id) = elem_scalar_fields.host(elem_gid, den_id); + } + break; + case material_pt_state::pressure: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, pres_id) = elem_scalar_fields.host(elem_gid, pres_id); + } + break; + case material_pt_state::specific_internal_energy: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + // extensive ie here, but after this function, it will become specific ie + host_elem_scalar_fields(elem_gid, sie_id) = elem_scalar_fields.host(elem_gid, sie_id); + } + break; + case material_pt_state::sound_speed: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, sspd_id) = elem_scalar_fields.host(elem_gid, sspd_id); + } + break; + case material_pt_state::mass: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, mass_id) = elem_scalar_fields.host(elem_gid, mass_id); + } + break; + // --------------- + // tensor vars + // --------------- + case material_pt_state::stress: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + // average tensor fields, it is always 3D + // note: paraview is row-major, CArray convention + for (size_t i=0; i<3; i++){ + for(size_t j=0; j<3; j++){ + + // stress tensor + host_elem_tensor_fields(elem_gid, stress_id, i, j) = elem_tensor_fields.host(elem_gid, stress_id, i, j); + } // end for + } // end for + } + break; + + // thermal solver vars + 
case material_pt_state::thermal_conductivity: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, conductivity_id) = elem_scalar_fields.host(elem_gid, conductivity_id); + } + break; + + case material_pt_state::specific_heat: + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + + // field + host_elem_scalar_fields(elem_gid, specific_heat_id) = elem_scalar_fields.host(elem_gid, specific_heat_id); + } + break; + + + // add other variables here + + // not used variables + case material_pt_state::volume_fraction: + break; + case material_pt_state::eroded_flag: + break; + case material_pt_state::elastic_modulii: + break; + case material_pt_state::shear_modulii: + break; + case material_pt_state::poisson_ratios: + break; + case material_pt_state::heat_flux: + break; + } // end switch + }// end for over mat point state + + + // --- add loop over gauss points --- + + // export element centric data + for (auto field : output_gauss_pt_states){ + switch(field){ + // scalars + case gauss_pt_state::volume: + + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + host_elem_scalar_fields(elem_gid, vol_id) = elem_scalar_fields.host(elem_gid, vol_id); + } + + break; + case gauss_pt_state::divergence_velocity: + + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + host_elem_scalar_fields(elem_gid, div_id) = elem_scalar_fields.host(elem_gid, div_id); + } + + break; + + case gauss_pt_state::level_set: + + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + host_elem_scalar_fields(elem_gid, level_set_id) = elem_scalar_fields.host(elem_gid, level_set_id); + } + + break; + + // tensors + case gauss_pt_state::gradient_velocity: + // note: paraview is row-major, CArray convention + for(int elem_gid = 0; elem_gid < num_elems; elem_gid++){ + for (size_t i=0; i<3; i++){ + for(size_t j=0; j<3; j++){ + host_elem_tensor_fields(elem_gid, vel_grad_id, i, j) = + elem_tensor_fields.host(elem_gid, vel_grad_id, i, j); 
+ } + } // end for + } + + break; + + // add other gauss variables here + + } // end switch + } // end loop over gauss_pt_states + + + // --- add end gauss point loop -- + + } // end of function + ///////////////////////////////////////////////////////////////////////////// /// /// \fn concatenate_mat_fields @@ -4918,7 +5230,7 @@ class MeshWriter /// /// \fn concatenate_nodal_fields /// - /// \brief A function to calculate the average of elem fields + /// \brief A function to calculate the average of nodal fields /// /// /// \param Node a struct containing the material point state arrays @@ -4931,8 +5243,8 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void concatenate_nodal_fields(const node_t& Node, - DCArrayKokkos& node_scalar_fields, - DCArrayKokkos& node_vector_fields, + DistributedDFArray& node_scalar_fields, + DistributedDFArray& node_vector_fields, std::vector& output_node_states, double dt, const size_t num_nodes, @@ -4950,13 +5262,13 @@ class MeshWriter case node_state::mass: FOR_ALL(node_gid, 0, num_nodes, { - node_scalar_fields(node_mass_id, node_gid) = Node.mass(node_gid); + node_scalar_fields(node_gid, node_mass_id) = Node.mass(node_gid); }); break; case node_state::temp: FOR_ALL(node_gid, 0, num_nodes, { - node_scalar_fields(node_temp_id, node_gid) = Node.temp(node_gid); + node_scalar_fields(node_gid, node_temp_id) = Node.temp(node_gid); }); break; @@ -4967,13 +5279,13 @@ class MeshWriter FOR_ALL(node_gid, 0, num_nodes, { - node_vector_fields(node_coord_id, node_gid, 0) = Node.coords(node_gid, 0); - node_vector_fields(node_coord_id, node_gid, 1) = Node.coords(node_gid, 1); + node_vector_fields(node_gid, node_coord_id, 0) = Node.coords(node_gid, 0); + node_vector_fields(node_gid, node_coord_id, 1) = Node.coords(node_gid, 1); if (num_dims == 2) { - node_vector_fields(node_coord_id, node_gid, 2) = 0.0; + node_vector_fields(node_gid, node_coord_id, 2) = 0.0; } else{ - 
node_vector_fields(node_coord_id, node_coord_id, 2) = Node.coords(node_gid, 2); + node_vector_fields(node_gid, node_coord_id, 2) = Node.coords(node_gid, 2); } // end if }); // end parallel for @@ -4984,23 +5296,23 @@ class MeshWriter FOR_ALL(node_gid, 0, num_nodes, { // velocity, var is node_vel_id - node_vector_fields(node_vel_id, node_gid, 0) = Node.vel(node_gid, 0); - node_vector_fields(node_vel_id, node_gid, 1) = Node.vel(node_gid, 1); + node_vector_fields(node_gid, node_vel_id, 0) = Node.vel(node_gid, 0); + node_vector_fields(node_gid, node_vel_id, 1) = Node.vel(node_gid, 1); if (num_dims == 2) { - node_vector_fields(node_vel_id, node_gid, 2) = 0.0; + node_vector_fields(node_gid, node_vel_id, 2) = 0.0; } else{ - node_vector_fields(node_vel_id, node_gid, 2) = Node.vel(node_gid, 2); + node_vector_fields(node_gid, node_vel_id, 2) = Node.vel(node_gid, 2); } // end if // accellerate, var is node_accel_id - node_vector_fields(node_accel_id, node_gid, 0) = (Node.vel(node_gid, 0) - Node.vel_n0(node_gid, 0))/dt; - node_vector_fields(node_accel_id, node_gid, 1) = (Node.vel(node_gid, 1) - Node.vel_n0(node_gid, 1))/dt; + node_vector_fields(node_gid, node_accel_id 0) = (Node.vel(node_gid, 0) - Node.vel_n0(node_gid, 0))/dt; + node_vector_fields(node_gid, node_accel_id, 1) = (Node.vel(node_gid, 1) - Node.vel_n0(node_gid, 1))/dt; if (num_dims == 2) { - node_vector_fields(node_accel_id, node_gid, 2) = 0.0; + node_vector_fields(node_gid, node_accel_id, 2) = 0.0; } else{ - node_vector_fields(node_accel_id, node_gid, 2) = (Node.vel(node_gid, 2) - Node.vel_n0(node_gid, 2))/dt; + node_vector_fields(node_gid, node_accel_id, 2) = (Node.vel(node_gid, 2) - Node.vel_n0(node_gid, 2))/dt; } // end if }); // end parallel for @@ -5013,13 +5325,141 @@ class MeshWriter FOR_ALL(node_gid, 0, num_nodes, { // velocity, var is node_vel_id - node_vector_fields(node_grad_level_set_id, node_gid, 0) = Node.gradient_level_set(node_gid, 0); - node_vector_fields(node_grad_level_set_id, node_gid, 1) = 
Node.gradient_level_set(node_gid, 1); + node_vector_fields(node_gid, node_grad_level_set_id, 0) = Node.gradient_level_set(node_gid, 0); + node_vector_fields(node_gid, node_grad_level_set_id, 1) = Node.gradient_level_set(node_gid, 1); if (num_dims == 2) { - node_vector_fields(node_grad_level_set_id, node_gid, 2) = 0.0; + node_vector_fields(node_gid, node_grad_level_set_id, 2) = 0.0; } else{ - node_vector_fields(node_grad_level_set_id, node_gid, 2) = Node.gradient_level_set(node_gid, 2); + node_vector_fields(node_gid, node_grad_level_set_id, 2) = Node.gradient_level_set(node_gid, 2); + } // end if + + }); // end parallel for + + break; + + // -- not used vars + case node_state::force: + break; + + // heat transer vars + case node_state::heat_transfer: + break; + // tensors + } // end switch + } // end for over + + + + } // end function + + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn copy_nodal_fields + /// + /// \brief A function to calculate the average of nodal fields + /// + /// + /// \param Node a struct containing the material point state arrays + /// \param elem_scalar_fields the scalar fields + /// \param elem_tensor_fields the tensor fields + /// \param MaterialToMeshMaps_elem a listing of the element ids the material resides in + /// \param output_node_states a std::vector of enums specifying the model + /// \param num_mat_elems the number of elements the material resides in + /// \param mat_id the index for the material + /// + ///////////////////////////////////////////////////////////////////////////// + void copy_nodal_fields(DistributedFArray& host_node_scalar_fields, + DistributedFArray& host_node_vector_fields, + DistributedDFArray& node_scalar_fields, + DistributedDFArray& node_vector_fields, + std::vector& output_node_states, + double dt, + const size_t num_nodes, + const size_t num_dims, + const int node_mass_id, + const int node_vel_id, + const int node_accel_id, + const int node_coord_id, + const 
int node_grad_level_set_id, + const int node_temp_id) + { + for (auto field : output_node_states){ + switch(field){ + // scalars + case node_state::mass: + + FOR_ALL(node_gid, 0, num_nodes, { + host_node_scalar_fields(node_gid, node_mass_id) = node_scalar_fields.host(node_gid, node_mass_id); + }); + + break; + case node_state::temp: + FOR_ALL(node_gid, 0, num_nodes, { + host_node_scalar_fields(node_gid, node_temp_id) = node_scalar_fields.host(node_gid, node_temp_id); + }); + + break; + + // vector fields + + case node_state::coords: + + FOR_ALL(node_gid, 0, num_nodes, { + + host_node_vector_fields(node_gid, node_coord_id, 0) = host.node_vector_fields(node_gid, node_coord_id, 0); + host_node_vector_fields(node_gid, node_coord_id, 1) = host.node_vector_fields(node_gid, node_coord_id, 1); + if (num_dims == 2) { + host_node_vector_fields(node_gid, node_coord_id, 2) = 0.0; + } + else{ + host_node_vector_fields(node_gid, node_coord_id, 2) = host.node_vector_fields(node_gid, node_coord_id, 2); + } // end if + + }); // end parallel for + + break; + case node_state::velocity: + + FOR_ALL(node_gid, 0, num_nodes, { + + // velocity, var is node_vel_id + host_node_vector_fields(node_gid, node_vel_id, 0) = node_vector_fields.host(node_gid, node_vel_id, 0); + host_node_vector_fields(node_gid, node_vel_id, 1) = node_vector_fields.host(node_gid, node_vel_id, 1); + if (num_dims == 2) { + host_node_vector_fields(node_gid, node_vel_id, 2) = 0.0; + } + else{ + host_node_vector_fields(node_gid, node_vel_id, 2) = node_vector_fields.host(node_gid, node_vel_id, 2); + } // end if + + // accellerate, var is node_accel_id + host_node_vector_fields(node_gid, node_accel_id, 0) = node_vector_fields.host(node_gid, node_accel_id, 0); + host_node_vector_fields(node_gid, node_accel_id, 1) = node_vector_fields.host(node_gid, node_accel_id, 1); + if (num_dims == 2) { + host_node_vector_fields(node_gid, node_accel_id, 2) = 0.0; + } + else{ + host_node_vector_fields(node_gid, node_accel_id, 2) = 
node_vector_fields.host(node_gid, node_accel_id, 2); + } // end if + + }); // end parallel for + + break; + + + case node_state::gradient_level_set: + + FOR_ALL(node_gid, 0, num_nodes, { + + // velocity, var is node_vel_id + host_node_vector_fields(node_gid, node_grad_level_set_id, 0) = node_vector_fields.host(node_gid, node_grad_level_set_id, 0); + host_node_vector_fields(node_gid, node_grad_level_set_id, 1) = node_vector_fields.host(node_gid, node_grad_level_set_id, 1); + if (num_dims == 2) { + host_node_vector_fields(node_gid, node_grad_level_set_id, 2) = 0.0; + } + else{ + host_node_vector_fields(node_gid, node_grad_level_set_id, 2) = node_vector_fields.host(node_gid, node_grad_level_set_id, 2); } // end if }); // end parallel for @@ -5055,12 +5495,12 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void write_vtu( - const ViewCArray& node_coords_host, - const ViewCArray& nodes_in_elem_host, - const DCArrayKokkos& elem_scalar_fields, - const DCArrayKokkos& elem_tensor_fields, - const DCArrayKokkos& node_scalar_fields, - const DCArrayKokkos& node_vector_fields, + const DistributedCArray& node_coords_host, + const DistributedCArray& nodes_in_elem_host, + const DistributedFArray& elem_scalar_fields, + const DistributedFArray& elem_tensor_fields, + const DistributedFArray& node_scalar_fields, + const DistributedFArray& node_vector_fields, const std::vector& elem_scalar_var_names, const std::vector& elem_tensor_var_names, const std::vector& node_scalar_var_names, @@ -5074,7 +5514,11 @@ class MeshWriter const size_t num_dims, const size_t solver_id ) - { + { + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + FILE* out[20]; // the output files that are written to char filename[100]; // char string int max_len = sizeof filename; @@ -5237,9 +5681,9 @@ class MeshWriter for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { fprintf(out[0], " %f %f %f\n", - 
node_vector_fields.host(a_var, node_gid, 0), - node_vector_fields.host(a_var, node_gid, 1), - node_vector_fields.host(a_var, node_gid, 2)); + node_vector_fields(a_var, node_gid, 0), + node_vector_fields(a_var, node_gid, 1), + node_vector_fields(a_var, node_gid, 2)); } // end for nodes fprintf(out[0], " \n"); @@ -5250,7 +5694,7 @@ class MeshWriter for (int a_var = 0; a_var < num_node_scalar_vars; a_var++) { fprintf(out[0], " \n", node_scalar_var_names[a_var].c_str()); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], " %f\n", node_scalar_fields.host(a_var, node_gid)); + fprintf(out[0], " %f\n", node_scalar_fields(a_var, node_gid)); } // end for nodes fprintf(out[0], " \n"); } // end for vec_vars @@ -5275,7 +5719,7 @@ class MeshWriter fprintf(out[0], " \n", elem_scalar_var_names[a_var].c_str()); // the 1 is number of scalar components [1:4] for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { - fprintf(out[0], " %f\n", elem_scalar_fields.host(a_var, elem_gid)); + fprintf(out[0], " %f\n", elem_scalar_fields(a_var, elem_gid)); } // end for elem fprintf(out[0], " \n"); } // end for elem scalar_vars @@ -5290,7 +5734,7 @@ class MeshWriter // Txx Txy Txz Tyx Tyy Tyz Tzx Tzy Tzz for (size_t i=0; i<3; i++){ for(size_t j=0; j<3; j++){ - fprintf(out[0], " %f ", elem_tensor_fields.host(a_var, elem_gid, i, j)); + fprintf(out[0], " %f ", elem_tensor_fields(a_var, elem_gid, i, j)); } // end j } // end i } // end for elem diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index 7e3d4a7b0..d90f636bc 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -69,9 +69,13 @@ using DistributedDCArray = TpetraDCArray; template using DistributedCArray = TpetraDCArray; template +using DistributedFArray = TpetraDFArray; +template using CommPlan = TpetraLRCommunicationPlan; template -using HostCommPlan = TpetraLRCommunicationPlan; 
+using HostCommPlanLR = TpetraLRCommunicationPlan; +template +using HostCommPlan = TpetraCommunicationPlan; template From 0c3d8766c5a74cf0a7b2df40449699ba164f5533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 1 Aug 2025 00:06:17 -0600 Subject: [PATCH 36/66] WIP: collective vtu writer --- .../src/common/include/mesh_io.h | 224 ++++++++++-------- 1 file changed, 128 insertions(+), 96 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index ac276f9ce..caf0c417f 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3547,6 +3547,17 @@ class MeshWriter collective_node_scalars_comms.execute_comms(); collective_node_vectors_comms.execute_comms(); + //collect nodal coordinates + //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + DistributedFArray host_node_coords(host_node_map, mesh.num_dims); + for (size_t node_id = 0; node_id < mesh.num_local_nodes; node_id++) { + for (int idim = 0; idim < mesh.num_dims; idim++) { + host_node_coords(elem_id, node_lid) = State.node.coords.host(node_gid, 0); + } + } // end for elem_gid + DistributedFArray collective_node_coords(collective_node_map, mesh.num_dims); + HostCommPlan collective_node_coords_comms(collective_node_coords, host_node_coords, collective_node_scalars_comms); + collective_node_coords_comms.execute_comms(); // ******************************** // Write the collective nodal and elem fields @@ -3597,17 +3608,13 @@ class MeshWriter // call the .vtu writer for element fields std::string elem_fields_name = "fields"; - // make a view of node coords for passing into functions - ViewCArray node_coords_host(&State.node.coords.host(0,0), num_nodes, num_dims); - ViewCArray nodes_in_elem_host(&mesh.nodes_in_elem.host(0,0), num_elems, num_nodes_in_elem); - - 
write_vtu(node_coords_host, - nodes_in_elem_host, - elem_scalar_fields, - elem_tensor_fields, - node_scalar_fields, - node_vector_fields, + write_vtu(collective_node_coords, + collective_nodes_in_elem, + collective_elem_scalar_fields, + collective_elem_tensor_fields, + collective_node_scalar_fields, + collective_node_vector_fields, elem_scalar_var_names, elem_tensor_var_names, node_scalar_var_names, @@ -3633,77 +3640,101 @@ class MeshWriter for (int mat_id = 0; mat_id < num_mats; mat_id++) { - const size_t num_mat_elems = State.MaterialToMeshMaps.num_material_elems.host(mat_id); - - // only save material data if the mat lives on the mesh, ie. has state allocated - if (num_mat_elems>0){ - - // set the nodal vars to zero size, we don't write these fields again - node_scalar_var_names.clear(); - node_vector_var_names.clear(); - - // the arrays storing all the material field data - DCArrayKokkos mat_elem_scalar_fields(num_mat_pt_scalar_vars, num_mat_elems, "mat_pt_scalars"); - DCArrayKokkos mat_elem_tensor_fields(num_mat_pt_tensor_vars, num_mat_elems, 3, 3, "mat_pt_tensors"); - - - // concatenate material fields into a single array - concatenate_mat_fields(State.MaterialPoints, - mat_elem_scalar_fields, - mat_elem_tensor_fields, - State.MaterialToMeshMaps.elem, - SimulationParameters.output_options.output_mat_pt_state, - num_mat_elems, - mat_id, - mat_den_id, - mat_pres_id, - mat_sie_id, - mat_sspd_id, - mat_mass_id, - mat_volfrac_id, - mat_geo_volfrac_id, - mat_eroded_id, - mat_stress_id, - mat_conductivity_id, - mat_specific_heat_id); - Kokkos::fence(); - mat_elem_scalar_fields.update_host(); - mat_elem_tensor_fields.update_host(); - - - std::string str_mat_val = std::to_string(mat_id); - std::string mat_fields_name = "mat"; - mat_fields_name += str_mat_val; // add the mat number - - // save the nodes belonging to this part (i.e., the material) - DCArrayKokkos mat_node_coords(num_nodes,num_dims, "mat_node_coords"); - DCArrayKokkos mat_nodes_in_mat_elem(num_mat_elems, 
num_nodes_in_elem, "mat_nodes_in_mat_elem"); - - // the number of actual nodes belonging to the part (i.e., the material) - size_t num_mat_nodes = 0; - - // build a unique mesh (element and nodes) for the material (i.e., the part) - build_material_elem_node_lists(mesh, - State.node.coords, - mat_node_coords, - mat_nodes_in_mat_elem, - State.MaterialToMeshMaps.elem, - mat_id, - num_mat_nodes, - num_mat_elems, - num_nodes_in_elem, - num_dims); - - ViewCArray mat_node_coords_host(&mat_node_coords.host(0,0), num_mat_nodes, num_dims); - ViewCArray mat_nodes_in_elem_host(&mat_nodes_in_mat_elem.host(0,0), num_mat_elems, num_nodes_in_elem); - + const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_material_local_elems.host(mat_id); + //array storing number of local elems for this material on each process + CArray processes_num_local_mat_elems; + if(myrank==0){ + processes_num_local_mat_elems = CArray(nranks); + } + MPI_Gather(&num_mat_elems,1,MPI_LONG_LONG_INT,processes_num_local_mat_elems.pointer(),1, + MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //set global element indices on this rank + HostDistributedMap host_mat_elem_map; + DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ + global_indices_of_local_mat_elems(ielem) = State.MaterialToMeshMaps.elem(ielem); + } + host_mat_elem_map = HostDistributedMap(global_indices_of_local_nodes); + + //collect global element indices on rank 0 for this mat + //tally total number of mat elems for rank 0 + DCArrayKokkos global_indices_of_collective_mat_elems; + long long int num_mat_collective_elems = 0; + if(myrank==0){ + for(int irank=0; irank < nranks; irank++){ + num_mat_collective_elems += processes_num_local_mat_elems(irank); + } + global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_local_elems); + } + MPI_Gatherv(); + // set the nodal vars to zero size, we don't write these fields again + node_scalar_var_names.clear(); + 
node_vector_var_names.clear(); + + // the arrays storing all the material field data + DCArrayKokkos mat_elem_scalar_fields(num_mat_pt_scalar_vars, num_mat_elems, "mat_pt_scalars"); + DCArrayKokkos mat_elem_tensor_fields(num_mat_pt_tensor_vars, num_mat_elems, 3, 3, "mat_pt_tensors"); + + + // concatenate material fields into a single array + concatenate_mat_fields(State.MaterialPoints, + collective_mat_elem_scalar_fields, + collective_mat_elem_tensor_fields, + State.MaterialToMeshMaps.elem, + SimulationParameters.output_options.output_mat_pt_state, + num_mat_local_elems, + mat_id, + mat_den_id, + mat_pres_id, + mat_sie_id, + mat_sspd_id, + mat_mass_id, + mat_volfrac_id, + mat_geo_volfrac_id, + mat_eroded_id, + mat_stress_id, + mat_conductivity_id, + mat_specific_heat_id); + Kokkos::fence(); + mat_elem_scalar_fields.update_host(); + mat_elem_tensor_fields.update_host(); + + + std::string str_mat_val = std::to_string(mat_id); + std::string mat_fields_name = "mat"; + mat_fields_name += str_mat_val; // add the mat number + + // save the nodes belonging to this part (i.e., the material) + DCArrayKokkos mat_nodes_in_mat_elem(num_mat_elems, num_nodes_in_elem, "mat_nodes_in_mat_elem"); + + // the number of actual nodes belonging to the part (i.e., the material) + size_t num_mat_nodes = 0; + + // build a unique mesh (element and nodes) for the material (i.e., the part) + build_material_elem_node_lists(mesh, + State.node.coords, + mat_node_coords, + mat_nodes_in_mat_elem, + State.MaterialToMeshMaps.elem, + mat_id, + num_mat_nodes, + num_mat_elems, + num_nodes_in_elem, + num_dims); + + ViewCArray mat_node_coords_host(&mat_node_coords.host(0,0), num_mat_nodes, num_dims); + ViewCArray mat_nodes_in_elem_host(&mat_nodes_in_mat_elem.host(0,0), num_mat_elems, num_nodes_in_elem); + + // only write material data if the mat lives on the mesh, ie. 
has state allocated + if (global_num_mat_elems>0){ // write out a vtu file this - write_vtu(mat_node_coords_host, - mat_nodes_in_elem_host, - mat_elem_scalar_fields, - mat_elem_tensor_fields, - node_scalar_fields, - node_vector_fields, + write_vtu(collective_node_coords, + collective_mat_nodes_in_elem_host, + collective_mat_elem_scalar_fields, + collective_mat_elem_tensor_fields, + collective_node_scalar_fields, + collective_node_vector_fields, mat_elem_scalar_var_names, mat_elem_tensor_var_names, node_scalar_var_names, @@ -3754,22 +3785,23 @@ class MeshWriter // call the vtm file writer std::string mat_fields_name = "mat"; - write_vtm(graphics_times, - elem_fields_name, - mat_fields_name, - time_value, - graphics_id, - num_mat_files_written, - write_mesh_state, - write_mat_pt_state, - solver_id); - - // call the pvd file writer - write_pvd(graphics_times, - time_value, - graphics_id, - solver_id); - + if(myrank==0){ + write_vtm(graphics_times, + elem_fields_name, + mat_fields_name, + time_value, + graphics_id, + num_mat_files_written, + write_mesh_state, + write_mat_pt_state, + solver_id); + + // call the pvd file writer + write_pvd(graphics_times, + time_value, + graphics_id, + solver_id); + } // increment graphics id counter graphics_id++; // this is private variable in the class From 8a35e3eff7be747ade91a712321dfcdda765f663 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 3 Aug 2025 17:17:47 -0600 Subject: [PATCH 37/66] WIP: collective vtu writer --- .../src/common/include/mesh_io.h | 152 ++++++++++-------- 1 file changed, 83 insertions(+), 69 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index caf0c417f..aa51217cf 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3642,9 +3642,10 @@ class MeshWriter const size_t num_mat_local_elems = 
State.MaterialToMeshMaps.num_material_local_elems.host(mat_id); //array storing number of local elems for this material on each process - CArray processes_num_local_mat_elems; + CArray processes_num_local_mat_elems, gatherv_displacements; if(myrank==0){ - processes_num_local_mat_elems = CArray(nranks); + processes_num_local_mat_elems = CArray(nranks); + gatherv_displacements = CArray(nranks); } MPI_Gather(&num_mat_elems,1,MPI_LONG_LONG_INT,processes_num_local_mat_elems.pointer(),1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); @@ -3657,30 +3658,41 @@ class MeshWriter } host_mat_elem_map = HostDistributedMap(global_indices_of_local_nodes); + //allocate arrays for distributed mat elem data + DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_elem_scalar_vars, "mat_elem_scalars"); + DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_elem_tensor_vars, 3, 3, "mat_elem_tensors"); + //collect global element indices on rank 0 for this mat //tally total number of mat elems for rank 0 DCArrayKokkos global_indices_of_collective_mat_elems; long long int num_mat_collective_elems = 0; if(myrank==0){ for(int irank=0; irank < nranks; irank++){ + gatherv_displacements(irank) = num_mat_collective_elems; num_mat_collective_elems += processes_num_local_mat_elems(irank); } global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_local_elems); } - MPI_Gatherv(); + MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, + global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), + gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //use indices on rank 0 to construct rank 0 collective map for this mat + HostDistributedMap collective_mat_elem_map; + collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); + + //collective storage for scalars and tensors using collective elem mat map + DistributedFArray 
collective_mat_elem_scalar_fields(collective_mat_elem_map, num_elem_scalar_vars, "mat_elem_scalars_collective"); + DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_elem_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + // set the nodal vars to zero size, we don't write these fields again node_scalar_var_names.clear(); node_vector_var_names.clear(); - // the arrays storing all the material field data - DCArrayKokkos mat_elem_scalar_fields(num_mat_pt_scalar_vars, num_mat_elems, "mat_pt_scalars"); - DCArrayKokkos mat_elem_tensor_fields(num_mat_pt_tensor_vars, num_mat_elems, 3, 3, "mat_pt_tensors"); - - // concatenate material fields into a single array concatenate_mat_fields(State.MaterialPoints, - collective_mat_elem_scalar_fields, - collective_mat_elem_tensor_fields, + host_mat_elem_scalar_fields, + host_mat_elem_tensor_fields, State.MaterialToMeshMaps.elem, SimulationParameters.output_options.output_mat_pt_state, num_mat_local_elems, @@ -3696,9 +3708,6 @@ class MeshWriter mat_stress_id, mat_conductivity_id, mat_specific_heat_id); - Kokkos::fence(); - mat_elem_scalar_fields.update_host(); - mat_elem_tensor_fields.update_host(); std::string str_mat_val = std::to_string(mat_id); @@ -3706,31 +3715,42 @@ class MeshWriter mat_fields_name += str_mat_val; // add the mat number // save the nodes belonging to this part (i.e., the material) - DCArrayKokkos mat_nodes_in_mat_elem(num_mat_elems, num_nodes_in_elem, "mat_nodes_in_mat_elem"); + DistributedFArray mat_nodes_in_mat_elem(host_mat_elem_map, num_nodes_in_elem, "mat_nodes_in_mat_elem"); // the number of actual nodes belonging to the part (i.e., the material) size_t num_mat_nodes = 0; // build a unique mesh (element and nodes) for the material (i.e., the part) - build_material_elem_node_lists(mesh, - State.node.coords, - mat_node_coords, - mat_nodes_in_mat_elem, - State.MaterialToMeshMaps.elem, - mat_id, - num_mat_nodes, - num_mat_elems, - num_nodes_in_elem, - num_dims); - - ViewCArray 
mat_node_coords_host(&mat_node_coords.host(0,0), num_mat_nodes, num_dims); - ViewCArray mat_nodes_in_elem_host(&mat_nodes_in_mat_elem.host(0,0), num_mat_elems, num_nodes_in_elem); + // build_material_elem_node_lists(mesh, + // State.node.coords, + // mat_node_coords, + // mat_nodes_in_mat_elem, + // State.MaterialToMeshMaps.elem, + // mat_id, + // num_mat_nodes, + // num_mat_elems, + // num_nodes_in_elem, + // num_dims); + + //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 + + //collect nodes in elem for this material on rank 0 + DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); + HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 + + mat_nodes_in_elem_comms.execute_comms();\ + + HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); //doesnt really do comms since all on rank 0 + mat_elem_scalars_comms.execute_comms(); + + HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); //doesnt really do comms since all on rank 0 + mat_elem_tensors_comms.execute_comms(); // only write material data if the mat lives on the mesh, ie. 
has state allocated if (global_num_mat_elems>0){ // write out a vtu file this write_vtu(collective_node_coords, - collective_mat_nodes_in_elem_host, + collective_mat_nodes_in_mat_elem, collective_mat_elem_scalar_fields, collective_mat_elem_tensor_fields, collective_node_scalar_fields, @@ -5114,8 +5134,8 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void concatenate_mat_fields(const MaterialPoint_t& MaterialPoints, - DCArrayKokkos& mat_elem_scalar_fields, - DCArrayKokkos& mat_elem_tensor_fields, + DistributedFArray& mat_elem_scalar_fields, + DistributedFArray& mat_elem_tensor_fields, const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, const std::vector& output_material_pt_states, const size_t num_mat_elems, @@ -5139,70 +5159,70 @@ class MeshWriter switch(field){ // scalar vars case material_pt_state::density: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // field - mat_elem_scalar_fields(mat_den_id, mat_elem_lid) = MaterialPoints.den(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_den_id, mat_elem_lid) = MaterialPoints.den.host(mat_id, mat_elem_lid); + } break; case material_pt_state::pressure: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // field - mat_elem_scalar_fields(mat_pres_id, mat_elem_lid) = MaterialPoints.pres(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_pres_id, mat_elem_lid) = MaterialPoints.pres.host(mat_id, mat_elem_lid); + } break; case material_pt_state::specific_internal_energy: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field // extensive ie here, but after this function, it will become specific ie - mat_elem_scalar_fields(mat_sie_id, mat_elem_lid) = MaterialPoints.sie(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_sie_id, 
mat_elem_lid) = MaterialPoints.sie.host(mat_id, mat_elem_lid); + } break; case material_pt_state::sound_speed: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_sspd_id, mat_elem_lid) = MaterialPoints.sspd(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_sspd_id, mat_elem_lid) = MaterialPoints.sspd.host(mat_id, mat_elem_lid); + } break; case material_pt_state::mass: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_mass_id, mat_elem_lid) = MaterialPoints.mass(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_mass_id, mat_elem_lid) = MaterialPoints.mass.host(mat_id, mat_elem_lid); + } break; case material_pt_state::volume_fraction: // material volume fraction - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) // field // this is the volume fraction of a material within a part - mat_elem_scalar_fields(mat_volfrac_id, mat_elem_lid) = MaterialPoints.volfrac(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_volfrac_id, mat_elem_lid) = MaterialPoints.volfrac.host(mat_id, mat_elem_lid); + } // geometric volume fraction - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field // this is the geometric volume fraction (interface reconstruction) - mat_elem_scalar_fields(mat_geo_volfrac_id, mat_elem_lid) = MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_geo_volfrac_id, mat_elem_lid) = MaterialPoints.geo_volfrac.host(mat_id, mat_elem_lid); + } break; case material_pt_state::eroded_flag: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_eroded_id, mat_elem_lid) = 
(double)MaterialPoints.eroded(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_eroded_id, mat_elem_lid) = (double)MaterialPoints.eroded.host(mat_id, mat_elem_lid); + } break; // --------------- // tensor vars // --------------- case material_pt_state::stress: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field // average tensor fields, it is always 3D @@ -5212,33 +5232,27 @@ class MeshWriter // stress tensor mat_elem_tensor_fields(mat_stress_id, mat_elem_lid, i, j) = - MaterialPoints.stress(mat_id, mat_elem_lid,i,j); + MaterialPoints.stress.host(mat_id, mat_elem_lid,i,j); } // end for } // end for - }); + } break; // thermal solver vars case material_pt_state::thermal_conductivity: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { - - // get elem gid - size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_conductivity_id, elem_gid) += MaterialPoints.conductivity(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_conductivity_id, mat_elem_lid) = MaterialPoints.conductivity.host(mat_id, mat_elem_lid); + } break; case material_pt_state::specific_heat: - FOR_ALL(mat_elem_lid, 0, num_mat_elems, { - - // get elem gid - size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_specific_heat_id, elem_gid) += MaterialPoints.specific_heat(mat_id, mat_elem_lid); - }); + mat_elem_scalar_fields(mat_specific_heat_id, mat_elem_lid) = MaterialPoints.specific_heat.host(mat_id, mat_elem_lid); + } break; // add other variables here From 98d8a2bb1a88ffd66dee8a30caf52114ec2f15b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 4 Aug 2025 00:43:58 -0600 Subject: [PATCH 38/66] WIP: collective vtu write 
--- .../src/common/include/mesh_io.h | 170 ++++++++++++------ 1 file changed, 120 insertions(+), 50 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index aa51217cf..f50b857dc 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -2721,7 +2721,10 @@ class MeshWriter std::vector material_pt_states, const size_t solver_id) { - + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); // node_state is an enum for possible fields (e.g., coords, velocity, etc.), see state.h // gauss_pt_state is an enum for possible fields (e.g., vol, divergence, etc.) @@ -3562,9 +3565,6 @@ class MeshWriter // ******************************** // Write the collective nodal and elem fields // ******************************** - int myrank, nranks; - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - MPI_Comm_size(MPI_COMM_WORLD,&nranks); if (SimulationParameters.output_options.format == output_options::viz || SimulationParameters.output_options.format == output_options::viz_and_state) { @@ -3608,26 +3608,26 @@ class MeshWriter // call the .vtu writer for element fields std::string elem_fields_name = "fields"; - - write_vtu(collective_node_coords, - collective_nodes_in_elem, - collective_elem_scalar_fields, - collective_elem_tensor_fields, - collective_node_scalar_fields, - collective_node_vector_fields, - elem_scalar_var_names, - elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - elem_fields_name, - graphics_id, - mesh.global_num_nodes, - mesh.global_num_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - + if(myrank==0){ + write_vtu(collective_node_coords, + collective_nodes_in_elem, + collective_elem_scalar_fields, + collective_elem_tensor_fields, + collective_node_scalar_fields, + collective_node_vector_fields, + elem_scalar_var_names, + elem_tensor_var_names, + 
node_scalar_var_names, + node_vector_var_names, + elem_fields_name, + graphics_id, + mesh.global_num_nodes, + mesh.global_num_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); + } // ******************************** // Build and write the mat fields @@ -3647,7 +3647,7 @@ class MeshWriter processes_num_local_mat_elems = CArray(nranks); gatherv_displacements = CArray(nranks); } - MPI_Gather(&num_mat_elems,1,MPI_LONG_LONG_INT,processes_num_local_mat_elems.pointer(),1, + MPI_Gather(&num_mat_local_elems,1,MPI_LONG_LONG_INT,processes_num_local_mat_elems.pointer(),1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); //set global element indices on this rank @@ -3714,47 +3714,64 @@ class MeshWriter std::string mat_fields_name = "mat"; mat_fields_name += str_mat_val; // add the mat number - // save the nodes belonging to this part (i.e., the material) - DistributedFArray mat_nodes_in_mat_elem(host_mat_elem_map, num_nodes_in_elem, "mat_nodes_in_mat_elem"); - // the number of actual nodes belonging to the part (i.e., the material) size_t num_mat_nodes = 0; - // build a unique mesh (element and nodes) for the material (i.e., the part) - // build_material_elem_node_lists(mesh, - // State.node.coords, - // mat_node_coords, - // mat_nodes_in_mat_elem, - // State.MaterialToMeshMaps.elem, - // mat_id, - // num_mat_nodes, - // num_mat_elems, - // num_nodes_in_elem, - // num_dims); - //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 //collect nodes in elem for this material on rank 0 DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 - - mat_nodes_in_elem_comms.execute_comms();\ + mat_nodes_in_elem_comms.execute_comms(); HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); 
//doesnt really do comms since all on rank 0 mat_elem_scalars_comms.execute_comms(); HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); //doesnt really do comms since all on rank 0 mat_elem_tensors_comms.execute_comms(); + + //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat + + // build a unique mesh (element and nodes) for the material (i.e., the part) + DCArrayKokkos collective_mat_node_indices; + if(myrank==0){ + build_material_node_list(mesh, + collective_mat_node_indices, + collective_mat_nodes_in_mat_elem, + State.MaterialToMeshMaps.elem, + mat_id, + num_mat_nodes, + num_mat_collective_elems, + num_nodes_in_elem, + num_dims); + } + + //map object for mat node indices + collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); + + DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); + HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + mat_node_coords_comms.execute_comms(); + + DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); + HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + mat_node_scalars_comms.execute_comms(); + + DistributedFArray collective_mat_node_vectors(collective_mat_node_map, num_node_vector_vars, "collective_mat_node_vectors"); + HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 + mat_node_vectors_comms.execute_comms(); + + HostDistributedMap collective_mat_node_map; // only write material data if the mat lives on the mesh, ie. 
has state allocated - if (global_num_mat_elems>0){ + if (global_num_mat_elems>0&&myrank==0){ // write out a vtu file this - write_vtu(collective_node_coords, + write_vtu(collective_mat_node_coords, collective_mat_nodes_in_mat_elem, collective_mat_elem_scalar_fields, collective_mat_elem_tensor_fields, - collective_node_scalar_fields, - collective_node_vector_fields, + collective_mat_node_scalar_fields, + collective_mat_node_vector_fields, mat_elem_scalar_var_names, mat_elem_tensor_var_names, node_scalar_var_names, @@ -3762,7 +3779,7 @@ class MeshWriter mat_fields_name, graphics_id, num_mat_nodes, - num_mat_elems, + num_mat_collective_elems, num_nodes_in_elem, Pn_order, num_dims, @@ -5561,9 +5578,6 @@ class MeshWriter const size_t solver_id ) { - int myrank, nranks; - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - MPI_Comm_size(MPI_COMM_WORLD,&nranks); FILE* out[20]; // the output files that are written to char filename[100]; // char string @@ -6041,7 +6055,63 @@ class MeshWriter } // end build part (i.e., material elem and point lists) function + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn build_material_elem_node_lists + /// + /// \brief Creates elems and nodes for a unique mesh of a material (i.e, a part) + /// + /// \param Simulation mesh + /// \param State node data + /// \param Material node coordinates + /// \param Material nodes in the material element + /// \param Material to mesh map for elements + /// \param number of material nodes + /// \param number of material elements + /// \param number of nodes in the element + /// \param number of dimensions + /// + ///////////////////////////////////////////////////////////////////////////// + void build_material_node_list( + const Mesh_t& mesh, + DCArrayKokkos collective_mat_node_indices, + DistributedFArray& mat_nodes_in_mat_elem, + const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, + const size_t mat_id, + size_t& num_mat_nodes, + const size_t num_mat_elems, + 
const size_t num_nodes_in_elem, + const size_t num_dims) + { + + + std::set mat_node_set; + long long int node_gid; + for (int elem_mat_id = 0; elem_mat_id < num_mat_elems; elem_mat_id++) + { + // set nodes per element + for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) + { + node_gid = mat_nodes_in_mat_elem(elem_mat_id, node_lid); //nodes in elem still stores global indices + mat_node_set.insert(node_gid); + } + } + + // save the number of nodes defining the material region, i.e., the part + num_mat_nodes = mat_node_set.size(); + //copy set to matar view + int inode = 0; + auto it = mat_node_set.begin(); + + // create a Map for ghost node indices + collective_mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); + while (it != mat_node_set.end()) { + collective_mat_node_indices(ighost++) = *it; + it++; + } + + } // end build part (i.e., material elem and point lists) function From b4e3dea0fe4e8c00675af71702124a37da2bbd91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 8 Aug 2025 20:57:31 -0600 Subject: [PATCH 39/66] WIP: vtu collective writer --- .../src/common/include/mesh_io.h | 90 ++++++++++--------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index f50b857dc..0386ce391 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3352,19 +3352,23 @@ class MeshWriter const size_t num_elems = mesh.num_elems; const size_t num_dims = mesh.num_dims; const size_t num_nodes_in_elem = mesh.num_nodes_in_elem; + const size_t num_local_elems = mesh.num_local_elems; + const size_t num_local_nodes = mesh.num_local_nodes; + DistributedMap local_element_map = mesh.local_element_map; + DistributedMap node_map = mesh.node_map; const int Pn_order = mesh.Pn; /* save the elem state to an array for exporting to 
graphics files*/ //host version of local element map for argument compatibility HostDistributedMap host_local_element_map; - DCArrayKokkos global_indices_of_local_elements(mesh.num_local_elems); - for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ - global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); + DCArrayKokkos global_indices_of_local_elements(num_local_elems); + for(int ielem = 0; ielem < num_local_elems; ielem++){ + global_indices_of_local_elements(ielem) = local_element_map.getGlobalIndex(ielem); } host_local_element_map = HostDistributedMap(global_indices_of_local_elements); - DistributedDFArray elem_scalar_fields(host_local_element_map, num_elem_scalar_vars, "elem_scalars"); - DistributedDFArray elem_tensor_fields(host_local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); + DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); + DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); elem_scalar_fields.set_values(0.0); elem_tensor_fields.set_values(0.0); //duplicate for now to allow compatibility with comm plan object when using Tpetra (src and dst device type must be equal) @@ -3461,8 +3465,8 @@ class MeshWriter host_node_map = HostDistributedMap(global_indices_of_local_nodes); // save the nodal fields to an array for exporting to graphics files - DistributedDFArray node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); - DistributedDFArray node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); + DistributedDFArray node_scalar_fields(node_map, num_node_scalar_vars, "node_scalars"); + DistributedDFArray node_vector_fields(node_map, num_node_vector_vars, 3, "node_tenors"); DistributedFArray host_node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); DistributedFArray host_node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); @@ -3531,7 
+3535,7 @@ class MeshWriter //collect nodes in elem with a conversion back to global node ids DistributedFArray collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); - HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem, collective_elem_scalars_comms); + HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); nodes_in_elem_comms.execute_comms(); @@ -3552,10 +3556,10 @@ class MeshWriter //collect nodal coordinates //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) - DistributedFArray host_node_coords(host_node_map, mesh.num_dims); + DistributedFArray host_node_coords(host_node_map, mesh.num_dims); for (size_t node_id = 0; node_id < mesh.num_local_nodes; node_id++) { for (int idim = 0; idim < mesh.num_dims; idim++) { - host_node_coords(elem_id, node_lid) = State.node.coords.host(node_gid, 0); + host_node_coords(node_id, idim) = State.node.coords.host(node_id, idim); } } // end for elem_gid DistributedFArray collective_node_coords(collective_node_map, mesh.num_dims); @@ -3654,7 +3658,7 @@ class MeshWriter HostDistributedMap host_mat_elem_map; DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = State.MaterialToMeshMaps.elem(ielem); + global_indices_of_local_mat_elems(ielem) = State.MaterialToMeshMaps.elem.host(mat_id, ielem); } host_mat_elem_map = HostDistributedMap(global_indices_of_local_nodes); @@ -3724,10 +3728,10 @@ class MeshWriter HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 mat_nodes_in_elem_comms.execute_comms(); - HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); //doesnt really do comms since all on rank 0 + HostCommPlan 
mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); //doesnt really do comms since all on rank 0 mat_elem_scalars_comms.execute_comms(); - HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); //doesnt really do comms since all on rank 0 + HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); //doesnt really do comms since all on rank 0 mat_elem_tensors_comms.execute_comms(); //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat @@ -3747,24 +3751,22 @@ class MeshWriter } //map object for mat node indices - collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); + HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); - DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); - HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); + HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 mat_node_coords_comms.execute_comms(); - DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); - HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); + HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 
mat_node_scalars_comms.execute_comms(); - DistributedFArray collective_mat_node_vectors(collective_mat_node_map, num_node_vector_vars, "collective_mat_node_vectors"); - HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 + DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, "collective_mat_node_vectors"); + HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 mat_node_vectors_comms.execute_comms(); - - HostDistributedMap collective_mat_node_map; // only write material data if the mat lives on the mesh, ie. has state allocated - if (global_num_mat_elems>0&&myrank==0){ + if (num_mat_collective_elems>0&&myrank==0){ // write out a vtu file this write_vtu(collective_mat_node_coords, collective_mat_nodes_in_mat_elem, @@ -4962,8 +4964,8 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void copy_elem_fields(DistributedDFArray& elem_scalar_fields, DistributedDFArray& elem_tensor_fields, - DistributedFArray& elem_scalar_fields, - DistributedFArray& elem_tensor_fields, + DistributedFArray& host_elem_scalar_fields, + DistributedFArray& host_elem_tensor_fields, const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, const std::vector& output_elem_state, const std::vector& output_gauss_pt_states, @@ -5213,7 +5215,7 @@ class MeshWriter break; case material_pt_state::volume_fraction: // material volume fraction - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) + for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field // this is the volume fraction of a material within a part @@ -5284,7 +5286,8 @@ class MeshWriter case material_pt_state::heat_flux: break; } // end switch - }// end for over mat point state + } + } // end of function @@ -5369,7 +5372,7 @@ 
class MeshWriter } // end if // accellerate, var is node_accel_id - node_vector_fields(node_gid, node_accel_id 0) = (Node.vel(node_gid, 0) - Node.vel_n0(node_gid, 0))/dt; + node_vector_fields(node_gid, node_accel_id, 0) = (Node.vel(node_gid, 0) - Node.vel_n0(node_gid, 0))/dt; node_vector_fields(node_gid, node_accel_id, 1) = (Node.vel(node_gid, 1) - Node.vel_n0(node_gid, 1))/dt; if (num_dims == 2) { node_vector_fields(node_gid, node_accel_id, 2) = 0.0; @@ -5452,15 +5455,15 @@ class MeshWriter // scalars case node_state::mass: - FOR_ALL(node_gid, 0, num_nodes, { + for(long long int node_gid = 0; node_gid < num_nodes; node_gid++) { host_node_scalar_fields(node_gid, node_mass_id) = node_scalar_fields.host(node_gid, node_mass_id); - }); + } break; case node_state::temp: - FOR_ALL(node_gid, 0, num_nodes, { + for(long long int node_gid = 0; node_gid < num_nodes; node_gid++) { host_node_scalar_fields(node_gid, node_temp_id) = node_scalar_fields.host(node_gid, node_temp_id); - }); + } break; @@ -5468,23 +5471,23 @@ class MeshWriter case node_state::coords: - FOR_ALL(node_gid, 0, num_nodes, { + for(long long int node_gid = 0; node_gid < num_nodes; node_gid++) { - host_node_vector_fields(node_gid, node_coord_id, 0) = host.node_vector_fields(node_gid, node_coord_id, 0); - host_node_vector_fields(node_gid, node_coord_id, 1) = host.node_vector_fields(node_gid, node_coord_id, 1); + host_node_vector_fields(node_gid, node_coord_id, 0) = node_vector_fields.host(node_gid, node_coord_id, 0); + host_node_vector_fields(node_gid, node_coord_id, 1) = node_vector_fields.host(node_gid, node_coord_id, 1); if (num_dims == 2) { host_node_vector_fields(node_gid, node_coord_id, 2) = 0.0; } else{ - host_node_vector_fields(node_gid, node_coord_id, 2) = host.node_vector_fields(node_gid, node_coord_id, 2); + host_node_vector_fields(node_gid, node_coord_id, 2) = node_vector_fields.host(node_gid, node_coord_id, 2); } // end if - }); // end parallel for + } // end parallel for break; case 
node_state::velocity: - FOR_ALL(node_gid, 0, num_nodes, { + for(long long int node_gid = 0; node_gid < num_nodes; node_gid++) { // velocity, var is node_vel_id host_node_vector_fields(node_gid, node_vel_id, 0) = node_vector_fields.host(node_gid, node_vel_id, 0); @@ -5506,14 +5509,14 @@ class MeshWriter host_node_vector_fields(node_gid, node_accel_id, 2) = node_vector_fields.host(node_gid, node_accel_id, 2); } // end if - }); // end parallel for + } // end parallel for break; case node_state::gradient_level_set: - FOR_ALL(node_gid, 0, num_nodes, { + for(long long int node_gid = 0; node_gid < num_nodes; node_gid++) { // velocity, var is node_vel_id host_node_vector_fields(node_gid, node_grad_level_set_id, 0) = node_vector_fields.host(node_gid, node_grad_level_set_id, 0); @@ -5525,7 +5528,7 @@ class MeshWriter host_node_vector_fields(node_gid, node_grad_level_set_id, 2) = node_vector_fields.host(node_gid, node_grad_level_set_id, 2); } // end if - }); // end parallel for + } // end parallel for break; @@ -5558,8 +5561,8 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void write_vtu( - const DistributedCArray& node_coords_host, - const DistributedCArray& nodes_in_elem_host, + const DistributedFArray& node_coords_host, + const DistributedFArray& nodes_in_elem_host, const DistributedFArray& elem_scalar_fields, const DistributedFArray& elem_tensor_fields, const DistributedFArray& node_scalar_fields, @@ -6102,6 +6105,7 @@ class MeshWriter //copy set to matar view int inode = 0; + int ighost = 0; auto it = mat_node_set.begin(); // create a Map for ghost node indices From 87f5ee4e71df28c5d06253ede679c70103324516 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 10 Aug 2025 19:11:43 -0600 Subject: [PATCH 40/66] WIP: collective vtu writer --- .../src/common/include/mesh_io.h | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git 
a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 0386ce391..9fc7a2d9c 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3412,7 +3412,7 @@ class MeshWriter if (sie_id>=0){ FOR_ALL(elem_gid, 0, num_elems, { // get sie by dividing by the mass - elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(mass_id, elem_gid)+1.e-20); + elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(elem_gid, mass_id)+1.e-20); }); } // end if @@ -3548,7 +3548,7 @@ class MeshWriter //collective vector and comms to the collective vector for node fields DistributedFArray collective_node_scalar_fields(collective_node_map, num_node_scalar_vars); - DistributedFArray collective_node_vector_fields(collective_node_map, num_node_vector_vars); + DistributedFArray collective_node_vector_fields(collective_node_map, num_node_vector_vars, 3); HostCommPlan collective_node_scalars_comms(collective_node_scalar_fields, host_node_scalar_fields); HostCommPlan collective_node_vectors_comms(collective_node_vector_fields, host_node_vector_fields, collective_node_scalars_comms); collective_node_scalars_comms.execute_comms(); @@ -3761,7 +3761,7 @@ class MeshWriter HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 mat_node_scalars_comms.execute_comms(); - DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, "collective_mat_node_vectors"); + DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 mat_node_vectors_comms.execute_comms(); @@ -3813,7 +3813,6 @@ class MeshWriter { write_mesh_state = true; } - // 
check to see if a mat state was written bool write_mat_pt_state = false; if( num_mat_pt_scalar_vars > 0 || @@ -5181,14 +5180,14 @@ class MeshWriter for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // field - mat_elem_scalar_fields(mat_den_id, mat_elem_lid) = MaterialPoints.den.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_den_id) = MaterialPoints.den.host(mat_id, mat_elem_lid); } break; case material_pt_state::pressure: for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { // field - mat_elem_scalar_fields(mat_pres_id, mat_elem_lid) = MaterialPoints.pres.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_pres_id) = MaterialPoints.pres.host(mat_id, mat_elem_lid); } break; case material_pt_state::specific_internal_energy: @@ -5196,21 +5195,21 @@ class MeshWriter // field // extensive ie here, but after this function, it will become specific ie - mat_elem_scalar_fields(mat_sie_id, mat_elem_lid) = MaterialPoints.sie.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_sie_id) = MaterialPoints.sie.host(mat_id, mat_elem_lid); } break; case material_pt_state::sound_speed: for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_sspd_id, mat_elem_lid) = MaterialPoints.sspd.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_sspd_id) = MaterialPoints.sspd.host(mat_id, mat_elem_lid); } break; case material_pt_state::mass: for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_mass_id, mat_elem_lid) = MaterialPoints.mass.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_mass_id) = MaterialPoints.mass.host(mat_id, mat_elem_lid); } break; case material_pt_state::volume_fraction: @@ -5219,7 +5218,7 @@ class MeshWriter // field // this is the volume fraction of a material within a part - mat_elem_scalar_fields(mat_volfrac_id, 
mat_elem_lid) = MaterialPoints.volfrac.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_volfrac_id) = MaterialPoints.volfrac.host(mat_id, mat_elem_lid); } // geometric volume fraction @@ -5227,14 +5226,14 @@ class MeshWriter // field // this is the geometric volume fraction (interface reconstruction) - mat_elem_scalar_fields(mat_geo_volfrac_id, mat_elem_lid) = MaterialPoints.geo_volfrac.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_geo_volfrac_id) = MaterialPoints.geo_volfrac.host(mat_id, mat_elem_lid); } break; case material_pt_state::eroded_flag: for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_eroded_id, mat_elem_lid) = (double)MaterialPoints.eroded.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_eroded_id) = (double)MaterialPoints.eroded.host(mat_id, mat_elem_lid); } break; // --------------- @@ -5250,7 +5249,7 @@ class MeshWriter for(size_t j=0; j<3; j++){ // stress tensor - mat_elem_tensor_fields(mat_stress_id, mat_elem_lid, i, j) = + mat_elem_tensor_fields(mat_elem_lid, mat_stress_id, i, j) = MaterialPoints.stress.host(mat_id, mat_elem_lid,i,j); } // end for } // end for @@ -5262,7 +5261,7 @@ class MeshWriter for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_conductivity_id, mat_elem_lid) = MaterialPoints.conductivity.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_conductivity_id) = MaterialPoints.conductivity.host(mat_id, mat_elem_lid); } break; @@ -5270,7 +5269,7 @@ class MeshWriter for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ // field - mat_elem_scalar_fields(mat_specific_heat_id, mat_elem_lid) = MaterialPoints.specific_heat.host(mat_id, mat_elem_lid); + mat_elem_scalar_fields(mat_elem_lid, mat_specific_heat_id) = MaterialPoints.specific_heat.host(mat_id, mat_elem_lid); } break; @@ -5744,9 +5743,9 @@ class MeshWriter 
for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { fprintf(out[0], " %f %f %f\n", - node_vector_fields(a_var, node_gid, 0), - node_vector_fields(a_var, node_gid, 1), - node_vector_fields(a_var, node_gid, 2)); + node_vector_fields(node_gid, a_var, 0), + node_vector_fields(node_gid, a_var, 1), + node_vector_fields(node_gid, a_var, 2)); } // end for nodes fprintf(out[0], " \n"); @@ -5757,7 +5756,7 @@ class MeshWriter for (int a_var = 0; a_var < num_node_scalar_vars; a_var++) { fprintf(out[0], " \n", node_scalar_var_names[a_var].c_str()); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], " %f\n", node_scalar_fields(a_var, node_gid)); + fprintf(out[0], " %f\n", node_scalar_fields(node_gid, a_var)); } // end for nodes fprintf(out[0], " \n"); } // end for vec_vars @@ -5782,7 +5781,7 @@ class MeshWriter fprintf(out[0], " \n", elem_scalar_var_names[a_var].c_str()); // the 1 is number of scalar components [1:4] for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { - fprintf(out[0], " %f\n", elem_scalar_fields(a_var, elem_gid)); + fprintf(out[0], " %f\n", elem_scalar_fields(elem_gid, a_var)); } // end for elem fprintf(out[0], " \n"); } // end for elem scalar_vars @@ -5797,7 +5796,7 @@ class MeshWriter // Txx Txy Txz Tyx Tyy Tyz Tzx Tzy Tzz for (size_t i=0; i<3; i++){ for(size_t j=0; j<3; j++){ - fprintf(out[0], " %f ", elem_tensor_fields(a_var, elem_gid, i, j)); + fprintf(out[0], " %f ", elem_tensor_fields(elem_gid, a_var, i, j)); } // end j } // end i } // end for elem @@ -6077,7 +6076,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void build_material_node_list( const Mesh_t& mesh, - DCArrayKokkos collective_mat_node_indices, + DCArrayKokkos& collective_mat_node_indices, DistributedFArray& mat_nodes_in_mat_elem, const DRaggedRightArrayKokkos& MaterialToMeshMaps_elem, const size_t mat_id, From f877dad35509beebf22fbad8196c999c64b39ab1 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 11 Aug 2025 00:39:55 -0600 Subject: [PATCH 41/66] BUG: collective vtu writer --- .../src/common/include/mesh_io.h | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 9fc7a2d9c..e4719ab16 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3410,7 +3410,7 @@ class MeshWriter // make specific fields for the element average if (sie_id>=0){ - FOR_ALL(elem_gid, 0, num_elems, { + FOR_ALL(elem_gid, 0, num_local_elems, { // get sie by dividing by the mass elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(elem_gid, mass_id)+1.e-20); }); @@ -3527,7 +3527,7 @@ class MeshWriter DistributedFArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) - for (size_t elem_id = 0; elem_id < mesh.num_local_elems; elem_id++) { + for (size_t elem_id = 0; elem_id < num_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); } @@ -3557,7 +3557,7 @@ class MeshWriter //collect nodal coordinates //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) DistributedFArray host_node_coords(host_node_map, mesh.num_dims); - for (size_t node_id = 0; node_id < mesh.num_local_nodes; node_id++) { + for (size_t node_id = 0; node_id < num_local_nodes; node_id++) { for (int idim = 0; idim < mesh.num_dims; idim++) { host_node_coords(node_id, idim) = State.node.coords.host(node_id, idim); } @@ -3651,14 +3651,15 @@ class MeshWriter 
processes_num_local_mat_elems = CArray(nranks); gatherv_displacements = CArray(nranks); } - MPI_Gather(&num_mat_local_elems,1,MPI_LONG_LONG_INT,processes_num_local_mat_elems.pointer(),1, - MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, + MPI_INT, 0, MPI_COMM_WORLD); //set global element indices on this rank HostDistributedMap host_mat_elem_map; + DistributedMap element_map = mesh.element_map; DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = State.MaterialToMeshMaps.elem.host(mat_id, ielem); + global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem.host(mat_id, ielem)); } host_mat_elem_map = HostDistributedMap(global_indices_of_local_nodes); @@ -3675,8 +3676,14 @@ class MeshWriter gatherv_displacements(irank) = num_mat_collective_elems; num_mat_collective_elems += processes_num_local_mat_elems(irank); } - global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_local_elems); + global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); } + // if(myrank==0){ + // for(int irank=0; irank < nranks; irank++){ + // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; + // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; + // } + // } MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); From dcdbb3a88f6e005fb8bbde6ae6ed3411c756b04b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 11 Aug 2025 18:49:01 
-0600 Subject: [PATCH 42/66] BUG: collective vtu writer --- single-node-refactor/src/common/include/mesh_io.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index e4719ab16..ff4948b49 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3661,11 +3661,11 @@ class MeshWriter for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem.host(mat_id, ielem)); } - host_mat_elem_map = HostDistributedMap(global_indices_of_local_nodes); + host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); //allocate arrays for distributed mat elem data - DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_elem_scalar_vars, "mat_elem_scalars"); - DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_elem_tensor_vars, 3, 3, "mat_elem_tensors"); + DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); + DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); //collect global element indices on rank 0 for this mat //tally total number of mat elems for rank 0 @@ -3691,10 +3691,11 @@ class MeshWriter //use indices on rank 0 to construct rank 0 collective map for this mat HostDistributedMap collective_mat_elem_map; collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); + //collective_mat_elem_map.print(); //collective storage for scalars and tensors using collective elem mat map - DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_elem_scalar_vars, "mat_elem_scalars_collective"); - DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, 
num_elem_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); + DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); // set the nodal vars to zero size, we don't write these fields again node_scalar_var_names.clear(); @@ -3735,10 +3736,10 @@ class MeshWriter HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 mat_nodes_in_elem_comms.execute_comms(); - HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); //doesnt really do comms since all on rank 0 + HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); mat_elem_scalars_comms.execute_comms(); - HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); //doesnt really do comms since all on rank 0 + HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); mat_elem_tensors_comms.execute_comms(); //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat From 6f0e82864f8651f193703a0400cf3e8a79f5586c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 12 Aug 2025 22:54:34 -0600 Subject: [PATCH 43/66] ENH: collective vtu writer --- single-node-refactor/src/common/include/mesh_io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index ff4948b49..4c6b5c023 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3772,6 +3772,13 @@ class MeshWriter 
DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 mat_node_vectors_comms.execute_comms(); + + //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh + for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); + } + } // end for elem_gid // only write material data if the mat lives on the mesh, ie. has state allocated if (num_mat_collective_elems>0&&myrank==0){ From 3dc2dd619beca6d26a34697aa8a6f02491602fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 19 Aug 2025 00:14:24 -0600 Subject: [PATCH 44/66] WIP: parallel vtm write --- .../src/common/include/mesh_io.h | 1294 ++++++++++++----- 1 file changed, 942 insertions(+), 352 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 85c4d1dd3..daf5ac1a8 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -2721,10 +2721,6 @@ class MeshWriter std::vector material_pt_states, const size_t solver_id) { - - int myrank, nranks; - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - MPI_Comm_size(MPI_COMM_WORLD,&nranks); // node_state is an enum for possible fields (e.g., coords, velocity, etc.), see state.h // gauss_pt_state is an enum for possible fields (e.g., vol, divergence, etc.) 
@@ -3342,7 +3338,165 @@ class MeshWriter } // end switch } // end for over + // ******************************** + // Write the collective nodal and elem fields + // ******************************** + + if (SimulationParameters.output_options.format == output_options::viz || + SimulationParameters.output_options.format == output_options::viz_and_state) { + + write_parallel_viz(mesh, + State, + SimulationParameters, + dt, + time_value, + graphics_times, + solver_id, + node_states, + gauss_pt_states, + material_pt_states, + num_elem_scalar_vars, + num_elem_tensor_vars, + num_node_scalar_vars, + num_node_vector_vars, + num_mat_pt_scalar_vars, + num_mat_pt_tensor_vars, + elem_scalar_var_names, + elem_tensor_var_names, + mat_elem_scalar_var_names, + mat_elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + den_id, + pres_id, + sie_id, + sspd_id, + mass_id, + stress_id, + vol_id, + div_id, + level_set_id, + vel_grad_id, + conductivity_id, + specific_heat_id, + node_mass_id, + node_vel_id, + node_accel_id, + node_coord_id, + node_grad_level_set_id, + node_temp_id, + mat_den_id, + mat_pres_id, + mat_sie_id, + mat_sspd_id, + mat_mass_id, + mat_volfrac_id, + mat_geo_volfrac_id, + mat_eroded_id, + mat_stress_id, + mat_conductivity_id, + mat_specific_heat_id); + } // end if viz paraview output is to be written + + + // STATE + if (SimulationParameters.output_options.format == output_options::state || + SimulationParameters.output_options.format == output_options::viz_and_state) { + + write_material_point_state(mesh, + State, + SimulationParameters, + time_value, + graphics_times, + node_states, + gauss_pt_states, + material_pt_states); + + } // end if state is to be written + + + // will drop ensight outputs in the near future + if (SimulationParameters.output_options.format == output_options::ensight){ + write_ensight(mesh, + State, + SimulationParameters, + dt, + time_value, + graphics_times, + node_states, + gauss_pt_states, + material_pt_states); + } + 
+ return; + } // end write_mesh + + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn write_collective vtm + /// + /// \brief Writes an ensight output file + /// + /// \param Simulation mesh + /// \param State data + /// \param Simulation parameters + /// \param current time value + /// \param Vector of all graphics output times + /// + ///////////////////////////////////////////////////////////////////////////// + void write_parallel_viz(Mesh_t& mesh, + State_t& State, + SimulationParameters_t& SimulationParameters, + double dt, + double time_value, + CArray graphics_times, + const size_t solver_id, + std::vector node_states, + std::vector gauss_pt_states, + std::vector material_pt_states, + size_t num_elem_scalar_vars, + size_t num_elem_tensor_vars, + size_t num_node_scalar_vars, + size_t num_node_vector_vars, + size_t num_mat_pt_scalar_vars, + size_t num_mat_pt_tensor_vars, + std::vector elem_scalar_var_names, + std::vector elem_tensor_var_names, + std::vector mat_elem_scalar_var_names, + std::vector mat_elem_tensor_var_names, + std::vector node_scalar_var_names, + std::vector node_vector_var_names, + const int den_id, + const int pres_id, + const int sie_id, + const int sspd_id, + const int mass_id, + const int stress_id, + const int vol_id, + const int div_id, + const int level_set_id, + const int vel_grad_id, + const int conductivity_id, + const int specific_heat_id, + const int node_mass_id, + const int node_vel_id, + const int node_accel_id, + const int node_coord_id, + const int node_grad_level_set_id, + const int node_temp_id, + const int mat_den_id, + const int mat_pres_id, + const int mat_sie_id, + const int mat_sspd_id, + const int mat_mass_id, + const int mat_volfrac_id, + const int mat_geo_volfrac_id, + const int mat_eroded_id, + const int mat_stress_id, + const int mat_conductivity_id, + const int mat_specific_heat_id) + { // ************************************** // build and save element average fields 
// ************************************** @@ -3356,8 +3510,485 @@ class MeshWriter const size_t num_local_nodes = mesh.num_local_nodes; DistributedMap local_element_map = mesh.local_element_map; DistributedMap node_map = mesh.node_map; + DistributedMap nonoverlap_element_node_map = mesh.nonoverlap_element_node_map; const int Pn_order = mesh.Pn; + const size_t num_mats = State.MaterialPoints.num_material_points.size(); + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + + /* save the elem state to an array for exporting to graphics files*/ + + //host version of local element map for argument compatibility + DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); + DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); + elem_scalar_fields.set_values(0.0); + elem_tensor_fields.set_values(0.0); + + // ----------------------------------------------------------------------- + // save the output fields to a single element average array for all state + // ----------------------------------------------------------------------- + for (int mat_id = 0; mat_id < num_mats; mat_id++) { + + // material point and guass point state are concatenated together + concatenate_elem_fields(State.MaterialPoints, + State.GaussPoints, + elem_scalar_fields, + elem_tensor_fields, + State.MaterialToMeshMaps.elem_in_mat_elem, + SimulationParameters.output_options.output_elem_state, + SimulationParameters.output_options.output_gauss_pt_state, + State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), + mat_id, + num_local_elems, + den_id, + pres_id, + sie_id, + sspd_id, + mass_id, + stress_id, + vol_id, + div_id, + level_set_id, + vel_grad_id, + conductivity_id, + specific_heat_id); + } // end for mats + + // make specific fields for the element average + if (sie_id>=0){ + FOR_ALL(elem_gid, 0, num_local_elems, { + // get sie by dividing by the mass + 
elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(elem_gid, mass_id)+1.e-20); + }); + } // end if + + Kokkos::fence(); + elem_scalar_fields.update_host(); + elem_tensor_fields.update_host(); + + + // ************************ + // Build the nodal fields + // ************************ + + // save the nodal fields to an array for exporting to graphics files + DistributedDFArray node_scalar_fields(node_map, num_node_scalar_vars, "node_scalars"); + DistributedDFArray node_vector_fields(node_map, num_node_vector_vars, 3, "node_tenors"); + DistributedDFArray nonoverlap_node_scalar_fields(nonoverlap_elem_node_map, num_node_scalar_vars, "node_scalars"); + DistributedDFArray nonoverlap_node_vector_fields(nonoverlap_elem_node_map, num_node_vector_vars, 3, "node_tenors"); + + concatenate_nodal_fields(State.node, + node_scalar_fields, + node_vector_fields, + SimulationParameters.output_options.output_node_state, + dt, + num_local_nodes, + num_dims, + node_mass_id, + node_vel_id, + node_accel_id, + node_coord_id, + node_grad_level_set_id, + node_temp_id); + + + Kokkos::fence(); + node_scalar_fields.update_host(); + node_vector_fields.update_host(); + + // *************************************************************************** + // Communications for node data from node map to nodes on unique element map + // *************************************************************************** + + //node data comms + CommPlan node_scalars_comms(nonoverlap_node_scalar_fields, node_scalar_fields); + CommPlan node_vectors_comms(nonoverlap_node_vector_fields, node_vector_fields, node_scalars_comms); + node_scalars_comms.execute_comms(); + node_vectors_comms.execute_comms(); + + //nodal coordinates comms + //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + DistributedDFArray nonoverlap_node_coords(nonoverlap_element_node_map, mesh.num_dims); + CommPlan node_coords_comms(nonoverlap_node_coords, State.node.coords, 
node_scalars_comms); + node_coords_comms.execute_comms(); + + if(myrank==0){ + // create the folder structure if it does not exist + struct stat st; + + if (stat("vtk", &st) != 0) { + int returnCode = system("mkdir vtk"); + + if (returnCode == 1) { + std::cout << "Unable to make vtk directory" << std::endl; + } + } + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside + int returnCode = system("rm vtk/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/Fierro directory" << std::endl; + } + } + } + + if (stat("vtk/data", &st) != 0) { + int returnCode = system("mkdir vtk/data"); + if (returnCode == 1) { + std::cout << "Unable to make vtk/data directory" << std::endl; + } + } + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside the folder + int returnCode = system("rm vtk/data/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/data directory" << std::endl; + } + } + } + } + // call the .vtu writer for element fields + std::string elem_fields_name = "fields"; + + if(myrank==0){ + write_vtu(nonoverlap_node_coords, + nodes_in_elem, + elem_scalar_fields, + elem_tensor_fields, + nonoverlap_node_scalar_fields, + nonoverlap_node_vector_fields, + elem_scalar_var_names, + elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + elem_fields_name, + graphics_id, + nonoverlap_elem_node_map.size(), + mesh.num_local_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); + } + + // ******************************** + // Build and write the mat fields + // ******************************** + + + // note: the file path and folder was created in the elem and node outputs + size_t num_mat_files_written = 0; + if(num_mat_pt_scalar_vars > 0 || num_mat_pt_tensor_vars >0){ + + for (int mat_id = 0; mat_id < num_mats; mat_id++) { + + const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); + //array storing number of local elems 
for this material on each process + CArray processes_num_local_mat_elems, gatherv_displacements; + if(myrank==0){ + processes_num_local_mat_elems = CArray(nranks); + gatherv_displacements = CArray(nranks); + } + MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, + MPI_INT, 0, MPI_COMM_WORLD); + + //set global element indices on this rank + HostDistributedMap host_mat_elem_map; + DistributedMap element_map = mesh.element_map; + DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ + global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); + } + host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); + + //allocate arrays for distributed mat elem data + DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); + DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); + + //collect global element indices on rank 0 for this mat + //tally total number of mat elems for rank 0 + DCArrayKokkos global_indices_of_collective_mat_elems; + long long int num_mat_collective_elems = 0; + if(myrank==0){ + for(int irank=0; irank < nranks; irank++){ + gatherv_displacements(irank) = num_mat_collective_elems; + num_mat_collective_elems += processes_num_local_mat_elems(irank); + } + global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); + } + // if(myrank==0){ + // for(int irank=0; irank < nranks; irank++){ + // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; + // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; + // } + // } + MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, 
MPI_LONG_LONG_INT, + global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), + gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //use indices on rank 0 to construct rank 0 collective map for this mat + HostDistributedMap collective_mat_elem_map; + collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); + //collective_mat_elem_map.print(); + + //collective storage for scalars and tensors using collective elem mat map + DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); + DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + + // set the nodal vars to zero size, we don't write these fields again + node_scalar_var_names.clear(); + node_vector_var_names.clear(); + + // concatenate material fields into a single array + concatenate_mat_fields(State.MaterialPoints, + host_mat_elem_scalar_fields, + host_mat_elem_tensor_fields, + State.MaterialToMeshMaps.elem_in_mat_elem, + SimulationParameters.output_options.output_mat_pt_state, + num_mat_local_elems, + mat_id, + mat_den_id, + mat_pres_id, + mat_sie_id, + mat_sspd_id, + mat_mass_id, + mat_volfrac_id, + mat_geo_volfrac_id, + mat_eroded_id, + mat_stress_id, + mat_conductivity_id, + mat_specific_heat_id); + + + std::string str_mat_val = std::to_string(mat_id); + std::string mat_fields_name = "mat"; + mat_fields_name += str_mat_val; // add the mat number + + // the number of actual nodes belonging to the part (i.e., the material) + size_t num_mat_nodes = 0; + + //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 + + //collect nodes in elem for this material on rank 0 + DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); + HostCommPlan 
mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 + mat_nodes_in_elem_comms.execute_comms(); + + HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); + mat_elem_scalars_comms.execute_comms(); + + HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); + mat_elem_tensors_comms.execute_comms(); + + //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat + + // build a unique mesh (element and nodes) for the material (i.e., the part) + DCArrayKokkos collective_mat_node_indices; + if(myrank==0){ + build_material_node_list(mesh, + collective_mat_node_indices, + collective_mat_nodes_in_mat_elem, + State.MaterialToMeshMaps.elem_in_mat_elem, + mat_id, + num_mat_nodes, + num_mat_collective_elems, + num_nodes_in_elem, + num_dims); + } + + //map object for mat node indices + HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); + + DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); + HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + mat_node_coords_comms.execute_comms(); + + DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); + HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + mat_node_scalars_comms.execute_comms(); + + DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); + HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 
0 + mat_node_vectors_comms.execute_comms(); + + //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh + for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); + } + } // end for elem_gid + + // only write material data if the mat lives on the mesh, ie. has state allocated + if (num_mat_collective_elems>0&&myrank==0){ + // write out a vtu file this + write_vtu(collective_mat_node_coords, + collective_mat_nodes_in_mat_elem, + collective_mat_elem_scalar_fields, + collective_mat_elem_tensor_fields, + collective_mat_node_scalar_fields, + collective_mat_node_vector_fields, + mat_elem_scalar_var_names, + mat_elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + mat_fields_name, + graphics_id, + num_mat_nodes, + num_mat_collective_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); + + + num_mat_files_written++; + + } // end for mat_id + + } // end if material is on the mesh + + } // end if mat variables are to be written + + + // ************************************************* + // write Paraview files to open the graphics files + // ************************************************* + + // save the graphics time + graphics_times(graphics_id) = time_value; + + // check to see if an mesh state was written + bool write_mesh_state = false; + if( num_elem_scalar_vars > 0 || + num_elem_tensor_vars > 0 || + num_node_scalar_vars > 0 || + num_node_vector_vars > 0) + { + write_mesh_state = true; + } + // check to see if a mat state was written + bool write_mat_pt_state = false; + if( num_mat_pt_scalar_vars > 0 || + num_mat_pt_tensor_vars > 0) + { + write_mat_pt_state = true; + } + + // call the vtm file writer + std::string mat_fields_name = "mat"; + 
if(myrank==0){ + write_vtm(graphics_times, + elem_fields_name, + mat_fields_name, + time_value, + graphics_id, + num_mat_files_written, + write_mesh_state, + write_mat_pt_state, + solver_id); + + // call the pvd file writer + write_pvd(graphics_times, + time_value, + graphics_id, + solver_id); + } + + // increment graphics id counter + graphics_id++; // this is private variable in the class + } + + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn write_collective vtm + /// + /// \brief Writes an ensight output file + /// + /// \param Simulation mesh + /// \param State data + /// \param Simulation parameters + /// \param current time value + /// \param Vector of all graphics output times + /// + ///////////////////////////////////////////////////////////////////////////// + void write_collective_viz(Mesh_t& mesh, + State_t& State, + SimulationParameters_t& SimulationParameters, + double dt, + double time_value, + CArray graphics_times, + const size_t solver_id, + std::vector node_states, + std::vector gauss_pt_states, + std::vector material_pt_states, + size_t num_elem_scalar_vars, + size_t num_elem_tensor_vars, + size_t num_node_scalar_vars, + size_t num_node_vector_vars, + size_t num_mat_pt_scalar_vars, + size_t num_mat_pt_tensor_vars, + std::vector elem_scalar_var_names, + std::vector elem_tensor_var_names, + std::vector mat_elem_scalar_var_names, + std::vector mat_elem_tensor_var_names, + std::vector node_scalar_var_names, + std::vector node_vector_var_names, + const int den_id, + const int pres_id, + const int sie_id, + const int sspd_id, + const int mass_id, + const int stress_id, + const int vol_id, + const int div_id, + const int level_set_id, + const int vel_grad_id, + const int conductivity_id, + const int specific_heat_id, + const int node_mass_id, + const int node_vel_id, + const int node_accel_id, + const int node_coord_id, + const int node_grad_level_set_id, + const int node_temp_id, + const int mat_den_id, 
+ const int mat_pres_id, + const int mat_sie_id, + const int mat_sspd_id, + const int mat_mass_id, + const int mat_volfrac_id, + const int mat_geo_volfrac_id, + const int mat_eroded_id, + const int mat_stress_id, + const int mat_conductivity_id, + const int mat_specific_heat_id) + { + // ************************************** + // build and save element average fields + // ************************************** + + // short hand + const size_t num_nodes = mesh.num_nodes; + const size_t num_elems = mesh.num_elems; + const size_t num_dims = mesh.num_dims; + const size_t num_nodes_in_elem = mesh.num_nodes_in_elem; + const size_t num_local_elems = mesh.num_local_elems; + const size_t num_local_nodes = mesh.num_local_nodes; + DistributedMap local_element_map = mesh.local_element_map; + DistributedMap node_map = mesh.node_map; + const int Pn_order = mesh.Pn; + + const size_t num_mats = State.MaterialPoints.num_material_points.size(); + + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + /* save the elem state to an array for exporting to graphics files*/ //host version of local element map for argument compatibility @@ -3428,27 +4059,27 @@ class MeshWriter // material point and guass point state are concatenated together copy_elem_fields(elem_scalar_fields, - elem_tensor_fields, - host_elem_scalar_fields, - host_elem_tensor_fields, - State.MaterialToMeshMaps.elem_in_mat_elem, - SimulationParameters.output_options.output_elem_state, - SimulationParameters.output_options.output_gauss_pt_state, - State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), - mat_id, - num_local_elems, - den_id, - pres_id, - sie_id, - sspd_id, - mass_id, - stress_id, - vol_id, - div_id, - level_set_id, - vel_grad_id, - conductivity_id, - specific_heat_id); + elem_tensor_fields, + host_elem_scalar_fields, + host_elem_tensor_fields, + State.MaterialToMeshMaps.elem_in_mat_elem, + SimulationParameters.output_options.output_elem_state, + 
SimulationParameters.output_options.output_gauss_pt_state, + State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), + mat_id, + num_local_elems, + den_id, + pres_id, + sie_id, + sspd_id, + mass_id, + stress_id, + vol_id, + div_id, + level_set_id, + vel_grad_id, + conductivity_id, + specific_heat_id); } // end for mats @@ -3469,40 +4100,40 @@ class MeshWriter DistributedDFArray node_vector_fields(node_map, num_node_vector_vars, 3, "node_tenors"); DistributedFArray host_node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); DistributedFArray host_node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); - + concatenate_nodal_fields(State.node, - node_scalar_fields, - node_vector_fields, - SimulationParameters.output_options.output_node_state, - dt, - num_local_nodes, - num_dims, - node_mass_id, - node_vel_id, - node_accel_id, - node_coord_id, - node_grad_level_set_id, - node_temp_id); - + node_scalar_fields, + node_vector_fields, + SimulationParameters.output_options.output_node_state, + dt, + num_local_nodes, + num_dims, + node_mass_id, + node_vel_id, + node_accel_id, + node_coord_id, + node_grad_level_set_id, + node_temp_id); + Kokkos::fence(); node_scalar_fields.update_host(); node_vector_fields.update_host(); copy_nodal_fields(host_node_scalar_fields, - host_node_vector_fields, - node_scalar_fields, - node_vector_fields, - SimulationParameters.output_options.output_node_state, - dt, - num_local_nodes, - num_dims, - node_mass_id, - node_vel_id, - node_accel_id, - node_coord_id, - node_grad_level_set_id, - node_temp_id); + host_node_vector_fields, + node_scalar_fields, + node_vector_fields, + SimulationParameters.output_options.output_node_state, + dt, + num_local_nodes, + num_dims, + node_mass_id, + node_vel_id, + node_accel_id, + node_coord_id, + node_grad_level_set_id, + node_temp_id); // ************************************************** // Collective communications for node and elem data @@ -3566,334 +4197,293 @@ class 
MeshWriter HostCommPlan collective_node_coords_comms(collective_node_coords, host_node_coords, collective_node_scalars_comms); collective_node_coords_comms.execute_comms(); - // ******************************** - // Write the collective nodal and elem fields - // ******************************** - - if (SimulationParameters.output_options.format == output_options::viz || - SimulationParameters.output_options.format == output_options::viz_and_state) { - if(myrank==0){ - // create the folder structure if it does not exist - struct stat st; + if(myrank==0){ + // create the folder structure if it does not exist + struct stat st; - if (stat("vtk", &st) != 0) { - int returnCode = system("mkdir vtk"); + if (stat("vtk", &st) != 0) { + int returnCode = system("mkdir vtk"); - if (returnCode == 1) { - std::cout << "Unable to make vtk directory" << std::endl; - } + if (returnCode == 1) { + std::cout << "Unable to make vtk directory" << std::endl; } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files inside - int returnCode = system("rm vtk/Fierro*"); - if (returnCode == 1) { - std::cout << "Unable to clear vtk/Fierro directory" << std::endl; - } + } + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside + int returnCode = system("rm vtk/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/Fierro directory" << std::endl; } } + } - if (stat("vtk/data", &st) != 0) { - int returnCode = system("mkdir vtk/data"); - if (returnCode == 1) { - std::cout << "Unable to make vtk/data directory" << std::endl; - } + if (stat("vtk/data", &st) != 0) { + int returnCode = system("mkdir vtk/data"); + if (returnCode == 1) { + std::cout << "Unable to make vtk/data directory" << std::endl; } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files inside the folder - int returnCode = system("rm vtk/data/Fierro*"); - if (returnCode == 1) { - std::cout << "Unable to clear vtk/data directory" << std::endl; - } 
+ } + else{ + if(solver_id==0 && graphics_id==0){ + // delete the existing files inside the folder + int returnCode = system("rm vtk/data/Fierro*"); + if (returnCode == 1) { + std::cout << "Unable to clear vtk/data directory" << std::endl; } } } - // call the .vtu writer for element fields - std::string elem_fields_name = "fields"; - - if(myrank==0){ - write_vtu(collective_node_coords, - collective_nodes_in_elem, - collective_elem_scalar_fields, - collective_elem_tensor_fields, - collective_node_scalar_fields, - collective_node_vector_fields, - elem_scalar_var_names, - elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - elem_fields_name, - graphics_id, - mesh.global_num_nodes, - mesh.global_num_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - } + } + // call the .vtu writer for element fields + std::string elem_fields_name = "fields"; - // ******************************** - // Build and write the mat fields - // ******************************** + if(myrank==0){ + write_vtu(collective_node_coords, + collective_nodes_in_elem, + collective_elem_scalar_fields, + collective_elem_tensor_fields, + collective_node_scalar_fields, + collective_node_vector_fields, + elem_scalar_var_names, + elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + elem_fields_name, + graphics_id, + mesh.global_num_nodes, + mesh.global_num_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); + } + // ******************************** + // Build and write the mat fields + // ******************************** - // note: the file path and folder was created in the elem and node outputs - size_t num_mat_files_written = 0; - if(num_mat_pt_scalar_vars > 0 || num_mat_pt_tensor_vars >0){ - for (int mat_id = 0; mat_id < num_mats; mat_id++) { + // note: the file path and folder was created in the elem and node outputs + size_t num_mat_files_written = 0; + if(num_mat_pt_scalar_vars > 0 || num_mat_pt_tensor_vars >0){ - const size_t 
num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); - //array storing number of local elems for this material on each process - CArray processes_num_local_mat_elems, gatherv_displacements; - if(myrank==0){ - processes_num_local_mat_elems = CArray(nranks); - gatherv_displacements = CArray(nranks); - } - MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, - MPI_INT, 0, MPI_COMM_WORLD); - - //set global element indices on this rank - HostDistributedMap host_mat_elem_map; - DistributedMap element_map = mesh.element_map; - DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); - for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); - } - host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); - - //allocate arrays for distributed mat elem data - DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); - DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); - - //collect global element indices on rank 0 for this mat - //tally total number of mat elems for rank 0 - DCArrayKokkos global_indices_of_collective_mat_elems; - long long int num_mat_collective_elems = 0; - if(myrank==0){ - for(int irank=0; irank < nranks; irank++){ - gatherv_displacements(irank) = num_mat_collective_elems; - num_mat_collective_elems += processes_num_local_mat_elems(irank); - } - global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); - } - // if(myrank==0){ - // for(int irank=0; irank < nranks; irank++){ - // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; - // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << 
std::endl; - // } - // } - MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, - global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), - gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); - - //use indices on rank 0 to construct rank 0 collective map for this mat - HostDistributedMap collective_mat_elem_map; - collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); - //collective_mat_elem_map.print(); - - //collective storage for scalars and tensors using collective elem mat map - DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); - DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); - - // set the nodal vars to zero size, we don't write these fields again - node_scalar_var_names.clear(); - node_vector_var_names.clear(); - - // concatenate material fields into a single array - concatenate_mat_fields(State.MaterialPoints, - host_mat_elem_scalar_fields, - host_mat_elem_tensor_fields, - State.MaterialToMeshMaps.elem_in_mat_elem, - SimulationParameters.output_options.output_mat_pt_state, - num_mat_local_elems, - mat_id, - mat_den_id, - mat_pres_id, - mat_sie_id, - mat_sspd_id, - mat_mass_id, - mat_volfrac_id, - mat_geo_volfrac_id, - mat_eroded_id, - mat_stress_id, - mat_conductivity_id, - mat_specific_heat_id); - - - std::string str_mat_val = std::to_string(mat_id); - std::string mat_fields_name = "mat"; - mat_fields_name += str_mat_val; // add the mat number - - // the number of actual nodes belonging to the part (i.e., the material) - size_t num_mat_nodes = 0; - - //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 - - //collect nodes in elem for this material on rank 0 - DistributedFArray 
collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); - HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 - mat_nodes_in_elem_comms.execute_comms(); + for (int mat_id = 0; mat_id < num_mats; mat_id++) { - HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); - mat_elem_scalars_comms.execute_comms(); - - HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); - mat_elem_tensors_comms.execute_comms(); - - //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat - - // build a unique mesh (element and nodes) for the material (i.e., the part) - DCArrayKokkos collective_mat_node_indices; - if(myrank==0){ - build_material_node_list(mesh, - collective_mat_node_indices, - collective_mat_nodes_in_mat_elem, - State.MaterialToMeshMaps.elem_in_mat_elem, - mat_id, - num_mat_nodes, - num_mat_collective_elems, - num_nodes_in_elem, - num_dims); + const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); + //array storing number of local elems for this material on each process + CArray processes_num_local_mat_elems, gatherv_displacements; + if(myrank==0){ + processes_num_local_mat_elems = CArray(nranks); + gatherv_displacements = CArray(nranks); + } + MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, + MPI_INT, 0, MPI_COMM_WORLD); + + //set global element indices on this rank + HostDistributedMap host_mat_elem_map; + DistributedMap element_map = mesh.element_map; + DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ + global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, 
ielem)); + } + host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); + + //allocate arrays for distributed mat elem data + DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); + DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); + + //collect global element indices on rank 0 for this mat + //tally total number of mat elems for rank 0 + DCArrayKokkos global_indices_of_collective_mat_elems; + long long int num_mat_collective_elems = 0; + if(myrank==0){ + for(int irank=0; irank < nranks; irank++){ + gatherv_displacements(irank) = num_mat_collective_elems; + num_mat_collective_elems += processes_num_local_mat_elems(irank); } - - //map object for mat node indices - HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); - - DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); - HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 - mat_node_coords_comms.execute_comms(); - - DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); - HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 - mat_node_scalars_comms.execute_comms(); - - DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); - HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 - mat_node_vectors_comms.execute_comms(); - - //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh - for (size_t elem_id = 0; elem_id < 
num_mat_collective_elems; elem_id++) { - for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); - } - } // end for elem_gid - - // only write material data if the mat lives on the mesh, ie. has state allocated - if (num_mat_collective_elems>0&&myrank==0){ - // write out a vtu file this - write_vtu(collective_mat_node_coords, - collective_mat_nodes_in_mat_elem, - collective_mat_elem_scalar_fields, - collective_mat_elem_tensor_fields, - collective_mat_node_scalar_fields, - collective_mat_node_vector_fields, - mat_elem_scalar_var_names, - mat_elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - mat_fields_name, - graphics_id, - num_mat_nodes, - num_mat_collective_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - - - num_mat_files_written++; - - } // end for mat_id - - } // end if material is on the mesh - - } // end if mat variables are to be written - - - // ************************************************* - // write Paraview files to open the graphics files - // ************************************************* - - // save the graphics time - graphics_times(graphics_id) = time_value; - - // check to see if an mesh state was written - bool write_mesh_state = false; - if( num_elem_scalar_vars > 0 || - num_elem_tensor_vars > 0 || - num_node_scalar_vars > 0 || - num_node_vector_vars > 0) - { - write_mesh_state = true; - } - // check to see if a mat state was written - bool write_mat_pt_state = false; - if( num_mat_pt_scalar_vars > 0 || - num_mat_pt_tensor_vars > 0) - { - write_mat_pt_state = true; - } - - // call the vtm file writer - std::string mat_fields_name = "mat"; - if(myrank==0){ - write_vtm(graphics_times, - elem_fields_name, - mat_fields_name, - time_value, - graphics_id, - num_mat_files_written, - write_mesh_state, - write_mat_pt_state, - solver_id); - - // 
call the pvd file writer - write_pvd(graphics_times, - time_value, - graphics_id, - solver_id); - } + global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); + } + // if(myrank==0){ + // for(int irank=0; irank < nranks; irank++){ + // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; + // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; + // } + // } + MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, + global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), + gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //use indices on rank 0 to construct rank 0 collective map for this mat + HostDistributedMap collective_mat_elem_map; + collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); + //collective_mat_elem_map.print(); + + //collective storage for scalars and tensors using collective elem mat map + DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); + DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + + // set the nodal vars to zero size, we don't write these fields again + node_scalar_var_names.clear(); + node_vector_var_names.clear(); + + // concatenate material fields into a single array + concatenate_mat_fields(State.MaterialPoints, + host_mat_elem_scalar_fields, + host_mat_elem_tensor_fields, + State.MaterialToMeshMaps.elem_in_mat_elem, + SimulationParameters.output_options.output_mat_pt_state, + num_mat_local_elems, + mat_id, + mat_den_id, + mat_pres_id, + mat_sie_id, + mat_sspd_id, + mat_mass_id, + mat_volfrac_id, + mat_geo_volfrac_id, + mat_eroded_id, + mat_stress_id, + mat_conductivity_id, 
+ mat_specific_heat_id); + + + std::string str_mat_val = std::to_string(mat_id); + std::string mat_fields_name = "mat"; + mat_fields_name += str_mat_val; // add the mat number + + // the number of actual nodes belonging to the part (i.e., the material) + size_t num_mat_nodes = 0; + + //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 + + //collect nodes in elem for this material on rank 0 + DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); + HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 + mat_nodes_in_elem_comms.execute_comms(); - // increment graphics id counter - graphics_id++; // this is private variable in the class + HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); + mat_elem_scalars_comms.execute_comms(); - } // end if viz paraview output is to be written + HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); + mat_elem_tensors_comms.execute_comms(); + //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat + + // build a unique mesh (element and nodes) for the material (i.e., the part) + DCArrayKokkos collective_mat_node_indices; + if(myrank==0){ + build_material_node_list(mesh, + collective_mat_node_indices, + collective_mat_nodes_in_mat_elem, + State.MaterialToMeshMaps.elem_in_mat_elem, + mat_id, + num_mat_nodes, + num_mat_collective_elems, + num_nodes_in_elem, + num_dims); + } + + //map object for mat node indices + HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); - // STATE - if (SimulationParameters.output_options.format == output_options::state || - SimulationParameters.output_options.format == output_options::viz_and_state) { + 
DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); + HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + mat_node_coords_comms.execute_comms(); - write_material_point_state(mesh, - State, - SimulationParameters, - time_value, - graphics_times, - node_states, - gauss_pt_states, - material_pt_states); + DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); + HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + mat_node_scalars_comms.execute_comms(); - } // end if state is to be written + DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); + HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 + mat_node_vectors_comms.execute_comms(); + //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh + for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); + } + } // end for elem_gid + + // only write material data if the mat lives on the mesh, ie. 
has state allocated + if (num_mat_collective_elems>0&&myrank==0){ + // write out a vtu file this + write_vtu(collective_mat_node_coords, + collective_mat_nodes_in_mat_elem, + collective_mat_elem_scalar_fields, + collective_mat_elem_tensor_fields, + collective_mat_node_scalar_fields, + collective_mat_node_vector_fields, + mat_elem_scalar_var_names, + mat_elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + mat_fields_name, + graphics_id, + num_mat_nodes, + num_mat_collective_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); + + + num_mat_files_written++; + + } // end for mat_id + + } // end if material is on the mesh + + } // end if mat variables are to be written + + + // ************************************************* + // write Paraview files to open the graphics files + // ************************************************* + + // save the graphics time + graphics_times(graphics_id) = time_value; - // will drop ensight outputs in the near future - if (SimulationParameters.output_options.format == output_options::ensight){ - write_ensight(mesh, - State, - SimulationParameters, - dt, - time_value, - graphics_times, - node_states, - gauss_pt_states, - material_pt_states); + // check to see if an mesh state was written + bool write_mesh_state = false; + if( num_elem_scalar_vars > 0 || + num_elem_tensor_vars > 0 || + num_node_scalar_vars > 0 || + num_node_vector_vars > 0) + { + write_mesh_state = true; + } + // check to see if a mat state was written + bool write_mat_pt_state = false; + if( num_mat_pt_scalar_vars > 0 || + num_mat_pt_tensor_vars > 0) + { + write_mat_pt_state = true; } - return; + // call the vtm file writer + std::string mat_fields_name = "mat"; + if(myrank==0){ + write_vtm(graphics_times, + elem_fields_name, + mat_fields_name, + time_value, + graphics_id, + num_mat_files_written, + write_mesh_state, + write_mat_pt_state, + solver_id); + + // call the pvd file writer + write_pvd(graphics_times, + time_value, + 
graphics_id, + solver_id); + } - } // end write_mesh + // increment graphics id counter + graphics_id++; // this is private variable in the class + } ///////////////////////////////////////////////////////////////////////////// /// From cfa378d0596f1fc3252493d9dd3b538f2fcf7dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 19 Aug 2025 16:51:11 -0600 Subject: [PATCH 45/66] WIP: parallel vtm writer --- .../src/common/include/mesh_io.h | 1255 ++++++++--------- .../src/common/include/state.h | 2 + 2 files changed, 613 insertions(+), 644 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index daf5ac1a8..5270d28a8 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3510,7 +3510,8 @@ class MeshWriter const size_t num_local_nodes = mesh.num_local_nodes; DistributedMap local_element_map = mesh.local_element_map; DistributedMap node_map = mesh.node_map; - DistributedMap nonoverlap_element_node_map = mesh.nonoverlap_element_node_map; + DistributedMap all_node_map = mesh.all_node_map; + DistributedMap nonoverlap_elem_node_map = mesh.nonoverlap_element_node_map; const int Pn_order = mesh.Pn; const size_t num_mats = State.MaterialPoints.num_material_points.size(); @@ -3524,6 +3525,7 @@ class MeshWriter //host version of local element map for argument compatibility DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); + DistributedDFArray nonoverlap_nodes_in_elem(local_element_map, num_nodes_in_elem, "nonoverlap_nodes_in_elem"); elem_scalar_fields.set_values(0.0); elem_tensor_fields.set_values(0.0); @@ -3604,17 +3606,24 @@ class MeshWriter // *************************************************************************** //node 
data comms - CommPlan node_scalars_comms(nonoverlap_node_scalar_fields, node_scalar_fields); - CommPlan node_vectors_comms(nonoverlap_node_vector_fields, node_vector_fields, node_scalars_comms); + OutputCommPlan node_scalars_comms(nonoverlap_node_scalar_fields, node_scalar_fields); + OutputCommPlan node_vectors_comms(nonoverlap_node_vector_fields, node_vector_fields, node_scalars_comms); node_scalars_comms.execute_comms(); node_vectors_comms.execute_comms(); //nodal coordinates comms //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) - DistributedDFArray nonoverlap_node_coords(nonoverlap_element_node_map, mesh.num_dims); - CommPlan node_coords_comms(nonoverlap_node_coords, State.node.coords, node_scalars_comms); + DistributedDCArray nonoverlap_node_coords(nonoverlap_elem_node_map, mesh.num_dims); + CommPlan node_coords_comms(nonoverlap_node_coords, State.node.coords); node_coords_comms.execute_comms(); + //convert local node ids in nodes in elem so they correspond to nonoverlap map + for (size_t elem_id = 0; elem_id < num_local_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + nonoverlap_nodes_in_elem(elem_id, node_lid) = nonoverlap_elem_node_map.getLocalIndex(all_node_map.getGlobalIndex(mesh.nodes_in_elem(elem_id, node_lid))); + } + } // end for elem_gid + if(myrank==0){ // create the folder structure if it does not exist struct stat st; @@ -3657,7 +3666,7 @@ class MeshWriter if(myrank==0){ write_vtu(nonoverlap_node_coords, - nodes_in_elem, + nonoverlap_nodes_in_elem, elem_scalar_fields, elem_tensor_fields, nonoverlap_node_scalar_fields, @@ -3688,57 +3697,18 @@ class MeshWriter for (int mat_id = 0; mat_id < num_mats; mat_id++) { const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); - //array storing number of local elems for this material on each process - CArray processes_num_local_mat_elems, 
gatherv_displacements; - if(myrank==0){ - processes_num_local_mat_elems = CArray(nranks); - gatherv_displacements = CArray(nranks); - } - MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, - MPI_INT, 0, MPI_COMM_WORLD); - //set global element indices on this rank - HostDistributedMap host_mat_elem_map; + //set global element indices on this rank for this mat DistributedMap element_map = mesh.element_map; DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); + global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); } - host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); + DistributedMap mat_elem_map = DistributedMap(global_indices_of_local_mat_elems); //allocate arrays for distributed mat elem data - DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); - DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); - - //collect global element indices on rank 0 for this mat - //tally total number of mat elems for rank 0 - DCArrayKokkos global_indices_of_collective_mat_elems; - long long int num_mat_collective_elems = 0; - if(myrank==0){ - for(int irank=0; irank < nranks; irank++){ - gatherv_displacements(irank) = num_mat_collective_elems; - num_mat_collective_elems += processes_num_local_mat_elems(irank); - } - global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); - } - // if(myrank==0){ - // for(int irank=0; irank < nranks; irank++){ - // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; - // std::cout << "gatherv 
displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; - // } - // } - MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, - global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), - gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); - - //use indices on rank 0 to construct rank 0 collective map for this mat - HostDistributedMap collective_mat_elem_map; - collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); - //collective_mat_elem_map.print(); - - //collective storage for scalars and tensors using collective elem mat map - DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); - DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + DistributedDFArray mat_elem_scalar_fields(mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); + DistributedDFArray mat_elem_tensor_fields(mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); // set the nodal vars to zero size, we don't write these fields again node_scalar_var_names.clear(); @@ -3746,8 +3716,8 @@ class MeshWriter // concatenate material fields into a single array concatenate_mat_fields(State.MaterialPoints, - host_mat_elem_scalar_fields, - host_mat_elem_tensor_fields, + mat_elem_scalar_fields, + mat_elem_tensor_fields, State.MaterialToMeshMaps.elem_in_mat_elem, SimulationParameters.output_options.output_mat_pt_state, num_mat_local_elems, @@ -3775,63 +3745,55 @@ class MeshWriter //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 //collect nodes in elem for this material on rank 0 - DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); - HostCommPlan 
mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 + DistributedDCArray mat_nodes_in_mat_elem(mat_elem_map, num_nodes_in_elem, "mat_nodes_in_mat_elem"); + CommPlan mat_nodes_in_elem_comms(mat_nodes_in_mat_elem,mesh.nodes_in_elem); //shouldnt do comms since subview of map on this rank mat_nodes_in_elem_comms.execute_comms(); - HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); - mat_elem_scalars_comms.execute_comms(); - - HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); - mat_elem_tensors_comms.execute_comms(); - //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat // build a unique mesh (element and nodes) for the material (i.e., the part) - DCArrayKokkos collective_mat_node_indices; - if(myrank==0){ + DCArrayKokkos mat_node_indices; build_material_node_list(mesh, - collective_mat_node_indices, - collective_mat_nodes_in_mat_elem, + mat_node_indices, + mat_nodes_in_mat_elem, State.MaterialToMeshMaps.elem_in_mat_elem, mat_id, num_mat_nodes, - num_mat_collective_elems, + num_mat_local_elems, num_nodes_in_elem, num_dims); - } //map object for mat node indices - HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); + DistributedMap collective_mat_node_map = DistributedMap(mat_node_indices); - DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); - HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + DistributedDCArray mat_node_coords(mat_node_map, num_dims, "mat_node_coords"); + CommPlan mat_node_coords_comms(mat_node_coords,State.node.coords); //shouldnt do comms since subview of map on this rank mat_node_coords_comms.execute_comms(); - DistributedFArray 
collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); - HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + DistributedDFArray mat_node_scalar_fields(mat_node_map, num_node_scalar_vars, "mat_node_scalars"); + OutputCommPlan mat_node_scalars_comms(mat_node_scalar_fields,node_scalar_fields); //shouldnt do comms since subview of map on this rank mat_node_scalars_comms.execute_comms(); - DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); - HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 + DistributedDFArray mat_node_vector_fields(mat_node_map, num_node_vector_vars, 3, "mat_node_vectors"); + OutputCommPlan mat_node_vectors_comms(mat_node_vector_fields,node_vector_fields); //shouldnt do comms since subview of map on this rank mat_node_vectors_comms.execute_comms(); //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh - for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { + for (size_t elem_id = 0; elem_id < num_mat_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); + mat_nodes_in_mat_elem(elem_id, node_lid) = mat_node_map.getLocalIndex(mesh.all_node_map.getGlobalIndex(mat_nodes_in_mat_elem(elem_id, node_lid))); } } // end for elem_gid // only write material data if the mat lives on the mesh, ie. 
has state allocated if (num_mat_collective_elems>0&&myrank==0){ // write out a vtu file this - write_vtu(collective_mat_node_coords, - collective_mat_nodes_in_mat_elem, - collective_mat_elem_scalar_fields, - collective_mat_elem_tensor_fields, - collective_mat_node_scalar_fields, - collective_mat_node_vector_fields, + write_vtu(mat_node_coords, + mat_nodes_in_mat_elem, + mat_elem_scalar_fields, + mat_elem_tensor_fields, + mat_node_scalar_fields, + mat_node_vector_fields, mat_elem_scalar_var_names, mat_elem_tensor_var_names, node_scalar_var_names, @@ -3839,7 +3801,7 @@ class MeshWriter mat_fields_name, graphics_id, num_mat_nodes, - num_mat_collective_elems, + num_mat_local_elems, num_nodes_in_elem, Pn_order, num_dims, @@ -3968,521 +3930,521 @@ class MeshWriter const int mat_conductivity_id, const int mat_specific_heat_id) { - // ************************************** - // build and save element average fields - // ************************************** - - // short hand - const size_t num_nodes = mesh.num_nodes; - const size_t num_elems = mesh.num_elems; - const size_t num_dims = mesh.num_dims; - const size_t num_nodes_in_elem = mesh.num_nodes_in_elem; - const size_t num_local_elems = mesh.num_local_elems; - const size_t num_local_nodes = mesh.num_local_nodes; - DistributedMap local_element_map = mesh.local_element_map; - DistributedMap node_map = mesh.node_map; - const int Pn_order = mesh.Pn; - - const size_t num_mats = State.MaterialPoints.num_material_points.size(); + // // ************************************** + // // build and save element average fields + // // ************************************** + + // // short hand + // const size_t num_nodes = mesh.num_nodes; + // const size_t num_elems = mesh.num_elems; + // const size_t num_dims = mesh.num_dims; + // const size_t num_nodes_in_elem = mesh.num_nodes_in_elem; + // const size_t num_local_elems = mesh.num_local_elems; + // const size_t num_local_nodes = mesh.num_local_nodes; + // DistributedMap 
local_element_map = mesh.local_element_map; + // DistributedMap node_map = mesh.node_map; + // const int Pn_order = mesh.Pn; + + // const size_t num_mats = State.MaterialPoints.num_material_points.size(); - int myrank, nranks; - MPI_Comm_rank(MPI_COMM_WORLD,&myrank); - MPI_Comm_size(MPI_COMM_WORLD,&nranks); - - /* save the elem state to an array for exporting to graphics files*/ - - //host version of local element map for argument compatibility - HostDistributedMap host_local_element_map; - DCArrayKokkos global_indices_of_local_elements(num_local_elems); - for(int ielem = 0; ielem < num_local_elems; ielem++){ - global_indices_of_local_elements(ielem) = local_element_map.getGlobalIndex(ielem); - } - host_local_element_map = HostDistributedMap(global_indices_of_local_elements); - DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); - DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); - elem_scalar_fields.set_values(0.0); - elem_tensor_fields.set_values(0.0); - //duplicate for now to allow compatibility with comm plan object when using Tpetra (src and dst device type must be equal) - //We dont want to make a dual view of the rank 0 collector since that will blow device memory constraints sooner than this duplicate - //one other option is to just do the concatenation ops on the host - DistributedFArray host_elem_scalar_fields(host_local_element_map, num_elem_scalar_vars, "elem_scalars"); - DistributedFArray host_elem_tensor_fields(host_local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); - - - // ----------------------------------------------------------------------- - // save the output fields to a single element average array for all state - // ----------------------------------------------------------------------- - for (int mat_id = 0; mat_id < num_mats; mat_id++) { - - // material point and guass point state are concatenated together - 
concatenate_elem_fields(State.MaterialPoints, - State.GaussPoints, - elem_scalar_fields, - elem_tensor_fields, - State.MaterialToMeshMaps.elem_in_mat_elem, - SimulationParameters.output_options.output_elem_state, - SimulationParameters.output_options.output_gauss_pt_state, - State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), - mat_id, - num_local_elems, - den_id, - pres_id, - sie_id, - sspd_id, - mass_id, - stress_id, - vol_id, - div_id, - level_set_id, - vel_grad_id, - conductivity_id, - specific_heat_id); - } // end for mats - - // make specific fields for the element average - if (sie_id>=0){ - FOR_ALL(elem_gid, 0, num_local_elems, { - // get sie by dividing by the mass - elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(elem_gid, mass_id)+1.e-20); - }); - } // end if - - Kokkos::fence(); - elem_scalar_fields.update_host(); - elem_tensor_fields.update_host(); - - // ----------------------------------------------------------------------- - // copy the output fields to host side array compatible with Tpetra Comms - // ----------------------------------------------------------------------- - - for (int mat_id = 0; mat_id < num_mats; mat_id++) { + // int myrank, nranks; + // MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + // MPI_Comm_size(MPI_COMM_WORLD,&nranks); + + // /* save the elem state to an array for exporting to graphics files*/ + + // //host version of local element map for argument compatibility + // HostDistributedMap host_local_element_map; + // DCArrayKokkos global_indices_of_local_elements(num_local_elems); + // for(int ielem = 0; ielem < num_local_elems; ielem++){ + // global_indices_of_local_elements(ielem) = local_element_map.getGlobalIndex(ielem); + // } + // host_local_element_map = HostDistributedMap(global_indices_of_local_elements); + // DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); + // DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, 
"elem_tensors"); + // elem_scalar_fields.set_values(0.0); + // elem_tensor_fields.set_values(0.0); + // //duplicate for now to allow compatibility with comm plan object when using Tpetra (src and dst device type must be equal) + // //We dont want to make a dual view of the rank 0 collector since that will blow device memory constraints sooner than this duplicate + // //one other option is to just do the concatenation ops on the host + // DistributedFArray host_elem_scalar_fields(host_local_element_map, num_elem_scalar_vars, "elem_scalars"); + // DistributedFArray host_elem_tensor_fields(host_local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); + + + // // ----------------------------------------------------------------------- + // // save the output fields to a single element average array for all state + // // ----------------------------------------------------------------------- + // for (int mat_id = 0; mat_id < num_mats; mat_id++) { + + // // material point and guass point state are concatenated together + // concatenate_elem_fields(State.MaterialPoints, + // State.GaussPoints, + // elem_scalar_fields, + // elem_tensor_fields, + // State.MaterialToMeshMaps.elem_in_mat_elem, + // SimulationParameters.output_options.output_elem_state, + // SimulationParameters.output_options.output_gauss_pt_state, + // State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), + // mat_id, + // num_local_elems, + // den_id, + // pres_id, + // sie_id, + // sspd_id, + // mass_id, + // stress_id, + // vol_id, + // div_id, + // level_set_id, + // vel_grad_id, + // conductivity_id, + // specific_heat_id); + // } // end for mats + + // // make specific fields for the element average + // if (sie_id>=0){ + // FOR_ALL(elem_gid, 0, num_local_elems, { + // // get sie by dividing by the mass + // elem_scalar_fields(elem_gid, sie_id) /= (elem_scalar_fields(elem_gid, mass_id)+1.e-20); + // }); + // } // end if + + // Kokkos::fence(); + // elem_scalar_fields.update_host(); + // 
elem_tensor_fields.update_host(); + + // // ----------------------------------------------------------------------- + // // copy the output fields to host side array compatible with Tpetra Comms + // // ----------------------------------------------------------------------- + + // for (int mat_id = 0; mat_id < num_mats; mat_id++) { - // material point and guass point state are concatenated together - copy_elem_fields(elem_scalar_fields, - elem_tensor_fields, - host_elem_scalar_fields, - host_elem_tensor_fields, - State.MaterialToMeshMaps.elem_in_mat_elem, - SimulationParameters.output_options.output_elem_state, - SimulationParameters.output_options.output_gauss_pt_state, - State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), - mat_id, - num_local_elems, - den_id, - pres_id, - sie_id, - sspd_id, - mass_id, - stress_id, - vol_id, - div_id, - level_set_id, - vel_grad_id, - conductivity_id, - specific_heat_id); - } // end for mats + // // material point and guass point state are concatenated together + // copy_elem_fields(elem_scalar_fields, + // elem_tensor_fields, + // host_elem_scalar_fields, + // host_elem_tensor_fields, + // State.MaterialToMeshMaps.elem_in_mat_elem, + // SimulationParameters.output_options.output_elem_state, + // SimulationParameters.output_options.output_gauss_pt_state, + // State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id), + // mat_id, + // num_local_elems, + // den_id, + // pres_id, + // sie_id, + // sspd_id, + // mass_id, + // stress_id, + // vol_id, + // div_id, + // level_set_id, + // vel_grad_id, + // conductivity_id, + // specific_heat_id); + // } // end for mats - // ************************ - // Build the nodal fields - // ************************ - - //host version of local node map for argument compatibility - HostDistributedMap host_node_map; - DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes); - for(int inode = 0; inode < mesh.num_local_nodes; inode++){ - global_indices_of_local_nodes(inode) = 
mesh.node_map.getGlobalIndex(inode); - } - host_node_map = HostDistributedMap(global_indices_of_local_nodes); - - // save the nodal fields to an array for exporting to graphics files - DistributedDFArray node_scalar_fields(node_map, num_node_scalar_vars, "node_scalars"); - DistributedDFArray node_vector_fields(node_map, num_node_vector_vars, 3, "node_tenors"); - DistributedFArray host_node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); - DistributedFArray host_node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); + // // ************************ + // // Build the nodal fields + // // ************************ + + // //host version of local node map for argument compatibility + // HostDistributedMap host_node_map; + // DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes); + // for(int inode = 0; inode < mesh.num_local_nodes; inode++){ + // global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); + // } + // host_node_map = HostDistributedMap(global_indices_of_local_nodes); + + // // save the nodal fields to an array for exporting to graphics files + // DistributedDFArray node_scalar_fields(node_map, num_node_scalar_vars, "node_scalars"); + // DistributedDFArray node_vector_fields(node_map, num_node_vector_vars, 3, "node_tenors"); + // DistributedFArray host_node_scalar_fields(host_node_map, num_node_scalar_vars, "node_scalars"); + // DistributedFArray host_node_vector_fields(host_node_map, num_node_vector_vars, 3, "node_tenors"); - concatenate_nodal_fields(State.node, - node_scalar_fields, - node_vector_fields, - SimulationParameters.output_options.output_node_state, - dt, - num_local_nodes, - num_dims, - node_mass_id, - node_vel_id, - node_accel_id, - node_coord_id, - node_grad_level_set_id, - node_temp_id); + // concatenate_nodal_fields(State.node, + // node_scalar_fields, + // node_vector_fields, + // SimulationParameters.output_options.output_node_state, + // dt, + // num_local_nodes, + // 
num_dims, + // node_mass_id, + // node_vel_id, + // node_accel_id, + // node_coord_id, + // node_grad_level_set_id, + // node_temp_id); - Kokkos::fence(); - node_scalar_fields.update_host(); - node_vector_fields.update_host(); - - copy_nodal_fields(host_node_scalar_fields, - host_node_vector_fields, - node_scalar_fields, - node_vector_fields, - SimulationParameters.output_options.output_node_state, - dt, - num_local_nodes, - num_dims, - node_mass_id, - node_vel_id, - node_accel_id, - node_coord_id, - node_grad_level_set_id, - node_temp_id); - - // ************************************************** - // Collective communications for node and elem data - // ************************************************** - - //elem data collective comms - //collective map has all indices on rank 0 and non on other ranks - HostDistributedMap collective_elem_map; - long long int num_collective_elem_indices = 0; - if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; - collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); - - //collective vector and comms to the collective vector for elem fields - DistributedFArray collective_elem_scalar_fields(collective_elem_map, num_elem_scalar_vars, "elem_scalars_collective"); - DistributedFArray collective_elem_tensor_fields(collective_elem_map, num_elem_tensor_vars, 3, 3, "elem_tensors_collective"); - HostCommPlan collective_elem_scalars_comms(collective_elem_scalar_fields, host_elem_scalar_fields); - HostCommPlan collective_elem_tensors_comms(collective_elem_tensor_fields, host_elem_tensor_fields, collective_elem_scalars_comms); - collective_elem_scalars_comms.execute_comms(); - collective_elem_tensors_comms.execute_comms(); - - //host of node in elem for Trilinos template argument compatibility - DistributedFArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); - - //convert nodes in elem back to global (convert back to local after we've collected global ids in 
collective vector) - for (size_t elem_id = 0; elem_id < num_local_elems; elem_id++) { - for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); - } - } // end for elem_gid - - //collect nodes in elem with a conversion back to global node ids - DistributedFArray collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); - HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); - - nodes_in_elem_comms.execute_comms(); - - //node data collective comms - //collective map has all indices on rank 0 and non on other ranks - HostDistributedMap collective_node_map; - long long int num_collective_node_indices = 0; - if(myrank==0) num_collective_node_indices = mesh.global_num_nodes; - collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); - - //collective vector and comms to the collective vector for node fields - DistributedFArray collective_node_scalar_fields(collective_node_map, num_node_scalar_vars); - DistributedFArray collective_node_vector_fields(collective_node_map, num_node_vector_vars, 3); - HostCommPlan collective_node_scalars_comms(collective_node_scalar_fields, host_node_scalar_fields); - HostCommPlan collective_node_vectors_comms(collective_node_vector_fields, host_node_vector_fields, collective_node_scalars_comms); - collective_node_scalars_comms.execute_comms(); - collective_node_vectors_comms.execute_comms(); - - //collect nodal coordinates - //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) - DistributedFArray host_node_coords(host_node_map, mesh.num_dims); - for (size_t node_id = 0; node_id < num_local_nodes; node_id++) { - for (int idim = 0; idim < mesh.num_dims; idim++) { - host_node_coords(node_id, idim) = State.node.coords.host(node_id, idim); - } - } // end for 
elem_gid - DistributedFArray collective_node_coords(collective_node_map, mesh.num_dims); - HostCommPlan collective_node_coords_comms(collective_node_coords, host_node_coords, collective_node_scalars_comms); - collective_node_coords_comms.execute_comms(); - - if(myrank==0){ - // create the folder structure if it does not exist - struct stat st; - - if (stat("vtk", &st) != 0) { - int returnCode = system("mkdir vtk"); - - if (returnCode == 1) { - std::cout << "Unable to make vtk directory" << std::endl; - } - } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files inside - int returnCode = system("rm vtk/Fierro*"); - if (returnCode == 1) { - std::cout << "Unable to clear vtk/Fierro directory" << std::endl; - } - } - } - - if (stat("vtk/data", &st) != 0) { - int returnCode = system("mkdir vtk/data"); - if (returnCode == 1) { - std::cout << "Unable to make vtk/data directory" << std::endl; - } - } - else{ - if(solver_id==0 && graphics_id==0){ - // delete the existing files inside the folder - int returnCode = system("rm vtk/data/Fierro*"); - if (returnCode == 1) { - std::cout << "Unable to clear vtk/data directory" << std::endl; - } - } - } - } - // call the .vtu writer for element fields - std::string elem_fields_name = "fields"; - - if(myrank==0){ - write_vtu(collective_node_coords, - collective_nodes_in_elem, - collective_elem_scalar_fields, - collective_elem_tensor_fields, - collective_node_scalar_fields, - collective_node_vector_fields, - elem_scalar_var_names, - elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - elem_fields_name, - graphics_id, - mesh.global_num_nodes, - mesh.global_num_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - } - - // ******************************** - // Build and write the mat fields - // ******************************** - - - // note: the file path and folder was created in the elem and node outputs - size_t num_mat_files_written = 0; - if(num_mat_pt_scalar_vars > 0 || 
num_mat_pt_tensor_vars >0){ - - for (int mat_id = 0; mat_id < num_mats; mat_id++) { - - const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); - //array storing number of local elems for this material on each process - CArray processes_num_local_mat_elems, gatherv_displacements; - if(myrank==0){ - processes_num_local_mat_elems = CArray(nranks); - gatherv_displacements = CArray(nranks); - } - MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, - MPI_INT, 0, MPI_COMM_WORLD); - - //set global element indices on this rank - HostDistributedMap host_mat_elem_map; - DistributedMap element_map = mesh.element_map; - DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); - for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); - } - host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); - - //allocate arrays for distributed mat elem data - DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); - DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); - - //collect global element indices on rank 0 for this mat - //tally total number of mat elems for rank 0 - DCArrayKokkos global_indices_of_collective_mat_elems; - long long int num_mat_collective_elems = 0; - if(myrank==0){ - for(int irank=0; irank < nranks; irank++){ - gatherv_displacements(irank) = num_mat_collective_elems; - num_mat_collective_elems += processes_num_local_mat_elems(irank); - } - global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); - } - // if(myrank==0){ - // for(int irank=0; irank < nranks; irank++){ - // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; - // 
std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; - // } - // } - MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, - global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), - gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); - - //use indices on rank 0 to construct rank 0 collective map for this mat - HostDistributedMap collective_mat_elem_map; - collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); - //collective_mat_elem_map.print(); - - //collective storage for scalars and tensors using collective elem mat map - DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); - DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); - - // set the nodal vars to zero size, we don't write these fields again - node_scalar_var_names.clear(); - node_vector_var_names.clear(); - - // concatenate material fields into a single array - concatenate_mat_fields(State.MaterialPoints, - host_mat_elem_scalar_fields, - host_mat_elem_tensor_fields, - State.MaterialToMeshMaps.elem_in_mat_elem, - SimulationParameters.output_options.output_mat_pt_state, - num_mat_local_elems, - mat_id, - mat_den_id, - mat_pres_id, - mat_sie_id, - mat_sspd_id, - mat_mass_id, - mat_volfrac_id, - mat_geo_volfrac_id, - mat_eroded_id, - mat_stress_id, - mat_conductivity_id, - mat_specific_heat_id); - - - std::string str_mat_val = std::to_string(mat_id); - std::string mat_fields_name = "mat"; - mat_fields_name += str_mat_val; // add the mat number - - // the number of actual nodes belonging to the part (i.e., the material) - size_t num_mat_nodes = 0; - - //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 + // 
Kokkos::fence(); + // node_scalar_fields.update_host(); + // node_vector_fields.update_host(); + + // copy_nodal_fields(host_node_scalar_fields, + // host_node_vector_fields, + // node_scalar_fields, + // node_vector_fields, + // SimulationParameters.output_options.output_node_state, + // dt, + // num_local_nodes, + // num_dims, + // node_mass_id, + // node_vel_id, + // node_accel_id, + // node_coord_id, + // node_grad_level_set_id, + // node_temp_id); + + // // ************************************************** + // // Collective communications for node and elem data + // // ************************************************** + + // //elem data collective comms + // //collective map has all indices on rank 0 and non on other ranks + // HostDistributedMap collective_elem_map; + // long long int num_collective_elem_indices = 0; + // if(myrank==0) num_collective_elem_indices = mesh.global_num_elems; + // collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); + + // //collective vector and comms to the collective vector for elem fields + // DistributedFArray collective_elem_scalar_fields(collective_elem_map, num_elem_scalar_vars, "elem_scalars_collective"); + // DistributedFArray collective_elem_tensor_fields(collective_elem_map, num_elem_tensor_vars, 3, 3, "elem_tensors_collective"); + // HostCommPlan collective_elem_scalars_comms(collective_elem_scalar_fields, host_elem_scalar_fields); + // HostCommPlan collective_elem_tensors_comms(collective_elem_tensor_fields, host_elem_tensor_fields, collective_elem_scalars_comms); + // collective_elem_scalars_comms.execute_comms(); + // collective_elem_tensors_comms.execute_comms(); + + // //host of node in elem for Trilinos template argument compatibility + // DistributedFArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); + + // //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + // for (size_t 
elem_id = 0; elem_id < num_local_elems; elem_id++) { + // for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + // host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); + // } + // } // end for elem_gid + + // //collect nodes in elem with a conversion back to global node ids + // DistributedFArray collective_nodes_in_elem(collective_elem_map, mesh.num_nodes_in_elem); + // HostCommPlan nodes_in_elem_comms(collective_nodes_in_elem, host_local_nodes_in_elem); + + // nodes_in_elem_comms.execute_comms(); + + // //node data collective comms + // //collective map has all indices on rank 0 and non on other ranks + // HostDistributedMap collective_node_map; + // long long int num_collective_node_indices = 0; + // if(myrank==0) num_collective_node_indices = mesh.global_num_nodes; + // collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); + + // //collective vector and comms to the collective vector for node fields + // DistributedFArray collective_node_scalar_fields(collective_node_map, num_node_scalar_vars); + // DistributedFArray collective_node_vector_fields(collective_node_map, num_node_vector_vars, 3); + // HostCommPlan collective_node_scalars_comms(collective_node_scalar_fields, host_node_scalar_fields); + // HostCommPlan collective_node_vectors_comms(collective_node_vector_fields, host_node_vector_fields, collective_node_scalars_comms); + // collective_node_scalars_comms.execute_comms(); + // collective_node_vectors_comms.execute_comms(); + + // //collect nodal coordinates + // //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) + // DistributedFArray host_node_coords(host_node_map, mesh.num_dims); + // for (size_t node_id = 0; node_id < num_local_nodes; node_id++) { + // for (int idim = 0; idim < mesh.num_dims; idim++) { + // host_node_coords(node_id, idim) = 
State.node.coords.host(node_id, idim); + // } + // } // end for elem_gid + // DistributedFArray collective_node_coords(collective_node_map, mesh.num_dims); + // HostCommPlan collective_node_coords_comms(collective_node_coords, host_node_coords, collective_node_scalars_comms); + // collective_node_coords_comms.execute_comms(); + + // if(myrank==0){ + // // create the folder structure if it does not exist + // struct stat st; + + // if (stat("vtk", &st) != 0) { + // int returnCode = system("mkdir vtk"); + + // if (returnCode == 1) { + // std::cout << "Unable to make vtk directory" << std::endl; + // } + // } + // else{ + // if(solver_id==0 && graphics_id==0){ + // // delete the existing files inside + // int returnCode = system("rm vtk/Fierro*"); + // if (returnCode == 1) { + // std::cout << "Unable to clear vtk/Fierro directory" << std::endl; + // } + // } + // } + + // if (stat("vtk/data", &st) != 0) { + // int returnCode = system("mkdir vtk/data"); + // if (returnCode == 1) { + // std::cout << "Unable to make vtk/data directory" << std::endl; + // } + // } + // else{ + // if(solver_id==0 && graphics_id==0){ + // // delete the existing files inside the folder + // int returnCode = system("rm vtk/data/Fierro*"); + // if (returnCode == 1) { + // std::cout << "Unable to clear vtk/data directory" << std::endl; + // } + // } + // } + // } + // // call the .vtu writer for element fields + // std::string elem_fields_name = "fields"; + + // if(myrank==0){ + // write_vtu(collective_node_coords, + // collective_nodes_in_elem, + // collective_elem_scalar_fields, + // collective_elem_tensor_fields, + // collective_node_scalar_fields, + // collective_node_vector_fields, + // elem_scalar_var_names, + // elem_tensor_var_names, + // node_scalar_var_names, + // node_vector_var_names, + // elem_fields_name, + // graphics_id, + // mesh.global_num_nodes, + // mesh.global_num_elems, + // num_nodes_in_elem, + // Pn_order, + // num_dims, + // solver_id); + // } + + // // 
******************************** + // // Build and write the mat fields + // // ******************************** + + + // // note: the file path and folder was created in the elem and node outputs + // size_t num_mat_files_written = 0; + // if(num_mat_pt_scalar_vars > 0 || num_mat_pt_tensor_vars >0){ + + // for (int mat_id = 0; mat_id < num_mats; mat_id++) { + + // const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); + // //array storing number of local elems for this material on each process + // CArray processes_num_local_mat_elems, gatherv_displacements; + // if(myrank==0){ + // processes_num_local_mat_elems = CArray(nranks); + // gatherv_displacements = CArray(nranks); + // } + // MPI_Gather(&num_mat_local_elems,1,MPI_INT,processes_num_local_mat_elems.pointer(),1, + // MPI_INT, 0, MPI_COMM_WORLD); + + // //set global element indices on this rank + // HostDistributedMap host_mat_elem_map; + // DistributedMap element_map = mesh.element_map; + // DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + // for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ + // global_indices_of_local_mat_elems(ielem) = element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); + // } + // host_mat_elem_map = HostDistributedMap(global_indices_of_local_mat_elems); + + // //allocate arrays for distributed mat elem data + // DistributedFArray host_mat_elem_scalar_fields(host_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); + // DistributedFArray host_mat_elem_tensor_fields(host_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors"); + + // //collect global element indices on rank 0 for this mat + // //tally total number of mat elems for rank 0 + // DCArrayKokkos global_indices_of_collective_mat_elems; + // long long int num_mat_collective_elems = 0; + // if(myrank==0){ + // for(int irank=0; irank < nranks; irank++){ + // gatherv_displacements(irank) = 
num_mat_collective_elems; + // num_mat_collective_elems += processes_num_local_mat_elems(irank); + // } + // global_indices_of_collective_mat_elems = DCArrayKokkos(num_mat_collective_elems); + // } + // // if(myrank==0){ + // // for(int irank=0; irank < nranks; irank++){ + // // std::cout << "NUM local mat elem on rank " << irank << " is " << processes_num_local_mat_elems(irank) << std::endl; + // // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; + // // } + // // } + // MPI_Gatherv(global_indices_of_local_mat_elems.device_pointer(), num_mat_local_elems, MPI_LONG_LONG_INT, + // global_indices_of_collective_mat_elems.device_pointer(), processes_num_local_mat_elems.pointer(), + // gatherv_displacements.pointer(), MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + // //use indices on rank 0 to construct rank 0 collective map for this mat + // HostDistributedMap collective_mat_elem_map; + // collective_mat_elem_map = HostDistributedMap(global_indices_of_collective_mat_elems); + // //collective_mat_elem_map.print(); + + // //collective storage for scalars and tensors using collective elem mat map + // DistributedFArray collective_mat_elem_scalar_fields(collective_mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars_collective"); + // DistributedFArray collective_mat_elem_tensor_fields(collective_mat_elem_map, num_mat_pt_tensor_vars, 3, 3, "mat_elem_tensors_collective"); + + // // set the nodal vars to zero size, we don't write these fields again + // node_scalar_var_names.clear(); + // node_vector_var_names.clear(); + + // // concatenate material fields into a single array + // concatenate_mat_fields(State.MaterialPoints, + // host_mat_elem_scalar_fields, + // host_mat_elem_tensor_fields, + // State.MaterialToMeshMaps.elem_in_mat_elem, + // SimulationParameters.output_options.output_mat_pt_state, + // num_mat_local_elems, + // mat_id, + // mat_den_id, + // mat_pres_id, + // mat_sie_id, + // mat_sspd_id, + // 
mat_mass_id, + // mat_volfrac_id, + // mat_geo_volfrac_id, + // mat_eroded_id, + // mat_stress_id, + // mat_conductivity_id, + // mat_specific_heat_id); + + + // std::string str_mat_val = std::to_string(mat_id); + // std::string mat_fields_name = "mat"; + // mat_fields_name += str_mat_val; // add the mat number + + // // the number of actual nodes belonging to the part (i.e., the material) + // size_t num_mat_nodes = 0; + + // //communicate scalars, tensors, and nodes in elem to collective mat arrays on rank 0 - //collect nodes in elem for this material on rank 0 - DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); - HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 - mat_nodes_in_elem_comms.execute_comms(); + // //collect nodes in elem for this material on rank 0 + // DistributedFArray collective_mat_nodes_in_mat_elem(collective_mat_elem_map, num_nodes_in_elem, "collective_mat_nodes_in_mat_elem"); + // HostCommPlan mat_nodes_in_elem_comms(collective_mat_nodes_in_mat_elem,collective_nodes_in_elem); //doesnt really do comms since all on rank 0 + // mat_nodes_in_elem_comms.execute_comms(); - HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); - mat_elem_scalars_comms.execute_comms(); + // HostCommPlan mat_elem_scalars_comms(collective_mat_elem_scalar_fields,host_mat_elem_scalar_fields); + // mat_elem_scalars_comms.execute_comms(); - HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); - mat_elem_tensors_comms.execute_comms(); + // HostCommPlan mat_elem_tensors_comms(collective_mat_elem_tensor_fields,host_mat_elem_tensor_fields); + // mat_elem_tensors_comms.execute_comms(); - //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat + // //define 
set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat - // build a unique mesh (element and nodes) for the material (i.e., the part) - DCArrayKokkos collective_mat_node_indices; - if(myrank==0){ - build_material_node_list(mesh, - collective_mat_node_indices, - collective_mat_nodes_in_mat_elem, - State.MaterialToMeshMaps.elem_in_mat_elem, - mat_id, - num_mat_nodes, - num_mat_collective_elems, - num_nodes_in_elem, - num_dims); - } + // // build a unique mesh (element and nodes) for the material (i.e., the part) + // DCArrayKokkos collective_mat_node_indices; + // if(myrank==0){ + // build_material_node_list(mesh, + // collective_mat_node_indices, + // collective_mat_nodes_in_mat_elem, + // State.MaterialToMeshMaps.elem_in_mat_elem, + // mat_id, + // num_mat_nodes, + // num_mat_collective_elems, + // num_nodes_in_elem, + // num_dims); + // } - //map object for mat node indices - HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); - - DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); - HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 - mat_node_coords_comms.execute_comms(); - - DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); - HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 - mat_node_scalars_comms.execute_comms(); - - DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); - HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 - mat_node_vectors_comms.execute_comms(); - - //convert 
collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh - for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { - for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); - } - } // end for elem_gid + // //map object for mat node indices + // HostDistributedMap collective_mat_node_map = HostDistributedMap(collective_mat_node_indices); + + // DistributedFArray collective_mat_node_coords(collective_mat_node_map, num_dims, "collective_mat_node_coords"); + // HostCommPlan mat_node_coords_comms(collective_mat_node_coords,collective_node_coords); //doesnt really do comms since all on rank 0 + // mat_node_coords_comms.execute_comms(); + + // DistributedFArray collective_mat_node_scalar_fields(collective_mat_node_map, num_node_scalar_vars, "collective_mat_node_scalars"); + // HostCommPlan mat_node_scalars_comms(collective_mat_node_scalar_fields,collective_node_scalar_fields); //doesnt really do comms since all on rank 0 + // mat_node_scalars_comms.execute_comms(); + + // DistributedFArray collective_mat_node_vector_fields(collective_mat_node_map, num_node_vector_vars, 3, "collective_mat_node_vectors"); + // HostCommPlan mat_node_vectors_comms(collective_mat_node_vector_fields,collective_node_vector_fields); //doesnt really do comms since all on rank 0 + // mat_node_vectors_comms.execute_comms(); + + // //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh + // for (size_t elem_id = 0; elem_id < num_mat_collective_elems; elem_id++) { + // for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + // collective_mat_nodes_in_mat_elem(elem_id, node_lid) = collective_mat_node_map.getLocalIndex(collective_mat_nodes_in_mat_elem(elem_id, node_lid)); + // } + // } // end for elem_gid - // only 
write material data if the mat lives on the mesh, ie. has state allocated - if (num_mat_collective_elems>0&&myrank==0){ - // write out a vtu file this - write_vtu(collective_mat_node_coords, - collective_mat_nodes_in_mat_elem, - collective_mat_elem_scalar_fields, - collective_mat_elem_tensor_fields, - collective_mat_node_scalar_fields, - collective_mat_node_vector_fields, - mat_elem_scalar_var_names, - mat_elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - mat_fields_name, - graphics_id, - num_mat_nodes, - num_mat_collective_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - - - num_mat_files_written++; - - } // end for mat_id - - } // end if material is on the mesh - - } // end if mat variables are to be written - - - // ************************************************* - // write Paraview files to open the graphics files - // ************************************************* - - // save the graphics time - graphics_times(graphics_id) = time_value; - - // check to see if an mesh state was written - bool write_mesh_state = false; - if( num_elem_scalar_vars > 0 || - num_elem_tensor_vars > 0 || - num_node_scalar_vars > 0 || - num_node_vector_vars > 0) - { - write_mesh_state = true; - } - // check to see if a mat state was written - bool write_mat_pt_state = false; - if( num_mat_pt_scalar_vars > 0 || - num_mat_pt_tensor_vars > 0) - { - write_mat_pt_state = true; - } - - // call the vtm file writer - std::string mat_fields_name = "mat"; - if(myrank==0){ - write_vtm(graphics_times, - elem_fields_name, - mat_fields_name, - time_value, - graphics_id, - num_mat_files_written, - write_mesh_state, - write_mat_pt_state, - solver_id); - - // call the pvd file writer - write_pvd(graphics_times, - time_value, - graphics_id, - solver_id); - } - - // increment graphics id counter - graphics_id++; // this is private variable in the class + // // only write material data if the mat lives on the mesh, ie. 
has state allocated + // if (num_mat_collective_elems>0&&myrank==0){ + // // write out a vtu file this + // write_vtu(collective_mat_node_coords, + // collective_mat_nodes_in_mat_elem, + // collective_mat_elem_scalar_fields, + // collective_mat_elem_tensor_fields, + // collective_mat_node_scalar_fields, + // collective_mat_node_vector_fields, + // mat_elem_scalar_var_names, + // mat_elem_tensor_var_names, + // node_scalar_var_names, + // node_vector_var_names, + // mat_fields_name, + // graphics_id, + // num_mat_nodes, + // num_mat_collective_elems, + // num_nodes_in_elem, + // Pn_order, + // num_dims, + // solver_id); + + + // num_mat_files_written++; + + // } // end for mat_id + + // } // end if material is on the mesh + + // } // end if mat variables are to be written + + + // // ************************************************* + // // write Paraview files to open the graphics files + // // ************************************************* + + // // save the graphics time + // graphics_times(graphics_id) = time_value; + + // // check to see if an mesh state was written + // bool write_mesh_state = false; + // if( num_elem_scalar_vars > 0 || + // num_elem_tensor_vars > 0 || + // num_node_scalar_vars > 0 || + // num_node_vector_vars > 0) + // { + // write_mesh_state = true; + // } + // // check to see if a mat state was written + // bool write_mat_pt_state = false; + // if( num_mat_pt_scalar_vars > 0 || + // num_mat_pt_tensor_vars > 0) + // { + // write_mat_pt_state = true; + // } + + // // call the vtm file writer + // std::string mat_fields_name = "mat"; + // if(myrank==0){ + // write_vtm(graphics_times, + // elem_fields_name, + // mat_fields_name, + // time_value, + // graphics_id, + // num_mat_files_written, + // write_mesh_state, + // write_mat_pt_state, + // solver_id); + + // // call the pvd file writer + // write_pvd(graphics_times, + // time_value, + // graphics_id, + // solver_id); + // } + + // // increment graphics id counter + // graphics_id++; // 
this is private variable in the class } ///////////////////////////////////////////////////////////////////////////// @@ -5757,8 +5719,8 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void concatenate_mat_fields(const MaterialPoint_t& MaterialPoints, - DistributedFArray& mat_elem_scalar_fields, - DistributedFArray& mat_elem_tensor_fields, + DistributedDFArray& mat_elem_scalar_fields, + DistributedDFArray& mat_elem_tensor_fields, const DRaggedRightArrayKokkos& elem_in_mat_elem, const std::vector& output_material_pt_states, const size_t num_mat_elems, @@ -5776,76 +5738,74 @@ class MeshWriter const int mat_specific_heat_id) { - // --- loop over the material point states - for (auto field : output_material_pt_states){ switch(field){ // scalar vars case material_pt_state::density: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field - mat_elem_scalar_fields(mat_elem_lid, mat_den_id) = MaterialPoints.den.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_den_id) = MaterialPoints.den(mat_id, mat_elem_lid); + }); break; case material_pt_state::pressure: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++) { + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field - mat_elem_scalar_fields(mat_elem_lid, mat_pres_id) = MaterialPoints.pres.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_pres_id) = MaterialPoints.pres(mat_id, mat_elem_lid); + }); break; case material_pt_state::specific_internal_energy: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field // extensive ie here, but after this function, it will become specific ie - mat_elem_scalar_fields(mat_elem_lid, mat_sie_id) = MaterialPoints.sie.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_sie_id) = MaterialPoints.sie(mat_id, 
mat_elem_lid); + }); break; case material_pt_state::sound_speed: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field - mat_elem_scalar_fields(mat_elem_lid, mat_sspd_id) = MaterialPoints.sspd.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_sspd_id) = MaterialPoints.sspd(mat_id, mat_elem_lid); + }); break; case material_pt_state::mass: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field - mat_elem_scalar_fields(mat_elem_lid, mat_mass_id) = MaterialPoints.mass.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_mass_id) = MaterialPoints.mass(mat_id, mat_elem_lid); + }); break; case material_pt_state::volume_fraction: // material volume fraction - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field // this is the volume fraction of a material within a part - mat_elem_scalar_fields(mat_elem_lid, mat_volfrac_id) = MaterialPoints.volfrac.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_volfrac_id) = MaterialPoints.volfrac(mat_id, mat_elem_lid); + }); // geometric volume fraction - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field // this is the geometric volume fraction (interface reconstruction) - mat_elem_scalar_fields(mat_elem_lid, mat_geo_volfrac_id) = MaterialPoints.geo_volfrac.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_geo_volfrac_id) = MaterialPoints.geo_volfrac(mat_id, mat_elem_lid); + }); break; case material_pt_state::eroded_flag: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field - mat_elem_scalar_fields(mat_elem_lid, mat_eroded_id) = (double)MaterialPoints.eroded.host(mat_id, 
mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_lid, mat_eroded_id) = (double)MaterialPoints.eroded(mat_id, mat_elem_lid); + }); break; // --------------- // tensor vars // --------------- case material_pt_state::stress: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // field // average tensor fields, it is always 3D @@ -5855,27 +5815,33 @@ class MeshWriter // stress tensor mat_elem_tensor_fields(mat_elem_lid, mat_stress_id, i, j) = - MaterialPoints.stress.host(mat_id, mat_elem_lid,i,j); + MaterialPoints.stress(mat_id, mat_elem_lid,i,j); } // end for } // end for - } + }); break; // thermal solver vars case material_pt_state::thermal_conductivity: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + + // get elem gid + size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - mat_elem_scalar_fields(mat_elem_lid, mat_conductivity_id) = MaterialPoints.conductivity.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_gid, mat_conductivity_id) += MaterialPoints.conductivity(mat_id, mat_elem_lid); + }); break; case material_pt_state::specific_heat: - for(int mat_elem_lid= 0; mat_elem_lid < num_mat_elems; mat_elem_lid++){ + FOR_ALL(mat_elem_lid, 0, num_mat_elems, { + + // get elem gid + size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); // field - mat_elem_scalar_fields(mat_elem_lid, mat_specific_heat_id) = MaterialPoints.specific_heat.host(mat_id, mat_elem_lid); - } + mat_elem_scalar_fields(mat_elem_gid, mat_specific_heat_id) += MaterialPoints.specific_heat(mat_id, mat_elem_lid); + }); break; // add other variables here @@ -5890,7 +5856,7 @@ class MeshWriter case material_pt_state::heat_flux: break; } // end switch - } + }// end for over mat point state @@ -6165,12 +6131,12 @@ class MeshWriter /// ///////////////////////////////////////////////////////////////////////////// void 
write_vtu( - const DistributedFArray& node_coords_host, - const DistributedFArray& nodes_in_elem_host, - const DistributedFArray& elem_scalar_fields, - const DistributedFArray& elem_tensor_fields, - const DistributedFArray& node_scalar_fields, - const DistributedFArray& node_vector_fields, + const DistributedDCArray& node_coords, + const DistributedDCArray& nodes_in_elem, + const DistributedDFArray& elem_scalar_fields, + const DistributedDFArray& elem_tensor_fields, + const DistributedDFArray& node_scalar_fields, + const DistributedDFArray& node_vector_fields, const std::vector& elem_scalar_var_names, const std::vector& elem_tensor_var_names, const std::vector& node_scalar_var_names, @@ -6226,12 +6192,12 @@ class MeshWriter for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { double coord_z = 0.0; if(num_dims==3){ - coord_z = node_coords_host(node_gid, 2); + coord_z = node_coords.host(node_gid, 2); } fprintf(out[0], " %f %f %f\n", - node_coords_host(node_gid, 0), - node_coords_host(node_gid, 1), + node_coords.host(node_gid, 0), + node_coords.host(node_gid, 1), coord_z); } // end for fprintf(out[0], " \n"); @@ -6264,7 +6230,7 @@ class MeshWriter for (int j = 0; j <= Pn_order; j++) { for (int i = 0; i <= Pn_order; i++) { size_t node_lid = PointIndexFromIJK(i, j, k, order); - fprintf(out[0], "%lu ", nodes_in_elem_host(elem_gid, node_lid)); + fprintf(out[0], "%lu ", nodes_in_elem.host(elem_gid, node_lid)); } } } // end for @@ -6272,13 +6238,13 @@ class MeshWriter else if (num_dims == 3 && Pn_order == 1){ // 3D linear hexahedral elements for (int node_lid = 0; node_lid < 8; node_lid++) { - fprintf(out[0], "%lu ", nodes_in_elem_host(elem_gid, node_lid)); + fprintf(out[0], "%lu ", nodes_in_elem.host(elem_gid, node_lid)); } // end for } else if (num_dims == 2){ // 2D linear is the only supported option for (int node_lid = 0; node_lid < 4; node_lid++) { - fprintf(out[0], "%lu ", nodes_in_elem_host(elem_gid, node_lid)); + fprintf(out[0], "%lu ", 
nodes_in_elem.host(elem_gid, node_lid)); } // end for } else { @@ -6348,9 +6314,9 @@ class MeshWriter for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { fprintf(out[0], " %f %f %f\n", - node_vector_fields(node_gid, a_var, 0), - node_vector_fields(node_gid, a_var, 1), - node_vector_fields(node_gid, a_var, 2)); + node_vector_fields.host(node_gid, a_var, 0), + node_vector_fields.host(node_gid, a_var, 1), + node_vector_fields.host(node_gid, a_var, 2)); } // end for nodes fprintf(out[0], " \n"); @@ -6361,7 +6327,7 @@ class MeshWriter for (int a_var = 0; a_var < num_node_scalar_vars; a_var++) { fprintf(out[0], " \n", node_scalar_var_names[a_var].c_str()); for (size_t node_gid = 0; node_gid < num_nodes; node_gid++) { - fprintf(out[0], " %f\n", node_scalar_fields(node_gid, a_var)); + fprintf(out[0], " %f\n", node_scalar_fields.host(node_gid, a_var)); } // end for nodes fprintf(out[0], " \n"); } // end for vec_vars @@ -6386,7 +6352,7 @@ class MeshWriter fprintf(out[0], " \n", elem_scalar_var_names[a_var].c_str()); // the 1 is number of scalar components [1:4] for (size_t elem_gid = 0; elem_gid < num_elems; elem_gid++) { - fprintf(out[0], " %f\n", elem_scalar_fields(elem_gid, a_var)); + fprintf(out[0], " %f\n", elem_scalar_fields.host(elem_gid, a_var)); } // end for elem fprintf(out[0], " \n"); } // end for elem scalar_vars @@ -6401,7 +6367,7 @@ class MeshWriter // Txx Txy Txz Tyx Tyy Tyz Tzx Tzy Tzz for (size_t i=0; i<3; i++){ for(size_t j=0; j<3; j++){ - fprintf(out[0], " %f ", elem_tensor_fields(elem_gid, a_var, i, j)); + fprintf(out[0], " %f ", elem_tensor_fields.host(elem_gid, a_var, i, j)); } // end j } // end i } // end for elem @@ -6681,8 +6647,8 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void build_material_node_list( const Mesh_t& mesh, - DCArrayKokkos& collective_mat_node_indices, - DistributedFArray& mat_nodes_in_mat_elem, + DCArrayKokkos& mat_node_indices, + DistributedDCArray& 
mat_nodes_in_mat_elem, const DRaggedRightArrayKokkos& elem_in_mat_elem, const size_t mat_id, size_t& num_mat_nodes, @@ -6699,7 +6665,7 @@ class MeshWriter // set nodes per element for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - node_gid = mat_nodes_in_mat_elem(elem_mat_id, node_lid); //nodes in elem still stores global indices + node_gid = mat_nodes_in_mat_elem.host(elem_mat_id, node_lid); //nodes in elem still stores global indices mat_node_set.insert(node_gid); } } @@ -6713,11 +6679,12 @@ class MeshWriter auto it = mat_node_set.begin(); // create a Map for ghost node indices - collective_mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); + mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); while (it != mat_node_set.end()) { - collective_mat_node_indices(ighost++) = *it; + mat_node_indices(ighost++) = *it; it++; } + mat_node_indices.update_device(); } // end build part (i.e., material elem and point lists) function diff --git a/single-node-refactor/src/common/include/state.h b/single-node-refactor/src/common/include/state.h index d0c68f078..cbc204cc1 100644 --- a/single-node-refactor/src/common/include/state.h +++ b/single-node-refactor/src/common/include/state.h @@ -73,6 +73,8 @@ using DistributedFArray = TpetraDFArray; template using CommPlan = TpetraLRCommunicationPlan; template +using OutputCommPlan = TpetraCommunicationPlan; +template using HostCommPlanLR = TpetraLRCommunicationPlan; template using HostCommPlan = TpetraCommunicationPlan; From 83d3141ff0ea66f69d71fac1b8644e558ac192cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 21 Aug 2025 16:03:35 -0600 Subject: [PATCH 46/66] WIP: fully parallel vtm --- .../src/common/include/mesh.h | 2 +- .../src/common/include/mesh_io.h | 91 +++++++++++-------- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h 
b/single-node-refactor/src/common/include/mesh.h index 682865229..6fa68af8d 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -649,7 +649,7 @@ struct Mesh_t // set nodes per element for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - node_gid = nodes_in_elem.host(cell_rid, node_lid); + node_gid = all_node_map.getGlobalIndex(nodes_in_elem.host(cell_rid, node_lid)); nonoverlap_elem_node_set.insert(node_gid); } } diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 5270d28a8..44e83c9ec 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3525,7 +3525,7 @@ class MeshWriter //host version of local element map for argument compatibility DistributedDFArray elem_scalar_fields(local_element_map, num_elem_scalar_vars, "elem_scalars"); DistributedDFArray elem_tensor_fields(local_element_map, num_elem_tensor_vars, 3, 3, "elem_tensors"); - DistributedDFArray nonoverlap_nodes_in_elem(local_element_map, num_nodes_in_elem, "nonoverlap_nodes_in_elem"); + DistributedDCArray nonoverlap_nodes_in_elem(local_element_map, num_nodes_in_elem, "nonoverlap_nodes_in_elem"); elem_scalar_fields.set_values(0.0); elem_tensor_fields.set_values(0.0); @@ -3624,6 +3624,8 @@ class MeshWriter } } // end for elem_gid + //nonoverlap_nodes_in_elem.print(); + if(myrank==0){ // create the folder structure if it does not exist struct stat st; @@ -3664,26 +3666,24 @@ class MeshWriter // call the .vtu writer for element fields std::string elem_fields_name = "fields"; - if(myrank==0){ - write_vtu(nonoverlap_node_coords, - nonoverlap_nodes_in_elem, - elem_scalar_fields, - elem_tensor_fields, - nonoverlap_node_scalar_fields, - nonoverlap_node_vector_fields, - elem_scalar_var_names, - elem_tensor_var_names, - node_scalar_var_names, - node_vector_var_names, - elem_fields_name, - graphics_id, - 
nonoverlap_elem_node_map.size(), - mesh.num_local_elems, - num_nodes_in_elem, - Pn_order, - num_dims, - solver_id); - } + write_vtu(nonoverlap_node_coords, + nonoverlap_nodes_in_elem, + elem_scalar_fields, + elem_tensor_fields, + nonoverlap_node_scalar_fields, + nonoverlap_node_vector_fields, + elem_scalar_var_names, + elem_tensor_var_names, + node_scalar_var_names, + node_vector_var_names, + elem_fields_name, + graphics_id, + nonoverlap_elem_node_map.size(), + mesh.num_local_elems, + num_nodes_in_elem, + Pn_order, + num_dims, + solver_id); // ******************************** // Build and write the mat fields @@ -3700,11 +3700,16 @@ class MeshWriter //set global element indices on this rank for this mat DistributedMap element_map = mesh.element_map; - DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + // FOR_ALL(ielem, 0, num_mat_local_elems,{ + // global_indices_of_local_mat_elems(ielem) = mesh.element_map(State.MaterialToMeshMaps.elem_in_mat_elem(mat_id, ielem)); + // }); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); + global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem(mat_id, ielem)); } + global_indices_of_local_mat_elems.update_device(); DistributedMap mat_elem_map = DistributedMap(global_indices_of_local_mat_elems); + //mat_elem_map.print(); //allocate arrays for distributed mat elem data DistributedDFArray mat_elem_scalar_fields(mat_elem_map, num_mat_pt_scalar_vars, "mat_elem_scalars"); @@ -3749,10 +3754,17 @@ class MeshWriter CommPlan mat_nodes_in_elem_comms(mat_nodes_in_mat_elem,mesh.nodes_in_elem); //shouldnt do comms since subview of map on this rank mat_nodes_in_elem_comms.execute_comms(); + //convert mesh.nodes_in_elem stores local indices and we 
communicated these in, convert to global + for (size_t elem_id = 0; elem_id < num_mat_local_elems; elem_id++) { + for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { + mat_nodes_in_mat_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mat_nodes_in_mat_elem(elem_id, node_lid)); + } + } // end for elem_gid + //define set of nodes for this mat, collect on rank 0, comms on coords, scalars, and vectors for nodes for this mat // build a unique mesh (element and nodes) for the material (i.e., the part) - DCArrayKokkos mat_node_indices; + DCArrayKokkos mat_node_indices; build_material_node_list(mesh, mat_node_indices, mat_nodes_in_mat_elem, @@ -3764,7 +3776,7 @@ class MeshWriter num_dims); //map object for mat node indices - DistributedMap collective_mat_node_map = DistributedMap(mat_node_indices); + DistributedMap mat_node_map = DistributedMap(mat_node_indices); DistributedDCArray mat_node_coords(mat_node_map, num_dims, "mat_node_coords"); CommPlan mat_node_coords_comms(mat_node_coords,State.node.coords); //shouldnt do comms since subview of map on this rank @@ -3778,15 +3790,15 @@ class MeshWriter OutputCommPlan mat_node_vectors_comms(mat_node_vector_fields,node_vector_fields); //shouldnt do comms since subview of map on this rank mat_node_vectors_comms.execute_comms(); - //convert collective mat_nodes_in_mat_elem so it uses contiguous node ids for this mat portion of the mesh + //convert mat_nodes_in_mat_elem so it uses contiguous local node ids for this mat portion of the mesh for (size_t elem_id = 0; elem_id < num_mat_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - mat_nodes_in_mat_elem(elem_id, node_lid) = mat_node_map.getLocalIndex(mesh.all_node_map.getGlobalIndex(mat_nodes_in_mat_elem(elem_id, node_lid))); + mat_nodes_in_mat_elem(elem_id, node_lid) = mat_node_map.getLocalIndex(mat_nodes_in_mat_elem(elem_id, node_lid)); } } // end for elem_gid // only write material data if the mat 
lives on the mesh, ie. has state allocated - if (num_mat_collective_elems>0&&myrank==0){ + if (num_mat_local_elems>0){ // write out a vtu file this write_vtu(mat_node_coords, mat_nodes_in_mat_elem, @@ -3843,6 +3855,8 @@ class MeshWriter // call the vtm file writer std::string mat_fields_name = "mat"; + //gather MPI ranks that are writing blocks + if(myrank==0){ write_vtm(graphics_times, elem_fields_name, @@ -5826,10 +5840,10 @@ class MeshWriter FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // get elem gid - size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); + size_t elem_gid = elem_in_mat_elem(mat_id, mat_elem_lid); // field - mat_elem_scalar_fields(mat_elem_gid, mat_conductivity_id) += MaterialPoints.conductivity(mat_id, mat_elem_lid); + mat_elem_scalar_fields(elem_gid, mat_conductivity_id) += MaterialPoints.conductivity(mat_id, mat_elem_lid); }); break; @@ -5837,10 +5851,10 @@ class MeshWriter FOR_ALL(mat_elem_lid, 0, num_mat_elems, { // get elem gid - size_t elem_gid = MaterialToMeshMaps_elem(mat_id, mat_elem_lid); + size_t elem_gid = elem_in_mat_elem(mat_id, mat_elem_lid); // field - mat_elem_scalar_fields(mat_elem_gid, mat_specific_heat_id) += MaterialPoints.specific_heat(mat_id, mat_elem_lid); + mat_elem_scalar_fields(elem_gid, mat_specific_heat_id) += MaterialPoints.specific_heat(mat_id, mat_elem_lid); }); break; @@ -6156,6 +6170,11 @@ class MeshWriter char filename[100]; // char string int max_len = sizeof filename; int str_output_len; + int myrank; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + std::string str_rank_val = std::to_string(myrank); + std::string rank_fields_name = "rank"; + rank_fields_name += str_rank_val; // add the mat number const size_t num_elem_scalar_vars = elem_scalar_var_names.size(); const size_t num_elem_tensor_vars = elem_tensor_var_names.size(); @@ -6165,8 +6184,8 @@ class MeshWriter // create filename - str_output_len = snprintf(filename, max_len, "vtk/data/Fierro.solver%zu.%s.%05d.vtu", - solver_id, partname.c_str(), 
graphics_id); + str_output_len = snprintf(filename, max_len, "vtk/data/Fierro.solver%zu.%s_%s.%05d.vtu", + solver_id, partname.c_str(), rank_fields_name.c_str(), graphics_id); if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } // mesh file @@ -6647,7 +6666,7 @@ class MeshWriter ///////////////////////////////////////////////////////////////////////////// void build_material_node_list( const Mesh_t& mesh, - DCArrayKokkos& mat_node_indices, + DCArrayKokkos& mat_node_indices, DistributedDCArray& mat_nodes_in_mat_elem, const DRaggedRightArrayKokkos& elem_in_mat_elem, const size_t mat_id, @@ -6679,7 +6698,7 @@ class MeshWriter auto it = mat_node_set.begin(); // create a Map for ghost node indices - mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); + mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); while (it != mat_node_set.end()) { mat_node_indices(ighost++) = *it; it++; From 7bd9507ce4cd9e9a6545e8712eb70b233ec8541e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 21 Aug 2025 21:30:52 -0600 Subject: [PATCH 47/66] WIP: parallel vtm writer --- .../src/common/include/mesh_io.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 44e83c9ec..d71a66774 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3663,6 +3663,8 @@ class MeshWriter } } } + + MPI_Barrier(MPI_COMM_WORLD); // call the .vtu writer for element fields std::string elem_fields_name = "fields"; @@ -3855,8 +3857,7 @@ class MeshWriter // call the vtm file writer std::string mat_fields_name = "mat"; - //gather MPI ranks that are writing blocks - + //gather MPI ranks that are writing mat blocks if(myrank==0){ write_vtm(graphics_times, elem_fields_name, @@ -3864,6 +3865,7 @@ 
class MeshWriter time_value, graphics_id, num_mat_files_written, + nranks, write_mesh_state, write_mat_pt_state, solver_id); @@ -6477,6 +6479,7 @@ class MeshWriter double time_value, int graphics_id, int num_mats, + int nranks, bool write_mesh_state, bool write_mat_pt_state, const size_t solver_id) @@ -6512,8 +6515,10 @@ class MeshWriter // elem and nodal fields are in this file fprintf(out[0], " \n"); - fprintf(out[0], " \n", - file_id, solver_id, elem_part_name.c_str(), file_id, graphics_times(file_id) ); + for(int irank = 0; irank < nranks; irank++){ + fprintf(out[0], " \n", + irank, solver_id, elem_part_name.c_str(), irank, file_id ); + } fprintf(out[0], " \n"); // add other Mesh average output Pieces here @@ -6528,8 +6533,10 @@ class MeshWriter // output the material specific fields fprintf(out[0], " \n", mat_id, mat_id); - fprintf(out[0], " \n", - file_id, solver_id, mat_part_name.c_str(), mat_id, file_id, graphics_times(file_id) ); + for(int irank = 0; irank < nranks; irank++){ + fprintf(out[0], " \n", + irank, solver_id, mat_part_name.c_str(), mat_id, irank, file_id ); + } fprintf(out[0], " \n"); } // end for loop mat_id From c5202aa1a6cc144f98500c6f00413a296ab9f7ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 24 Aug 2025 22:59:38 -0600 Subject: [PATCH 48/66] WIP: parallel vtm writer --- .../src/common/include/mesh_io.h | 198 ++++++++++++------ 1 file changed, 134 insertions(+), 64 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index d71a66774..3e1d85ad1 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -3694,11 +3694,16 @@ class MeshWriter // note: the file path and folder was created in the elem and node outputs size_t num_mat_files_written = 0; + CArray local_mats_in_rank(num_mats); + int local_num_mats = 0; if(num_mat_pt_scalar_vars > 0 
|| num_mat_pt_tensor_vars >0){ - for (int mat_id = 0; mat_id < num_mats; mat_id++) { const size_t num_mat_local_elems = State.MaterialToMeshMaps.num_mat_local_elems.host(mat_id); + if(num_mat_local_elems){ + local_mats_in_rank(local_num_mats) = mat_id; + local_num_mats++; + } //set global element indices on this rank for this mat DistributedMap element_map = mesh.element_map; @@ -3858,18 +3863,21 @@ class MeshWriter // call the vtm file writer std::string mat_fields_name = "mat"; //gather MPI ranks that are writing mat blocks + write_vtm(graphics_times, + elem_fields_name, + mat_fields_name, + time_value, + graphics_id, + num_mats, + local_num_mats, + local_mats_in_rank, + nranks, + myrank, + write_mesh_state, + write_mat_pt_state, + solver_id); + if(myrank==0){ - write_vtm(graphics_times, - elem_fields_name, - mat_fields_name, - time_value, - graphics_id, - num_mat_files_written, - nranks, - write_mesh_state, - write_mat_pt_state, - solver_id); - // call the pvd file writer write_pvd(graphics_times, time_value, @@ -6479,77 +6487,139 @@ class MeshWriter double time_value, int graphics_id, int num_mats, + int local_num_mats, + CArray local_mats_in_rank, int nranks, + int myrank, bool write_mesh_state, bool write_mat_pt_state, const size_t solver_id) - { - // loop over all the files that were written - for(int file_id=0; file_id<=graphics_id; file_id++){ + { + //gather number of mats on each rank + //array storing number of local elems for this material on each process + CArray num_mats_in_rank, gatherv_displacements; + CArray interface_mats_in_rank; + if(myrank==0){ + num_mats_in_rank = CArray(nranks); + interface_mats_in_rank = CArray(nranks); + gatherv_displacements = CArray(nranks); + } + MPI_Gather(&local_num_mats,1,MPI_INT,num_mats_in_rank.pointer(),1, + MPI_INT, 0, MPI_COMM_WORLD); - FILE* out[20]; // the output files that are written to - char filename[100]; // char string - int max_len = sizeof filename; - int str_output_len; + //gather which mats are 
present on each rank + long long int length_mats_in_rank = 0; + RaggedRightArray mats_in_rank; + if(myrank==0){ + for(int irank=0; irank < nranks; irank++){ + gatherv_displacements(irank) = length_mats_in_rank; + interface_mats_in_rank(irank) = num_mats_in_rank(irank); + length_mats_in_rank += num_mats_in_rank(irank); + } + mats_in_rank = RaggedRightArray(interface_mats_in_rank); + } + // if(myrank==0){ + // for(int irank=0; irank < nranks; irank++){ + // std::cout << "NUM local mat elem on rank " << irank << " is " << num_mats_in_rank(irank) << std::endl; + // std::cout << "gatherv displacement on rank " << irank << " is " << gatherv_displacements(irank) << std::endl; + // } + // } + MPI_Gatherv(local_mats_in_rank.pointer(), local_num_mats, MPI_INT, + mats_in_rank.pointer(), num_mats_in_rank.pointer(), + gatherv_displacements.pointer(), MPI_INT, 0, MPI_COMM_WORLD); - // Write time series metadata to the data file - str_output_len = snprintf(filename, max_len, "vtk/data/Fierro.solver%zu.%05d.vtm", solver_id, file_id); + if(myrank==0){ - if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } - // mesh file + //invert map of rank to mat so its mat to rank + CArray num_ranks_in_mat(num_mats); + num_ranks_in_mat.set_values(0); - out[0] = fopen(filename, "w"); - - fprintf(out[0], "\n"); - fprintf(out[0], "\n"); - fprintf(out[0], " \n"); + //count how many ranks each material is in + for(int irank = 0; irank < nranks; irank++){ + for(int imat = 0; imat < num_mats_in_rank(irank); imat++){ + num_ranks_in_mat(mats_in_rank(irank,imat))++; + } + } + //allocate ragged storage and assign ranks to each mat + RaggedRightArray ranks_in_mat(num_ranks_in_mat); + num_ranks_in_mat.set_values(0); + + for(int irank = 0; irank < nranks; irank++){ + for(int imat = 0; imat < num_mats_in_rank(irank); imat++){ + ranks_in_mat(mats_in_rank(irank,imat),num_ranks_in_mat(mats_in_rank(irank,imat))) = irank; + num_ranks_in_mat(mats_in_rank(irank,imat))++; + 
} + } - // Average mesh fields -- node and elem state written - size_t block_id = 0; // this will need to be incremented based on the number of mesh fields written - if (write_mesh_state){ - fprintf(out[0], " \n", block_id); - { - block_id++; // increment block id for material outputs that follow the element avg block + // loop over all the files that need to be written + for(int file_id=0; file_id<=graphics_id; file_id++){ - // elem and nodal fields are in this file - fprintf(out[0], " \n"); - for(int irank = 0; irank < nranks; irank++){ - fprintf(out[0], " \n", - irank, solver_id, elem_part_name.c_str(), irank, file_id ); - } - fprintf(out[0], " \n"); + FILE* out[20]; // the output files that are written to + char filename[100]; // char string + int max_len = sizeof filename; + int str_output_len; - // add other Mesh average output Pieces here - } - fprintf(out[0], " \n"); - } // end if write elem and node state is true - // note: the block_id was incremented if an element average field output was made - if (write_mat_pt_state){ - fprintf(out[0], " \n", block_id); - for (size_t mat_id=0; mat_id\n", mat_id, mat_id); - for(int irank = 0; irank < nranks; irank++){ - fprintf(out[0], " \n", - irank, solver_id, mat_part_name.c_str(), mat_id, irank, file_id ); + // Write time series metadata to the data file + str_output_len = snprintf(filename, max_len, "vtk/data/Fierro.solver%zu.%05d.vtm", solver_id, file_id); + + if (str_output_len >= max_len) { fputs("Filename length exceeded; string truncated", stderr); } + // mesh file + + out[0] = fopen(filename, "w"); + + fprintf(out[0], "\n"); + fprintf(out[0], "\n"); + fprintf(out[0], " \n"); + + + // Average mesh fields -- node and elem state written + size_t block_id = 0; // this will need to be incremented based on the number of mesh fields written + if (write_mesh_state){ + fprintf(out[0], " \n", block_id); + { + block_id++; // increment block id for material outputs that follow the element avg block + + // elem and nodal 
fields are in this file + fprintf(out[0], " \n"); + for(int irank = 0; irank < nranks; irank++){ + fprintf(out[0], " \n", + irank, solver_id, elem_part_name.c_str(), irank, file_id ); + } + fprintf(out[0], " \n"); + + // add other Mesh average output Pieces here } - fprintf(out[0], " \n"); + fprintf(out[0], " \n"); + } // end if write elem and node state is true - } // end for loop mat_id - fprintf(out[0], " \n"); - } // end if write mat satte is true + // note: the block_id was incremented if an element average field output was made + if (write_mat_pt_state){ + fprintf(out[0], " \n", block_id); + for (size_t mat_id=0; mat_id\n", mat_id, mat_id); + for(int irank = 0; irank < num_ranks_in_mat(mat_id); irank++){ + fprintf(out[0], " \n", + irank, solver_id, mat_part_name.c_str(), mat_id, ranks_in_mat(mat_id, irank), file_id ); + } + fprintf(out[0], " \n"); - // done writing the files to be read by the vtm file - fprintf(out[0], " \n"); - fprintf(out[0], ""); + } // end for loop mat_id + fprintf(out[0], " \n"); + } // end if write mat satte is true - fclose(out[0]); + // done writing the files to be read by the vtm file + fprintf(out[0], " \n"); + fprintf(out[0], ""); - } // end for file_id + fclose(out[0]); + + } // end for file_id + } } // end vtm From 31612e5e6b16dd7cf7107019036daf8ce2481e2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 4 Sep 2025 20:05:25 -0600 Subject: [PATCH 49/66] WIP: vtu reader --- .../src/common/include/mesh_io.h | 1075 +++++------------ 1 file changed, 286 insertions(+), 789 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 3e1d85ad1..dd1cff894 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -213,8 +213,8 @@ inline bool extract_values_xml(T *values_xml, // find the number of points and number of cells in the mesh -inline 
bool extract_num_points_and_cells_xml(int& numberOfPoints, - int& numberOfCells, +inline bool extract_num_points_and_cells_xml(size_t& numberOfPoints, + size_t& numberOfCells, std::ifstream& in) { bool found = false; @@ -383,314 +383,6 @@ class MeshReader } - ///////////////////////////////////////////////////////////////////////////// - /// - /// \fn read_ensight_mesh - /// - /// \brief Read .geo mesh file - /// - /// \param Simulation mesh - /// \param Element state struct - /// \param Node state struct - /// \param Corner state struct - /// \param Number of dimensions - /// - ///////////////////////////////////////////////////////////////////////////// - // void read_ensight_mesh(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // mesh_input_t& mesh_inps, - // int num_dims) - // { - // FILE* in; - // char ch; - - // size_t num_nodes_in_elem = 1; - // for (int dim = 0; dim < num_dims; dim++) { - // num_nodes_in_elem *= 2; - // } - - // // read the mesh WARNING: assumes a .geo file - // in = fopen(mesh_file_, "r"); - - // // skip 8 lines - // for (int j = 1; j <= 8; j++) { - // int i = 0; - // while ((ch = (char)fgetc(in)) != '\n') { - // i++; - // } - // } - - // // --- Read in the nodes in the mesh --- - - // size_t num_nodes = 0; - - // fscanf(in, "%lu", &num_nodes); - // printf("Number of nodes read in %lu\n", num_nodes); - - - // mesh.initialize_nodes(num_nodes); - - // // initialize node state variables, for now, we just need coordinates, the rest will be initialize by the respective solvers - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dims, required_node_state); - - // // read the initial mesh coordinates - // // x-coords - // for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { - // fscanf(in, "%le", &node.coords.host(node_id, 0)); - // node.coords.host(node_id, 0)*= mesh_inps.scale_x; - // } - - // // y-coords - // for (int node_id = 0; node_id < 
mesh.num_nodes; node_id++) { - // fscanf(in, "%le", &node.coords.host(node_id, 1)); - // node.coords.host(node_id, 1)*= mesh_inps.scale_y; - // } - - // // z-coords - // for (int node_id = 0; node_id < mesh.num_nodes; node_id++) { - // if (num_dims == 3) { - // fscanf(in, "%le", &node.coords.host(node_id, 2)); - // node.coords.host(node_id, 2)*= mesh_inps.scale_z; - // } - // else{ - // double dummy; - // fscanf(in, "%le", &dummy); - // } - // } // end for - - - // // Update device nodal positions - // node.coords.update_device(); - - // ch = (char)fgetc(in); - - // // skip 1 line - // for (int j = 1; j <= 1; j++) { - // int i = 0; - // while ((ch = (char)fgetc(in)) != '\n') { - // i++; - // } - // } - - // // --- read in the elements in the mesh --- - // size_t num_elems = 0; - - // fscanf(in, "%lu", &num_elems); - // printf("Number of elements read in %lu\n", num_elems); - - // // initialize elem variables - // mesh.initialize_elems(num_elems, num_dims); - // // GaussPoints.initialize(num_elems, 3); // always 3D here, even for 2D - - - // // for each cell read the list of associated nodes - // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - // fscanf(in, "%lu", &mesh.nodes_in_elem.host(elem_gid, node_lid)); // %d vs zu - - // // shift to start node index space at 0 - // mesh.nodes_in_elem.host(elem_gid, node_lid) -= 1; - // } - // } - - // // Convert from ensight to IJK mesh - // int convert_ensight_to_ijk[8]; - // convert_ensight_to_ijk[0] = 0; - // convert_ensight_to_ijk[1] = 1; - // convert_ensight_to_ijk[2] = 3; - // convert_ensight_to_ijk[3] = 2; - // convert_ensight_to_ijk[4] = 4; - // convert_ensight_to_ijk[5] = 5; - // convert_ensight_to_ijk[6] = 7; - // convert_ensight_to_ijk[7] = 6; - - // int tmp_ijk_indx[8]; - - // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - // 
tmp_ijk_indx[node_lid] = mesh.nodes_in_elem.host(elem_gid, convert_ensight_to_ijk[node_lid]); - // } - - // for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++){ - // mesh.nodes_in_elem.host(elem_gid, node_lid) = tmp_ijk_indx[node_lid]; - // } - // } - // // update device side - // mesh.nodes_in_elem.update_device(); - - // // initialize corner variables - // int num_corners = num_elems * mesh.num_nodes_in_elem; - // mesh.initialize_corners(num_corners); - // // corner.initialize(num_corners, num_dims); - - // // Close mesh input file - // fclose(in); - - // return; - // } // end read ensight mesh - - ///////////////////////////////////////////////////////////////////////////// - /// - /// \fn read_Abaqus_mesh - /// - /// \brief Read .inp mesh file - /// - /// \param Simulation mesh - /// \param Simulation state - /// \param Node state struct - /// \param Number of dimensions - /// - ///////////////////////////////////////////////////////////////////////////// - // void read_Abaqus_mesh(Mesh_t& mesh, - // State_t& State, - // int num_dims) - // { - - // std::cout<<"Reading abaqus input file for mesh"< nodes; - // std::vector elements; - - // std::string line; - // bool readingNodes = false; - // bool readingElements = false; - - // while (std::getline(inputFile, line)) { - // if (line.find("*Node") != std::string::npos) { - // readingNodes = true; - // std::cout<<"Found *Node"<> node.id && std::getline(iss, token, ',') && iss >> node.x && - // std::getline(iss, token, ',') && iss >> node.y && - // std::getline(iss, token, ',') && iss >> node.z)) { - // std::cerr << "Failed to parse line: " << line << std::endl; - // continue; // Skip this line if parsing failed - // } - // nodes.push_back(node); - // } - - // if (line.find("*Element") != std::string::npos) { - // readingElements = true; - // std::cout<<"Found *Element*"<> element.id)){ - // std::cout << "Failed to parse line: " << line << std::endl; - // continue; // Skip this line if parsing failed - 
// } - - // while ((std::getline(iss, token, ','))) { - // // Now extract the integer, ignoring any trailing whitespace - // int val; - // iss >> val; - // element.connectivity.push_back(val); - // } - - // // Convert from abaqus to IJK mesh - // int convert_abq_to_ijk[8]; - // convert_abq_to_ijk[0] = 0; - // convert_abq_to_ijk[1] = 1; - // convert_abq_to_ijk[2] = 3; - // convert_abq_to_ijk[3] = 2; - // convert_abq_to_ijk[4] = 4; - // convert_abq_to_ijk[5] = 5; - // convert_abq_to_ijk[6] = 7; - // convert_abq_to_ijk[7] = 6; - - // int tmp_ijk_indx[8]; - - // for (int node_lid = 0; node_lid < 8; node_lid++) { - // tmp_ijk_indx[node_lid] = element.connectivity[convert_abq_to_ijk[node_lid]]; - // } - - // for (int node_lid = 0; node_lid < 8; node_lid++){ - // element.connectivity[node_lid] = tmp_ijk_indx[node_lid]; - // } - - // elements.push_back(element); - // } - // } - - // inputFile.close(); - - // size_t num_nodes = nodes.size(); - - // printf("Number of nodes read in %lu\n", num_nodes); - - // // initialize node variables - // mesh.initialize_nodes(num_nodes); - - // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers - // std::vector required_node_state = { node_state::coords }; - - // State.node.initialize(num_nodes, num_dims, required_node_state); - - - // // Copy nodes to mesh - // for(int node_gid = 0; node_gid < num_nodes; node_gid++){ - // State.node.coords.host(node_gid, 0) = nodes[node_gid].x; - // State.node.coords.host(node_gid, 1) = nodes[node_gid].y; - // State.node.coords.host(node_gid, 2) = nodes[node_gid].z; - // } - - // // Update device nodal positions - // State.node.coords.update_device(); - - - // // --- read in the elements in the mesh --- - // size_t num_elems = elements.size(); - // printf("Number of elements read in %lu\n", num_elems); - - // // initialize elem variables - // mesh.initialize_elems(num_elems, num_dims); - - - // // for each cell read the list of associated 
nodes - // for (int elem_gid = 0; elem_gid < num_elems; elem_gid++) { - // for (int node_lid = 0; node_lid < 8; node_lid++) { - // mesh.nodes_in_elem.host(elem_gid, node_lid) = elements[elem_gid].connectivity[node_lid]; - - // // shift to start node index space at 0 - // mesh.nodes_in_elem.host(elem_gid, node_lid) -= 1; - // } - // } - - // // update device side - // mesh.nodes_in_elem.update_device(); - - // // initialize corner variables - // int num_corners = num_elems * mesh.num_nodes_in_elem; - // mesh.initialize_corners(num_corners); - // // State.corner.initialize(num_corners, num_dims); - - // } // end read abaqus mesh - ///////////////////////////////////////////////////////////////////////////// /// @@ -704,227 +396,6 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - // void read_vtk_mesh(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // mesh_input_t& mesh_inps, - // int num_dims) - // { - - // std::cout<<"Reading VTK mesh"< v = split (str, delimiter); - - // // looking for the following text: - // // POINTS %d float - // if(v[0] == "POINTS"){ - // size_t num_nodes = std::stoi(v[1]); - // printf("Number of nodes read in %zu\n", num_nodes); - // mesh.initialize_nodes(num_nodes); - - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dims, required_node_state); - - // found=true; - // } // end if - - - // if (i>1000){ - // std::cerr << "ERROR: Failed to find POINTS in file" << std::endl; - // break; - // } // end if - - // i++; - // } // end while - // } - - - // if(myrank==0){ - // // read the node coordinates - // for (node_gid=0; node_gid v = split (str, delimiter); - - // // save the nodal coordinates - // node.coords.host(node_gid, 0) = mesh_inps.scale_x*std::stod(v[0]); // double - // node.coords.host(node_gid, 1) = mesh_inps.scale_y*std::stod(v[1]); // double - // 
if(num_dims==3){ - // node.coords.host(node_gid, 2) = mesh_inps.scale_z*std::stod(v[2]); // double - // } - - // } // end for nodes - // } - - // // Update device nodal positions - // node.coords.update_device(); - - // if(myrank==0){ - // found=false; - - // // look for CELLS - // i = 0; - // size_t num_elems = 0; - // while (found==false) { - // std::string str; - // std::getline(in, str); - - // std::string delimiter = " "; - // std::vector v = split (str, delimiter); - // std::cout << v[0] << std::endl; // printing - - // // looking for the following text: - // // CELLS num_elems size - // if(v[0] == "CELLS"){ - // num_elems = std::stoi(v[1]); - // printf("Number of elements read in %zu\n", num_elems); - - // // initialize elem variables - // mesh.initialize_elems(num_elems, num_dims); - - // found=true; - // } // end if - - - // if (i>1000){ - // printf("ERROR: Failed to find CELLS \n"); - // break; - // } // end if - - // i++; - // } // end while - // } - - // if(myrank==0){ - // // read the node ids in the element - // for (elem_gid=0; elem_gid v = split (str, delimiter); - // num_nodes_in_elem = std::stoi(v[0]); - - // for (size_t node_lid=0; node_lid v = split (str, delimiter); - - // // looking for the following text: - // // CELLS num_elems size - // if(v[0] == "CELL_TYPES"){ - - // std::getline(in, str); - // elem_type = std::stoi(str); - - // found=true; - // } // end if - - - // if (i>1000){ - // printf("ERROR: Failed to find elem_TYPE \n"); - // break; - // } // end if - - // i++; - // } // end while - // printf("Element type = %zu \n", elem_type); - // // elem types: - // // linear hex = 12, linear quad = 9 - // found=false; - - - // if(num_nodes_in_elem==8 & elem_type != 12) { - // printf("Wrong element type of %zu \n", elem_type); - // std::cerr << "ERROR: incorrect element type in VTK file" << std::endl; - // } - - // in.close(); - // } - - // } // end of VTKread function void read_vtk_mesh(Mesh_t& mesh, GaussPoint_t& GaussPoints, @@ -1454,302 
+925,328 @@ class MeshReader /// \param Number of dimensions /// ///////////////////////////////////////////////////////////////////////////// - // void read_vtu_mesh(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // mesh_input_t& mesh_inps, - // int num_dims) - // { + void read_vtu_mesh(Mesh_t& mesh, + GaussPoint_t& GaussPoints, + node_t& node, + corner_t& corner, + mesh_input_t& mesh_inps, + int num_dims) + { + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); - // std::cout<<"Reading VTU file in a multiblock VTK mesh"< read_buffer; + + + // read the mesh + // --- Read the number of nodes in the mesh --- // + size_t global_num_nodes = 0; + size_t global_num_elems = 0; + int i; // used for writing information to file + int node_gid; // the global id for the point + int elem_gid; // the global id for the elem + + + // + int Pn_order = mesh_inps.p_order; + size_t num_nodes_in_elem = 1; + for (int dim = 0; dim < num_dims; dim++) { + num_nodes_in_elem *= (Pn_order + 1); + } + + if(myrank==0){ + std::cout<<"Reading VTU file in a multiblock VTK mesh"< required_node_state = { node_state::coords }; + node.initialize(num_nodes, num_dims, required_node_state); + + //------------------------------------ + // allocate the elem object id array + mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); + + + // ------------------------ + // Mesh file storage order: + // objectId + // Points + // connectivity + // offsets + // types + // ------------------------ + + // temporary arrays + DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files + DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); + DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types + + + // for all fields, we stop recording when we get to "<" + std::string stop = "<"; + + // the size of 1D storage from reading the mesh file 
+ size_t size; + + // --- + // Object ids + // --- + + // the object id in the element + // array dims are (num_elems) + found = extract_values_xml(mesh_inps.object_ids.host.pointer(), + "\"ObjectId\"", + stop, + in, + size); + if(found==false){ + throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); + //std::cout << "ERROR: ObjectIDs were not found in the XML file!" << std::endl; + } + mesh_inps.object_ids.update_device(); + + + // --- + // Nodal coordinates of mesh + // --- + + // coordinates of the node + // array dims are (num_nodes,dims) + // must use the quotes around Points to read the point values + found = extract_values_xml(node_coords.host.pointer(), + "\"Points\"", + stop, + in, + size); + if(found==false){ + throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); + //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; + } + if (size!=num_nodes*3){ + throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); + //std::cout << "ERROR: failed to read all the mesh nodes!" 
<< std::endl; + } + node_coords.update_device(); - // //------------------------------------ - // // allocate node coordinate state - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dims, required_node_state); + // dimensional scaling of the mesh + const double scl_x = mesh_inps.scale_x; + const double scl_y = mesh_inps.scale_y; + const double scl_z = mesh_inps.scale_z; - // //------------------------------------ - // // allocate the elem object id array - // mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); + // save the node coordinates to the state array + FOR_ALL(node_gid, 0, mesh.num_nodes, { + + // save the nodal coordinates + node.coords(node_gid, 0) = scl_x*node_coords(node_gid, 0); // double + node.coords(node_gid, 1) = scl_y*node_coords(node_gid, 1); // double + if(num_dims==3){ + node.coords(node_gid, 2) = scl_z*node_coords(node_gid, 2); // double + } + }); // end for parallel nodes + node.coords.update_host(); - // // ------------------------ - // // Mesh file storage order: - // // objectId - // // Points - // // connectivity - // // offsets - // // types - // // ------------------------ - - // // temporary arrays - // DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files - // DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); - // DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types - - - // // for all fields, we stop recording when we get to "<" - // std::string stop = "<"; - - // // the size of 1D storage from reading the mesh file - // size_t size; - - // // --- - // // Object ids - // // --- - - // // the object id in the element - // // array dims are (num_elems) - // found = extract_values_xml(mesh_inps.object_ids.host.pointer(), - // "\"ObjectId\"", - // stop, - // in, - // size); - // if(found==false){ - // throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); - // 
//std::cout << "ERROR: ObjectIDs were not found in the XML file!" << std::endl; - // } - // mesh_inps.object_ids.update_device(); - - - // // --- - // // Nodal coordinates of mesh - // // --- - - // // coordinates of the node - // // array dims are (num_nodes,dims) - // // must use the quotes around Points to read the point values - // found = extract_values_xml(node_coords.host.pointer(), - // "\"Points\"", - // stop, - // in, - // size); - // if(found==false){ - // throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); - // //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; - // } - // if (size!=num_nodes*3){ - // throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); - // //std::cout << "ERROR: failed to read all the mesh nodes!" << std::endl; - // } - // node_coords.update_device(); - // // dimensional scaling of the mesh - // const double scl_x = mesh_inps.scale_x; - // const double scl_y = mesh_inps.scale_y; - // const double scl_z = mesh_inps.scale_z; + // --- + // Nodes in the element + // --- - // // save the node coordinates to the state array - // FOR_ALL(node_gid, 0, mesh.num_nodes, { - - // // save the nodal coordinates - // node.coords(node_gid, 0) = scl_x*node_coords(node_gid, 0); // double - // node.coords(node_gid, 1) = scl_y*node_coords(node_gid, 1); // double - // if(num_dims==3){ - // node.coords(node_gid, 2) = scl_z*node_coords(node_gid, 2); // double - // } - - // }); // end for parallel nodes - // node.coords.update_host(); - - - // // --- - // // Nodes in the element - // // --- - - // // fill temporary nodes in the element array - // // array dims are (num_elems,num_nodes_in_elem) - // found = extract_values_xml(connectivity.host.pointer(), - // "\"connectivity\"", - // stop, - // in, - // size); - // if(found==false){ - // std::cout << "ERROR: mesh connectivity was not found in the XML file!" 
<< std::endl; - // } - // connectivity.update_device(); - - // // array dims are the (num_elems) - // // 8 = pixal i,j,k linear quad format - // // 9 = linear quad ensight ordering - // // 12 = linear ensight hex ordering - // // 72 = VTK_LAGRANGE_HEXAHEDRON - // // .... - // found = extract_values_xml(elem_types.host.pointer(), - // "\"types\"", - // stop, - // in, - // size); - // if(found==false){ - // std::cout << "ERROR: element types were not found in the XML file!" << std::endl; - // } - // elem_types.update_device(); - - // // check that the element type is supported by Fierro - // FOR_ALL (elem_gid, 0, mesh.num_elems, { - // if(elem_types(elem_gid) == element_types::linear_quad || - // elem_types(elem_gid) == element_types::linear_hex_ijk || - // elem_types(elem_gid) == element_types::linear_hex || - // elem_types(elem_gid) == element_types::arbitrary_hex ) - // { - // // at least one of them is true - // } - // else - // { - // // unknown element used - // Kokkos::abort("Unknown element type in the mesh \n"); - // } - // }); - - // // Convert from ensight linear hex to a IJK mesh - // CArrayKokkos convert_ensight_to_ijk(8, "convert_ensight_to_ijk"); - - // // Convert the arbitrary order hex to a IJK mesh - // DCArrayKokkos convert_pn_vtk_to_ijk(mesh.num_nodes_in_elem, "convert_pn_vtk_to_ijk"); - - // //build the connectivity for element type 12 - // // elem_types.host(0) - // switch(elem_types.host(0)){ - - // case element_types::linear_quad: - // // the node order is correct, no changes required - - // FOR_ALL (elem_gid, 0, mesh.num_elems, { + // fill temporary nodes in the element array + // array dims are (num_elems,num_nodes_in_elem) + found = extract_values_xml(connectivity.host.pointer(), + "\"connectivity\"", + stop, + in, + size); + if(found==false){ + std::cout << "ERROR: mesh connectivity was not found in the XML file!" 
<< std::endl; + } + connectivity.update_device(); + + // array dims are the (num_elems) + // 8 = pixal i,j,k linear quad format + // 9 = linear quad ensight ordering + // 12 = linear ensight hex ordering + // 72 = VTK_LAGRANGE_HEXAHEDRON + // .... + found = extract_values_xml(elem_types.host.pointer(), + "\"types\"", + stop, + in, + size); + if(found==false){ + std::cout << "ERROR: element types were not found in the XML file!" << std::endl; + } + elem_types.update_device(); + + // check that the element type is supported by Fierro + FOR_ALL (elem_gid, 0, mesh.num_elems, { + if(elem_types(elem_gid) == element_types::linear_quad || + elem_types(elem_gid) == element_types::linear_hex_ijk || + elem_types(elem_gid) == element_types::linear_hex || + elem_types(elem_gid) == element_types::arbitrary_hex ) + { + // at least one of them is true + } + else + { + // unknown element used + Kokkos::abort("Unknown element type in the mesh \n"); + } + }); + + // Convert from ensight linear hex to a IJK mesh + CArrayKokkos convert_ensight_to_ijk(8, "convert_ensight_to_ijk"); + + // Convert the arbitrary order hex to a IJK mesh + DCArrayKokkos convert_pn_vtk_to_ijk(mesh.num_nodes_in_elem, "convert_pn_vtk_to_ijk"); + + //build the connectivity for element type 12 + // elem_types.host(0) + switch(elem_types.host(0)){ + + case element_types::linear_quad: + // the node order is correct, no changes required + + FOR_ALL (elem_gid, 0, mesh.num_elems, { - // for (size_t node_lid=0; node_lid Date: Fri, 5 Sep 2025 20:21:27 -0600 Subject: [PATCH 50/66] WIP: vtu read --- .../src/common/include/mesh_io.h | 303 ++++++++++++------ 1 file changed, 206 insertions(+), 97 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index dd1cff894..e39dc5913 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -52,7 +52,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#define BUFFER_LINES 100000 +#define BUFFER_SIZE 100000 #define MAX_WORD 30 ///////////////////////////////////////////////////////////////////////////// @@ -492,11 +492,11 @@ class MeshReader int elem_words_per_line = num_nodes_in_elem; // allocate read buffer - read_buffer = CArrayKokkos(BUFFER_LINES, words_per_line, MAX_WORD); + read_buffer = CArrayKokkos(BUFFER_SIZE, words_per_line, MAX_WORD); dof_limit = global_num_nodes; - buffer_iterations = dof_limit / BUFFER_LINES; - if (dof_limit % BUFFER_LINES != 0) + buffer_iterations = dof_limit / BUFFER_SIZE; + if (dof_limit % BUFFER_SIZE != 0) { buffer_iterations++; } @@ -521,7 +521,7 @@ class MeshReader // pack buffer on rank 0 if (myrank == 0 && buffer_iteration < buffer_iterations - 1) { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) { getline(in, read_line); line_parse.clear(); @@ -541,7 +541,7 @@ class MeshReader else if (myrank == 0) { buffer_loop = 0; - while (buffer_iteration * BUFFER_LINES + buffer_loop < global_num_nodes) { + while (buffer_iteration * BUFFER_SIZE + buffer_loop < global_num_nodes) { getline(in, read_line); line_parse.clear(); line_parse.str(read_line); @@ -559,7 +559,7 @@ class MeshReader } // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong - MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); + MPI_Bcast(read_buffer.pointer(), BUFFER_SIZE * words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); // broadcast how many nodes were read into this buffer iteration MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, MPI_COMM_WORLD); @@ -593,7 +593,7 @@ class MeshReader } } } - read_index_start += BUFFER_LINES; + read_index_start += BUFFER_SIZE; } // end of coordinate readin node_coords_distributed.update_device(); @@ -683,19 +683,19 @@ class MeshReader // read in element connectivity // we're gonna reallocate for 
the words per line expected for the element connectivity - read_buffer = CArrayKokkos(BUFFER_LINES, elem_words_per_line, MAX_WORD); + read_buffer = CArrayKokkos(BUFFER_SIZE, elem_words_per_line, MAX_WORD); // calculate buffer iterations to read number of lines - buffer_iterations = global_num_elems / BUFFER_LINES; + buffer_iterations = global_num_elems / BUFFER_SIZE; int assign_flag; // dynamic buffer used to store elements before we know how many this rank needs - std::vector element_temp(BUFFER_LINES * elem_words_per_line); - std::vector global_indices_temp(BUFFER_LINES); - size_t buffer_max = BUFFER_LINES * elem_words_per_line; - size_t indices_buffer_max = BUFFER_LINES; + std::vector element_temp(BUFFER_SIZE * elem_words_per_line); + std::vector global_indices_temp(BUFFER_SIZE); + size_t buffer_max = BUFFER_SIZE * elem_words_per_line; + size_t indices_buffer_max = BUFFER_SIZE; - if (global_num_elems % BUFFER_LINES != 0) + if (global_num_elems % BUFFER_SIZE != 0) { buffer_iterations++; } @@ -706,7 +706,7 @@ class MeshReader // pack buffer on rank 0 if (myrank == 0 && buffer_iteration < buffer_iterations - 1) { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) { getline(in, read_line); line_parse.clear(); @@ -728,7 +728,7 @@ class MeshReader else if (myrank == 0) { buffer_loop = 0; - while (buffer_iteration * BUFFER_LINES + buffer_loop < global_num_elems) { + while (buffer_iteration * BUFFER_SIZE + buffer_loop < global_num_elems) { getline(in, read_line); line_parse.clear(); line_parse.str(read_line); @@ -749,7 +749,7 @@ class MeshReader } // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong - MPI_Bcast(read_buffer.pointer(), BUFFER_LINES * elem_words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); + MPI_Bcast(read_buffer.pointer(), BUFFER_SIZE * elem_words_per_line * MAX_WORD, MPI_CHAR, 0, MPI_COMM_WORLD); // broadcast how many nodes were 
read into this buffer iteration MPI_Bcast(&buffer_loop, 1, MPI_INT, 0, MPI_COMM_WORLD); @@ -804,8 +804,8 @@ class MeshReader { if ((num_elems - 1) * elem_words_per_line + inode >= buffer_max) { - element_temp.resize((num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line); - buffer_max = (num_elems - 1) * elem_words_per_line + inode + BUFFER_LINES * elem_words_per_line; + element_temp.resize((num_elems - 1) * elem_words_per_line + inode + BUFFER_SIZE * elem_words_per_line); + buffer_max = (num_elems - 1) * elem_words_per_line + inode + BUFFER_SIZE * elem_words_per_line; } element_temp[(num_elems - 1) * elem_words_per_line + inode] = node_store(inode); // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; @@ -813,13 +813,13 @@ class MeshReader // assign global element id to temporary list if (num_elems - 1 >= indices_buffer_max) { - global_indices_temp.resize(num_elems - 1 + BUFFER_LINES); - indices_buffer_max = num_elems - 1 + BUFFER_LINES; + global_indices_temp.resize(num_elems - 1 + BUFFER_SIZE); + indices_buffer_max = num_elems - 1 + BUFFER_SIZE; } global_indices_temp[num_elems - 1] = elem_gid; } } - read_index_start += BUFFER_LINES; + read_index_start += BUFFER_SIZE; } //set global and local shared element counts @@ -944,7 +944,7 @@ class MeshReader real_t dof_value; real_t unit_scaling = 1; - CArrayKokkos read_buffer; + CArray read_buffer(BUFFER_SIZE*num_dims); // read the mesh @@ -954,6 +954,7 @@ class MeshReader int i; // used for writing information to file int node_gid; // the global id for the point int elem_gid; // the global id for the elem + std::streampos objectid_streampos; // @@ -994,57 +995,33 @@ class MeshReader //------------------------------------ // allocate mesh class nodes and elems - mesh.initialize_nodes(num_nodes); - mesh.initialize_elems(num_elems, num_dims); - - //------------------------------------ - // 
allocate node coordinate state - std::vector required_node_state = { node_state::coords }; - node.initialize(num_nodes, num_dims, required_node_state); + mesh.global_num_nodes = global_num_nodes; - //------------------------------------ - // allocate the elem object id array - mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); + //allocate initial contiguously partitioned node coordinate vector and get map of node indices + DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); + // construct contiguous parallel row map now that we know the number of nodes + DistributedMap node_map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); - // ------------------------ - // Mesh file storage order: - // objectId - // Points - // connectivity - // offsets - // types - // ------------------------ - - // temporary arrays - DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files - DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); - DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types + // set the vertices in the mesh read in + size_t num_local_nodes = node_map.size(); // for all fields, we stop recording when we get to "<" std::string stop = "<"; // the size of 1D storage from reading the mesh file - size_t size; + size_t size = 0; // --- // Object ids // --- - // the object id in the element - // array dims are (num_elems) - found = extract_values_xml(mesh_inps.object_ids.host.pointer(), - "\"ObjectId\"", - stop, - in, - size); - if(found==false){ - throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); - //std::cout << "ERROR: ObjectIDs were not found in the XML file!" 
<< std::endl; + //save fstream pointer for object ids for later after element map is decided (prevents more comms if object ids is always first) + if(myrank==0){ + objectid_streampos = in.tellg(); } - mesh_inps.object_ids.update_device(); - // --- // Nodal coordinates of mesh @@ -1053,19 +1030,115 @@ class MeshReader // coordinates of the node // array dims are (num_nodes,dims) // must use the quotes around Points to read the point values - found = extract_values_xml(node_coords.host.pointer(), - "\"Points\"", - stop, - in, - size); - if(found==false){ - throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); - //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; + dof_limit = global_num_nodes*num_dims; + buffer_iterations = dof_limit / (BUFFER_SIZE*num_dims); + size_t remainder_size = dof_limit % (BUFFER_SIZE*num_dims); + if (remainder_size != 0) + { + buffer_iterations++; } - if (size!=num_nodes*3){ - throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); - //std::cout << "ERROR: failed to read all the mesh nodes!" << std::endl; + + //first find the block with node coords data + if(myrank==0){ + + bool found = false; + + std::string line; + const std::string word = "\"Points\""; + + // Read the file line by line looking for specified word + while (std::getline(in, line)) { + + if (line.find(word) != std::string::npos) { // Check if the portion of the word is in the line + found = true; + } + if(found) { + + if(found) break; + + } // end if found + + } // end while + + if(found==false){ + throw std::runtime_error("**** ERROR: mesh nodes were not found in the XML file! ****"); + //std::cout << "ERROR: mesh nodes were not found in the XML file!" 
<< std::endl; + } } + + + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++){ + // pack buffer on rank 0 + size_t buffer_iteration_size; + if(buffer_iteration < buffer_iterations - 1){ + buffer_iteration_size = BUFFER_SIZE*num_dims; + } + else{ + buffer_iteration_size = remainder_size; + } + + if(myrank==0){ + + std::string line; + + // loop over the lines in the file until the buffer limit is reached + for(int idata = 0; idata < buffer_iteration_size; idata++){ + + // extract the individual values from the stream + std::string value; + in >> value; + if (value == stop) { // Check if the stop word is in the line + break; + } // end if + read_buffer(i) = std::stod(value); + size++; + } // end for + } + // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong + MPI_Bcast(read_buffer.pointer(), BUFFER_SIZE*num_dims, MPI_DOUBLE, 0, MPI_COMM_WORLD); + // broadcast how many nodes were read into this buffer iteration + MPI_Bcast(&buffer_iteration_size, 1, MPI_INT, 0, MPI_COMM_WORLD); + + // debug_print + // std::cout << "NODE BUFFER LOOP IS: " << buffer_loop << std::endl; + // for(int iprint=0; iprint < buffer_loop; iprint++) + // std::cout<<"buffer packing: " << std::string(&read_buffer(iprint,0,0)) << std::endl; + // return; + + // determine which data to store in the swage mesh members (the local node data) + // loop through read buffer + for (scan_loop = 0; scan_loop < buffer_iteration_size/num_dims; scan_loop++) + { + // set global node id (ensight specific order) + node_gid = read_index_start + scan_loop; + // let map decide if this node id belongs locally; if yes store data + if (node_map.isProcessGlobalIndex(node_gid)) + { + // set local node index in this mpi rank + node_rid = node_map.getLocalIndex(node_gid); + // extract nodal position from the read buffer + // for tecplot format this is the three coords in the same line + dof_value = read_buffer(scan_loop*num_dims); + 
node_coords_distributed.host(node_rid, 0) = dof_value * unit_scaling; + dof_value = atof(&read_buffer(scan_loop*num_dims + 1)); + node_coords_distributed.host(node_rid, 1) = dof_value * unit_scaling; + if (num_dims == 3) + { + dof_value = atof(&read_buffer(scan_loop*num_dims + 2)); + node_coords_distributed.host(node_rid, 2) = dof_value * unit_scaling; + } + } + } + read_index_start += buffer_iteration_size/num_dims; + } + + if(myrank==0){ + if (size!=global_num_nodes*num_dims){ + throw std::runtime_error("ERROR: failed to read all the mesh nodes!"); + //std::cout << "ERROR: failed to read all the mesh nodes!" << std::endl; + } + } + node_coords.update_device(); // dimensional scaling of the mesh @@ -1243,6 +1316,42 @@ class MeshReader size_t num_corners = mesh.num_elems * mesh.num_nodes_in_elem; mesh.initialize_corners(num_corners); + + //------------------------------------ + // allocate the elem object id array + mesh_inps.object_ids = DCArrayKokkos (num_elems, "ObjectIDs"); + + + // ------------------------ + // Mesh file storage order: + // objectId + // Points + // connectivity + // offsets + // types + // ------------------------ + + // temporary arrays + DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files + DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); + DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types + + //reset file stream pointer to area where ObjectId is located before node coordinates + in.seekg(first_elem_line_streampos); + + // read the object id in the element + // array dims are (num_elems) + found = extract_values_xml(mesh_inps.object_ids.host.pointer(), + "\"ObjectId\"", + stop, + in, + size); + if(found==false){ + throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); + //std::cout << "ERROR: ObjectIDs were not found in the XML file!" 
<< std::endl; + } + mesh_inps.object_ids.update_device(); + in.close(); @@ -1721,13 +1830,13 @@ class MeshBuilder // read coords read_index_start = 0; - buffer_iterations = global_num_nodes / BUFFER_LINES; - if (global_num_nodes % BUFFER_LINES != 0) + buffer_iterations = global_num_nodes / BUFFER_SIZE; + if (global_num_nodes % BUFFER_SIZE != 0) { buffer_iterations++; } - read_buffer = CArrayKokkos(BUFFER_LINES, num_dims); + read_buffer = CArrayKokkos(BUFFER_SIZE, num_dims); { //scoped so temp FArray data is auto deleted to save memory //allocate pre-partition node coords using contiguous decomposition @@ -1745,7 +1854,7 @@ class MeshBuilder // pack buffer on rank 0 if (myrank == 0 && buffer_iteration < buffer_iterations - 1) { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) { for (int idim = 0; idim < num_dims; idim++) @@ -1753,27 +1862,27 @@ class MeshBuilder // debug print // std::cout<<" "<< substring <(BUFFER_LINES, num_nodes_in_elem); + read_buffer_edof = CArrayKokkos(BUFFER_SIZE, num_nodes_in_elem); // calculate buffer iterations to read number of lines - buffer_iterations = global_num_elems / BUFFER_LINES; + buffer_iterations = global_num_elems / BUFFER_SIZE; int assign_flag; // dynamic buffer used to store elements before we know how many this rank needs - std::vector element_temp(BUFFER_LINES * num_nodes_in_elem); - std::vector global_indices_temp(BUFFER_LINES); - size_t buffer_max = BUFFER_LINES * num_nodes_in_elem; - size_t indices_buffer_max = BUFFER_LINES; + std::vector element_temp(BUFFER_SIZE * num_nodes_in_elem); + std::vector global_indices_temp(BUFFER_SIZE); + size_t buffer_max = BUFFER_SIZE * num_nodes_in_elem; + size_t indices_buffer_max = BUFFER_SIZE; size_t num_elems = 0; - if (global_num_elems % BUFFER_LINES != 0) + if (global_num_elems % BUFFER_SIZE != 0) { buffer_iterations++; } @@ -1888,11 +1997,11 @@ class MeshBuilder // pack buffer on rank 0 if (myrank 
== 0 && buffer_iteration < buffer_iterations - 1) { - for (buffer_loop = 0; buffer_loop < BUFFER_LINES; buffer_loop++) + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) { for (int inode = 0; inode < num_nodes_in_elem; inode++) { - read_buffer_edof(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_LINES + buffer_loop, inode); + read_buffer_edof(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_SIZE + buffer_loop, inode); } // std::cout <= buffer_max) { - element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_LINES * num_nodes_in_elem); - buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_LINES * num_nodes_in_elem; + element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem); + buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem; } element_temp[(num_elems - 1) * num_nodes_in_elem + inode] = node_store(inode); // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; @@ -1954,13 +2063,13 @@ class MeshBuilder // assign global element id to temporary list if (num_elems - 1 >= indices_buffer_max) { - global_indices_temp.resize(num_elems - 1 + BUFFER_LINES); - indices_buffer_max = num_elems - 1 + BUFFER_LINES; + global_indices_temp.resize(num_elems - 1 + BUFFER_SIZE); + indices_buffer_max = num_elems - 1 + BUFFER_SIZE; } global_indices_temp[num_elems - 1] = elem_gid; } } - read_index_start += BUFFER_LINES; + read_index_start += BUFFER_SIZE; } //set global and local shared element counts mesh.global_num_elems = global_num_elems; From 623c142a5d9ed31779cd9abf769b0ba40e056e61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 7 Sep 2025 22:24:12 -0600 Subject: [PATCH 51/66] WIP : vtu mesh reader --- .../src/common/include/mesh_io.h | 226 +++++++++++++++--- 1 
file changed, 189 insertions(+), 37 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index e39dc5913..77b232fad 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -936,6 +936,7 @@ class MeshReader MPI_Comm_rank(MPI_COMM_WORLD,&myrank); MPI_Comm_size(MPI_COMM_WORLD,&nranks); + std::ifstream in; // FILE *in; int local_node_index, current_column_index; int buffer_loop, buffer_iteration, buffer_iterations, dof_limit, scan_loop; @@ -943,19 +944,26 @@ class MeshReader long long int node_gid; real_t dof_value; real_t unit_scaling = 1; + bool zero_index_base = true; + int negative_index_found = 0; + int global_negative_index_found = 0; - CArray read_buffer(BUFFER_SIZE*num_dims); + CArrayKokkos read_buffer(BUFFER_SIZE*num_dims); // read the mesh // --- Read the number of nodes in the mesh --- // size_t global_num_nodes = 0; size_t global_num_elems = 0; + size_t num_elems = 0; int i; // used for writing information to file - int node_gid; // the global id for the point - int elem_gid; // the global id for the elem std::streampos objectid_streampos; + // dimensional scaling of the mesh + const double scl_x = mesh_inps.scale_x; + const double scl_y = mesh_inps.scale_y; + const double scl_z = mesh_inps.scale_z; + // int Pn_order = mesh_inps.p_order; @@ -989,9 +997,6 @@ class MeshReader // broadcast number of nodes MPI_Bcast(&global_num_nodes, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); - - // broadcast number of elems - MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); //------------------------------------ // allocate mesh class nodes and elems @@ -1119,13 +1124,13 @@ class MeshReader // extract nodal position from the read buffer // for tecplot format this is the three coords in the same line dof_value = read_buffer(scan_loop*num_dims); - node_coords_distributed.host(node_rid, 0) = dof_value * unit_scaling; - 
dof_value = atof(&read_buffer(scan_loop*num_dims + 1)); - node_coords_distributed.host(node_rid, 1) = dof_value * unit_scaling; + node_coords_distributed.host(node_rid, 0) = dof_value * scl_x; + dof_value = read_buffer(scan_loop*num_dims + 1); + node_coords_distributed.host(node_rid, 1) = dof_value * scl_y; if (num_dims == 3) { - dof_value = atof(&read_buffer(scan_loop*num_dims + 2)); - node_coords_distributed.host(node_rid, 2) = dof_value * unit_scaling; + dof_value = read_buffer(scan_loop*num_dims + 2); + node_coords_distributed.host(node_rid, 2) = dof_value * scl_z; } } } @@ -1139,41 +1144,188 @@ class MeshReader } } - node_coords.update_device(); - - // dimensional scaling of the mesh - const double scl_x = mesh_inps.scale_x; - const double scl_y = mesh_inps.scale_y; - const double scl_z = mesh_inps.scale_z; - - // save the node coordinates to the state array - FOR_ALL(node_gid, 0, mesh.num_nodes, { - - // save the nodal coordinates - node.coords(node_gid, 0) = scl_x*node_coords(node_gid, 0); // double - node.coords(node_gid, 1) = scl_y*node_coords(node_gid, 1); // double - if(num_dims==3){ - node.coords(node_gid, 2) = scl_z*node_coords(node_gid, 2); // double + // repartition node distribution + node_coords_distributed.repartition_vector(); + //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope + std::vector required_node_state = { node_state::coords }; + node_map = node_coords_distributed.pmap; + node.initialize(node_map, num_dims, required_node_state); + //copy coordinate data from repartitioned FArray into CArray + FOR_ALL(node_id, 0, node_map.size(), { + for(int idim = 0; idim < num_dims; idim++){ + node.coords(node_id,idim) = node_coords_distributed(node_id,idim); } + }); - }); // end for parallel nodes - node.coords.update_host(); + //initialize some mesh data + mesh.initialize_nodes(global_num_nodes); + num_local_nodes = node_map.size(); + mesh.num_local_nodes = num_local_nodes; + 
mesh.node_map = node_map; + /***Element data***/ + + // broadcast number of elems + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); // --- // Nodes in the element // --- - // fill temporary nodes in the element array - // array dims are (num_elems,num_nodes_in_elem) - found = extract_values_xml(connectivity.host.pointer(), - "\"connectivity\"", - stop, - in, - size); - if(found==false){ - std::cout << "ERROR: mesh connectivity was not found in the XML file!" << std::endl; + dof_limit = global_num_elems*num_nodes_in_elem; + buffer_iterations = dof_limit / (BUFFER_SIZE*num_nodes_in_elem); + remainder_size = dof_limit % (BUFFER_SIZE*num_nodes_in_elem); + if (remainder_size != 0) + { + buffer_iterations++; } + + + CArrayKokkos elem_read_buffer(BUFFER_SIZE*num_nodes_in_elem); + CArrayKokkos node_store(num_nodes_in_elem); + + std::vector element_temp(BUFFER_SIZE * num_nodes_in_elem); + std::vector global_indices_temp(BUFFER_SIZE); + size_t buffer_max = BUFFER_SIZE * elem_words_per_line; + size_t indices_buffer_max = BUFFER_SIZE; + int assign_flag; + + //first find the block with element connectivity data data + if(myrank==0){ + + bool found = false; + + std::string line; + const std::string word = "\"connectivity\""; + + // Read the file line by line looking for specified word + while (std::getline(in, line)) { + + if (line.find(word) != std::string::npos) { // Check if the portion of the word is in the line + found = true; + } + if(found) { + + if(found) break; + + } // end if found + + } // end while + + if(found==false){ + throw std::runtime_error("ERROR: mesh connectivity was not found in the XML file!"); + //std::cout << "ERROR: mesh nodes were not found in the XML file!" 
<< std::endl; + } + } + + + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++){ + // pack buffer on rank 0 + size_t buffer_iteration_size; + if(buffer_iteration < buffer_iterations - 1){ + buffer_iteration_size = BUFFER_SIZE*num_nodes_in_elem; + } + else{ + buffer_iteration_size = remainder_size; + } + + if(myrank==0){ + + std::string line; + + // loop over the lines in the file until the buffer limit is reached + for(int idata = 0; idata < buffer_iteration_size; idata++){ + + // extract the individual values from the stream + std::string value; + in >> value; + if (value == stop) { // Check if the stop word is in the line + break; + } // end if + elem_read_buffer(i) = std::stod(value); + size++; + } // end for + } + // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong + MPI_Bcast(elem_read_buffer.pointer(), BUFFER_SIZE*num_nodes_in_elem, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + // broadcast how many nodes were read into this buffer iteration + MPI_Bcast(&buffer_iteration_size, 1, MPI_INT, 0, MPI_COMM_WORLD); + + // debug_print + // std::cout << "NODE BUFFER LOOP IS: " << buffer_loop << std::endl; + // for(int iprint=0; iprint < buffer_loop; iprint++) + // std::cout<<"buffer packing: " << std::string(&read_buffer(iprint,0,0)) << std::endl; + // return; + + // determine which data to store in the swage mesh members (the local node data) + // loop through read buffer + for (scan_loop = 0; scan_loop < buffer_iteration_size/num_nodes_in_elem; scan_loop++) + { + // set global node id (ensight specific order) + elem_gid = read_index_start + scan_loop; + // add this element to the local list if any of its nodes belong to this rank according to the map + // get list of nodes for each element line and check if they belong to the map + assign_flag = 0; + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { + // as we loop through the nodes belonging to this element we store them + // if any of 
these nodes belongs to this rank this list is used to store the element locally + node_gid = elem_read_buffer(scan_loop*num_nodes_in_elem + inode); + if (zero_index_base) + { + node_store(inode) = node_gid; // subtract 1 since file index start is 1 but code expects 0 + } + else + { + node_store(inode) = node_gid - 1; // subtract 1 since file index start is 1 but code expects 0 + } + if (node_store(inode) < 0) + { + negative_index_found = 1; + } + // first we add the elements to a dynamically allocated list + if (zero_index_base) + { + if (node_map.isProcessGlobalIndex(node_gid) && !assign_flag) + { + assign_flag = 1; + num_elems++; + } + } + else + { + if (node_map.isProcessGlobalIndex(node_gid - 1) && !assign_flag) + { + assign_flag = 1; + num_elems++; + } + } + } + + if (assign_flag) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { + if ((num_elems - 1) * num_nodes_in_elem + inode >= buffer_max) + { + element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem); + buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem; + } + element_temp[(num_elems - 1) * num_nodes_in_elem + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + } + // assign global element id to temporary list + if (num_elems - 1 >= indices_buffer_max) + { + global_indices_temp.resize(num_elems - 1 + BUFFER_SIZE); + indices_buffer_max = num_elems - 1 + BUFFER_SIZE; + } + global_indices_temp[num_elems - 1] = elem_gid; + } + } + read_index_start += buffer_iteration_size/num_nodes_in_elem; + } + connectivity.update_device(); // array dims are the (num_elems) From e31114e6d569cc53d37e5216f76126988d70ecb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 9 Sep 2025 00:10:57 -0600 Subject: [PATCH 52/66] WIP: vtu reader --- 
.../src/common/include/mesh_io.h | 329 +++++++++++++++--- 1 file changed, 277 insertions(+), 52 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 77b232fad..f7b7ceaf6 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -952,6 +952,16 @@ class MeshReader // read the mesh + + // ------------------------ + // Mesh file storage order: + // objectId + // Points + // connectivity + // offsets + // types + // ------------------------ + // --- Read the number of nodes in the mesh --- // size_t global_num_nodes = 0; size_t global_num_elems = 0; @@ -1186,9 +1196,11 @@ class MeshReader std::vector element_temp(BUFFER_SIZE * num_nodes_in_elem); std::vector global_indices_temp(BUFFER_SIZE); - size_t buffer_max = BUFFER_SIZE * elem_words_per_line; + size_t buffer_max = BUFFER_SIZE * num_nodes_in_elem; size_t indices_buffer_max = BUFFER_SIZE; int assign_flag; + read_index_start = 0; + size = 0; //first find the block with element connectivity data data if(myrank==0){ @@ -1242,7 +1254,7 @@ class MeshReader if (value == stop) { // Check if the stop word is in the line break; } // end if - elem_read_buffer(i) = std::stod(value); + elem_read_buffer(i) = std::stoll(value); size++; } // end for } @@ -1326,7 +1338,62 @@ class MeshReader read_index_start += buffer_iteration_size/num_nodes_in_elem; } - connectivity.update_device(); + if(myrank==0){ + if (size!=global_num_elems*num_nodes_in_elem){ + throw std::runtime_error("ERROR: failed to read all the mesh elements!"); + //std::cout << "ERROR: failed to read all the mesh nodes!" 
<< std::endl; + } + } + + //interface with permanent storage of nodes in elem and construct element maps + DCArrayKokkos All_Element_Global_Indices(num_elems); + // copy temporary global indices storage to view storage + for (int ielem = 0; ielem < num_elems; ielem++) + { + All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; + if (global_indices_temp[ielem] < 0) + { + negative_index_found = 1; + } + } + + MPI_Allreduce(&negative_index_found, &global_negative_index_found, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); + if (global_negative_index_found) + { + if (myrank == 0) + { + std::cout << "Node index less than or equal to zero detected; set \"zero_index_base: true\" under \"input_options\" in your yaml file if indices start at 0" << std::endl; + } + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + exit(0); + } + + All_Element_Global_Indices.update_device(); + + // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) + DistributedMap element_map = DistributedMap(All_Element_Global_Indices); + + //initialize elem data structures + mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); + + // copy temporary element storage to distributed storage + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; + + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) + nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + inode]; + } + } + + // delete temporary element connectivity and index storage + std::vector().swap(element_temp); + std::vector().swap(global_indices_temp); + + /****read in element types ****/ + DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types // array dims are the (num_elems) // 8 = pixal i,j,k linear quad format @@ -1334,14 +1401,104 @@ class 
MeshReader // 12 = linear ensight hex ordering // 72 = VTK_LAGRANGE_HEXAHEDRON // .... - found = extract_values_xml(elem_types.host.pointer(), - "\"types\"", - stop, - in, - size); - if(found==false){ - std::cout << "ERROR: element types were not found in the XML file!" << std::endl; + + dof_limit = global_num_elems; + buffer_iterations = dof_limit / (BUFFER_SIZE); + remainder_size = dof_limit % (BUFFER_SIZE); + if (remainder_size != 0) + { + buffer_iterations++; + } + + read_index_start = 0; + size = 0; + elem_read_buffer = CArrayKokkos(BUFFER_SIZE); + + //allocate types array + + //first find the block with element connectivity data data + if(myrank==0){ + + bool found = false; + + std::string line; + const std::string word = "\"types\""; + + // Read the file line by line looking for specified word + while (std::getline(in, line)) { + + if (line.find(word) != std::string::npos) { // Check if the portion of the word is in the line + found = true; + } + if(found) { + + if(found) break; + + } // end if found + + } // end while + + if(found==false){ + throw std::runtime_error("ERROR: mesh element types were not found in the XML file!"); + //std::cout << "ERROR: mesh nodes were not found in the XML file!" 
<< std::endl; + } + } + + + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++){ + // pack buffer on rank 0 + size_t buffer_iteration_size; + if(buffer_iteration < buffer_iterations - 1){ + buffer_iteration_size = BUFFER_SIZE; + } + else{ + buffer_iteration_size = remainder_size; + } + + if(myrank==0){ + + std::string line; + + // loop over the lines in the file until the buffer limit is reached + for(int idata = 0; idata < buffer_iteration_size; idata++){ + + // extract the individual values from the stream + std::string value; + in >> value; + if (value == stop) { // Check if the stop word is in the line + break; + } // end if + elem_read_buffer(i) = std::stoi(value); + size++; + } // end for + } + // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong + MPI_Bcast(elem_read_buffer.pointer(), BUFFER_SIZE, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + // broadcast how many nodes were read into this buffer iteration + MPI_Bcast(&buffer_iteration_size, 1, MPI_INT, 0, MPI_COMM_WORLD); + + // debug_print + // std::cout << "NODE BUFFER LOOP IS: " << buffer_loop << std::endl; + // for(int iprint=0; iprint < buffer_loop; iprint++) + // std::cout<<"buffer packing: " << std::string(&read_buffer(iprint,0,0)) << std::endl; + // return; + + // determine which data to store in the swage mesh members (the local node data) + // loop through read buffer + for (scan_loop = 0; scan_loop < buffer_iteration_size; scan_loop++) + { + // set global node id (ensight specific order) + elem_gid = read_index_start + scan_loop; + //add to the local type array if this elem gid belongs to this rank + if (element_map.isProcessGlobalIndex(elem_gid)){ + elem_types.host(element_map.getLocalIndex(elem_gid)) = elem_read_buffer(scan_loop); + } + + + } + read_index_start += buffer_iteration_size; } + elem_types.update_device(); // check that the element type is supported by Fierro @@ -1373,13 +1530,13 @@ class MeshReader case 
element_types::linear_quad: // the node order is correct, no changes required - FOR_ALL (elem_gid, 0, mesh.num_elems, { + // FOR_ALL (elem_gid, 0, mesh.num_elems, { - for (size_t node_lid=0; node_lid (num_elems, "ObjectIDs"); + + //reset file stream pointer to area where ObjectId is located before node coordinates + in.seekg(objectid_streampos); + dof_limit = global_num_elems; + buffer_iterations = dof_limit / (BUFFER_SIZE); + remainder_size = dof_limit % (BUFFER_SIZE); + if (remainder_size != 0) + { + buffer_iterations++; + } - // ------------------------ - // Mesh file storage order: - // objectId - // Points - // connectivity - // offsets - // types - // ------------------------ - - // temporary arrays - DCArrayKokkos node_coords(num_nodes,3, "node_coords_vtu_file"); // always 3 with vtu files - DCArrayKokkos connectivity(num_elems,num_nodes_in_elem, "connectivity_vtu_file"); - DCArrayKokkos elem_types(num_elems, "elem_types_vtu_file"); // element types + read_index_start = 0; + size = 0; - //reset file stream pointer to area where ObjectId is located before node coordinates - in.seekg(first_elem_line_streampos); - - // read the object id in the element - // array dims are (num_elems) - found = extract_values_xml(mesh_inps.object_ids.host.pointer(), - "\"ObjectId\"", - stop, - in, - size); - if(found==false){ - throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); - //std::cout << "ERROR: ObjectIDs were not found in the XML file!" 
<< std::endl; + //allocate types array + + //first find the block with element connectivity data data + if(myrank==0){ + + bool found = false; + + std::string line; + const std::string word = "\"ObjectId\""; + + // Read the file line by line looking for specified word + while (std::getline(in, line)) { + + if (line.find(word) != std::string::npos) { // Check if the portion of the word is in the line + found = true; + } + if(found) { + + if(found) break; + + } // end if found + + } // end while + + if(found==false){ + throw std::runtime_error("ERROR: ObjectIDs were not found in the XML file!"); + //std::cout << "ERROR: mesh nodes were not found in the XML file!" << std::endl; + } } - mesh_inps.object_ids.update_device(); + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++){ + // pack buffer on rank 0 + size_t buffer_iteration_size; + if(buffer_iteration < buffer_iterations - 1){ + buffer_iteration_size = BUFFER_SIZE; + } + else{ + buffer_iteration_size = remainder_size; + } + + if(myrank==0){ + + std::string line; + + // loop over the lines in the file until the buffer limit is reached + for(int idata = 0; idata < buffer_iteration_size; idata++){ + + // extract the individual values from the stream + std::string value; + in >> value; + if (value == stop) { // Check if the stop word is in the line + break; + } // end if + elem_read_buffer(i) = std::stoi(value); + size++; + } // end for + } + // broadcast buffer to all ranks; each rank will determine which nodes in the buffer belong + MPI_Bcast(elem_read_buffer.pointer(), BUFFER_SIZE, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + // broadcast how many nodes were read into this buffer iteration + MPI_Bcast(&buffer_iteration_size, 1, MPI_INT, 0, MPI_COMM_WORLD); + + // debug_print + // std::cout << "NODE BUFFER LOOP IS: " << buffer_loop << std::endl; + // for(int iprint=0; iprint < buffer_loop; iprint++) + // std::cout<<"buffer packing: " << std::string(&read_buffer(iprint,0,0)) << 
std::endl; + // return; + + // determine which data to store in the swage mesh members (the local node data) + // loop through read buffer + for (scan_loop = 0; scan_loop < buffer_iteration_size; scan_loop++) + { + // set global node id (ensight specific order) + elem_gid = read_index_start + scan_loop; + //add to the local type array if this elem gid belongs to this rank + if (element_map.isProcessGlobalIndex(elem_gid)){ + mesh_inps.object_ids.host(element_map.getLocalIndex(elem_gid)) = elem_read_buffer(scan_loop); + } + + + } + read_index_start += buffer_iteration_size; + } + + mesh_inps.object_ids.update_device(); + in.close(); } // end of VTMread function From 0c32ec750c1484ceaee6b80e4393799a9b9e57f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Tue, 9 Sep 2025 18:57:10 -0600 Subject: [PATCH 53/66] WIP: vtu reader --- single-node-refactor/src/common/include/mesh.h | 16 +++++++++++++++- .../src/common/include/mesh_inputs.h | 4 +++- .../src/common/include/mesh_io.h | 4 +++- single-node-refactor/src/driver.cpp | 2 +- 4 files changed, 22 insertions(+), 4 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index 6fa68af8d..4af95ece3 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -37,6 +37,7 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "matar.h" #include "state.h" #include "ref_elem.h" +#include "mesh_inputs.h" #include #define PI 3.141592653589793 @@ -395,7 +396,7 @@ struct Mesh_t /* ---------------------------------------------------------------------- Initialize Ghost and Non-Overlapping Element Maps ------------------------------------------------------------------------- */ - void init_maps(node_t& node) + void init_maps(node_t& node, mesh_input_t& mesh_inps) { int local_node_index, current_column_index; long long int node_gid; @@ -573,6 +574,19 @@ struct Mesh_t nodes_in_elem.host(ielem, lnode) = nodes_in_elem.host(last_storage_index, lnode); nodes_in_elem.host(last_storage_index, lnode) = Temp_Nodes(lnode); } + //resort element data that may have been read in such as vtu types and object ids + // if(mesh_inps.input_elem_types){ + // int temp; + // temp = elem_types.host(ielem); + // elem_types.host(ielem) = elem_types.host(last_storage_index); + // elem_types.host(last_storage_index) = temp; + // } + if(mesh_inps.input_elem_objectids){ + int temp; + temp = mesh_inps.object_ids.host(ielem); + mesh_inps.object_ids.host(ielem) = mesh_inps.object_ids.host(last_storage_index); + mesh_inps.object_ids.host(last_storage_index) = temp; + } last_storage_index--; // test if swapped element is also not part of the non overlap map; if so lower loop counter to repeat the above diff --git a/single-node-refactor/src/common/include/mesh_inputs.h b/single-node-refactor/src/common/include/mesh_inputs.h index e76197488..47f0f5c07 100644 --- a/single-node-refactor/src/common/include/mesh_inputs.h +++ b/single-node-refactor/src/common/include/mesh_inputs.h @@ -101,7 +101,9 @@ struct mesh_input_t double scale_y = 1.0; ///< Scales mesh y coordinate dimensions double scale_z = 1.0; ///< Scales mesh z coordinate dimensions - DCArrayKokkos object_ids; ///< the object_ids in the vtu full mesh file (from exodus mesh) + DCArrayKokkos object_ids; ///< the object_ids in the vtu full mesh file (from exodus mesh) + 
bool input_elem_types = false; + bool input_elem_objectids = false; }; // mesh_input_t diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index f7b7ceaf6..040d4a56a 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -375,7 +375,7 @@ class MeshReader read_vtk_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); } else if(extension == "vtu"){ // vtu file format - //read_vtu_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); + read_vtu_mesh(mesh, State.GaussPoints, State.node, State.corner, mesh_inps, num_dims); } else{ throw std::runtime_error("**** Mesh file extension not understood ****"); @@ -949,6 +949,8 @@ class MeshReader int global_negative_index_found = 0; CArrayKokkos read_buffer(BUFFER_SIZE*num_dims); + mesh_inps.input_elem_types = true; + mesh_inps.input_elem_objectids = true; // read the mesh diff --git a/single-node-refactor/src/driver.cpp b/single-node-refactor/src/driver.cpp index 8c3936945..d39ab723f 100644 --- a/single-node-refactor/src/driver.cpp +++ b/single-node-refactor/src/driver.cpp @@ -107,7 +107,7 @@ void Driver::initialize() } //build relevant partition maps for ghost nodes, elements, etc. 
- mesh.init_maps(State.node); + mesh.init_maps(State.node, SimulationParameters.mesh_input); // Build connectivity mesh.build_connectivity(); From bcd5a6ee804c1466250d9ca67492af77ca649671 Mon Sep 17 00:00:00 2001 From: Adrian-Diaz <46537160+Adrian-Diaz@users.noreply.github.com> Date: Fri, 12 Sep 2025 20:06:51 -0600 Subject: [PATCH 54/66] WIP: vtu debug --- single-node-refactor/src/common/include/mesh_io.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 040d4a56a..ee134e906 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -224,7 +224,7 @@ inline bool extract_num_points_and_cells_xml(size_t& numberOfPoints, // Read the file line by line looking for NumberOfPoints while (std::getline(in, line)) { - + //std::cout << line << std::endl; std::string word = "NumberOfPoints="; // A portion of a word if (line.find(word) != std::string::npos) { // Check if the portion of the word is in the line @@ -988,8 +988,6 @@ class MeshReader std::cout<<"Reading VTU file in a multiblock VTK mesh"< Date: Sun, 14 Sep 2025 22:07:33 -0600 Subject: [PATCH 55/66] ENH: parallel vtu reader --- .../src/common/include/mesh_io.h | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index ee134e906..cca7a78a9 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -610,7 +610,7 @@ class MeshReader } }); } - + node.coords.update_device(); //initialize some mesh data mesh.initialize_nodes(global_num_nodes); num_local_nodes = node_map.size(); @@ -968,7 +968,6 @@ class MeshReader size_t global_num_nodes = 0; size_t global_num_elems = 0; size_t num_elems = 0; - int i; // used for writing 
information to file std::streampos objectid_streampos; // dimensional scaling of the mesh @@ -1069,7 +1068,7 @@ class MeshReader } if(found) { - if(found) break; + break; } // end if found @@ -1105,7 +1104,7 @@ class MeshReader if (value == stop) { // Check if the stop word is in the line break; } // end if - read_buffer(i) = std::stod(value); + read_buffer(idata) = std::stod(value); size++; } // end for } @@ -1166,7 +1165,7 @@ class MeshReader node.coords(node_id,idim) = node_coords_distributed(node_id,idim); } }); - + node.coords.update_host(); //initialize some mesh data mesh.initialize_nodes(global_num_nodes); num_local_nodes = node_map.size(); @@ -1218,7 +1217,7 @@ class MeshReader } if(found) { - if(found) break; + break; } // end if found @@ -1254,7 +1253,7 @@ class MeshReader if (value == stop) { // Check if the stop word is in the line break; } // end if - elem_read_buffer(i) = std::stoll(value); + elem_read_buffer(idata) = std::stoll(value); size++; } // end for } @@ -1387,6 +1386,11 @@ class MeshReader nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + inode]; } } + nodes_in_elem.update_device(); + + //debug print + //for (int inode = 0; inode < num_nodes_in_elem; inode++) + //std::cout << "Element nodes " << inode << " " << nodes_in_elem(num_elems-1,inode) << std::endl; // delete temporary element connectivity and index storage std::vector().swap(element_temp); @@ -1432,7 +1436,7 @@ class MeshReader } if(found) { - if(found) break; + break; } // end if found @@ -1468,7 +1472,10 @@ class MeshReader if (value == stop) { // Check if the stop word is in the line break; } // end if - elem_read_buffer(i) = std::stoi(value); + elem_read_buffer(idata) = std::stoi(value); + // if(elem_read_buffer(i)!=11){ + // std::cout << "FOUND TYPE ERROR " << std::endl; + // } size++; } // end for } @@ -1491,6 +1498,7 @@ class MeshReader elem_gid = read_index_start + scan_loop; //add to the local type array if this elem gid belongs to this rank if 
(element_map.isProcessGlobalIndex(elem_gid)){ + //std::cout << "elocal id " << element_map.getLocalIndex(elem_gid) << " " << elem_read_buffer(scan_loop) << std::endl; elem_types.host(element_map.getLocalIndex(elem_gid)) = elem_read_buffer(scan_loop); } @@ -1558,7 +1566,7 @@ class MeshReader // next case case element_types::linear_hex: - + std::cout << "Converting mesh element ordering" << std::endl; RUN({ convert_ensight_to_ijk(0) = 0; convert_ensight_to_ijk(1) = 1; @@ -1572,9 +1580,12 @@ class MeshReader // read the node ids in the element FOR_ALL (elem_id, 0, mesh.num_elems, { - + long long int temp[num_nodes_in_elem]; for (size_t node_lid=0; node_lid Date: Mon, 15 Sep 2025 22:19:02 -0600 Subject: [PATCH 56/66] WIP: 2D SGH --- .../SGH_solver_3D/include/sgh_solver_3D.h | 5 +- .../Solvers/SGH_solver_3D/src/sgh_execute.cpp | 17 +-- .../SGH_solver_rz/include/sgh_solver_rz.h | 4 + .../Solvers/SGH_solver_rz/src/momentum_rz.cpp | 2 +- .../SGH_solver_rz/src/sgh_execute_rz.cpp | 123 +++++++++++++----- .../SGH_solver_rz/src/sgh_initialize_rz.cpp | 6 +- .../SGH_solver_rz/src/sgh_setup_rz.cpp | 8 +- 7 files changed, 107 insertions(+), 58 deletions(-) diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h index 5e0ac83d1..e7a7649e4 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/include/sgh_solver_3D.h @@ -453,15 +453,12 @@ class SGH3D : public Solver double sum_domain_internal_energy( const DRaggedRightArrayKokkos& MaterialPoints_mass, const DRaggedRightArrayKokkos& MaterialPoints_sie, - const MaterialToMeshMap_t& MaterialToMeshMaps, const size_t num_mat_points, - const size_t mat_id, - const size_t num_local_elems); + const size_t mat_id); double sum_domain_kinetic_energy( const Mesh_t& mesh, const DistributedDCArray& node_vel, - const DistributedDCArray& node_coords, const 
DistributedDCArray& node_mass); double sum_domain_material_mass( diff --git a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp index 4443d621e..db44e1bdf 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_3D/src/sgh_execute.cpp @@ -120,16 +120,13 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, local_IE_t0 += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, - State.MaterialToMeshMaps, State.MaterialPoints.num_material_local_points.host(mat_id), - mat_id, - mesh.num_local_elems); + mat_id); } // end loop over mat_id // extensive KE local_KE_t0 = sum_domain_kinetic_energy(mesh, State.node.vel, - State.node.coords, State.node.mass); //collect KE and TE sums across all processes @@ -152,7 +149,7 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, for (size_t mat_id = 0; mat_id < num_mats; mat_id++) { double global_mass_domain_mat; - std::cout << " local element count for mass loop " << State.MaterialPoints.num_material_local_points.host(mat_id) << std::endl; + //std::cout << " local element count for mass loop " << State.MaterialPoints.num_material_local_points.host(mat_id) << std::endl; double mass_domain_mat = sum_domain_material_mass(State.MaterialPoints.mass, State.MaterialPoints.num_material_local_points.host(mat_id), mat_id); @@ -555,16 +552,13 @@ void SGH3D::execute(SimulationParameters_t& SimulationParameters, local_IE_tend += sum_domain_internal_energy(State.MaterialPoints.mass, State.MaterialPoints.sie, - State.MaterialToMeshMaps, State.MaterialPoints.num_material_local_points.host(mat_id), - mat_id, - mesh.num_local_elems); + mat_id); } // end loop over mat_id // extensive KE local_KE_tend = sum_domain_kinetic_energy(mesh, State.node.vel, - State.node.coords, State.node.mass); MPI_Allreduce(&local_IE_tend, &IE_tend, 1, 
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); @@ -689,10 +683,8 @@ double max_Eigen3D(const ViewCArrayKokkos tensor) double sum_domain_internal_energy( const DRaggedRightArrayKokkos& MaterialPoints_mass, const DRaggedRightArrayKokkos& MaterialPoints_sie, - const MaterialToMeshMap_t& MaterialToMeshMaps, const size_t num_mat_points, - const size_t mat_id, - const size_t num_local_elems) + const size_t mat_id) { double IE_sum = 0.0; double IE_loc_sum; @@ -725,7 +717,6 @@ double sum_domain_internal_energy( double sum_domain_kinetic_energy( const Mesh_t& mesh, const DistributedDCArray& node_vel, - const DistributedDCArray& node_coords, const DistributedDCArray& node_mass) { // extensive KE diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h index 0ac9b3f85..f06b81dd2 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/include/sgh_solver_rz.h @@ -117,6 +117,10 @@ class SGHRZ : public Solver } ~SGHRZ() = default; + + //member variables + CommPlan node_velocity_comms; + CommPlan node_mass_comms; ///////////////////////////////////////////////////////////////////////////// /// diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp index bd96217ab..7679efbe3 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/momentum_rz.cpp @@ -61,7 +61,7 @@ void SGHRZ::update_velocity_rz( const size_t num_dims = 2; // walk over the nodes to update the velocity - FOR_ALL(node_gid, 0, mesh.num_nodes, { + FOR_ALL(node_gid, 0, mesh.num_local_nodes, { double node_force[3]; for (size_t dim = 0; dim < num_dims; dim++) { node_force[dim] = 0.0; diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp 
b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp index 3add48621..17fbf5286 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_execute_rz.cpp @@ -80,6 +80,11 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParameters, // Create mesh writer MeshWriter mesh_writer; // Note: Pull to driver after refactoring evolution + //MPI data + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + // --- graphics vars ---- CArray graphics_times = CArray(20000); graphics_times(0) = this->time_start; // was zero @@ -96,8 +101,9 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParameters, State.node.mass, mesh.num_nodes); - - std::cout << "Applying initial boundary conditions" << std::endl; + if(myrank==0){ + std::cout << "Applying initial boundary conditions" << std::endl; + } boundary_velocity_rz(mesh, BoundaryConditions, State.node.vel, time_value); // Time value = 0.0; @@ -106,6 +112,8 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParameters, double IE_t0 = 0.0; double KE_t0 = 0.0; double TE_t0 = 0.0; + double local_IE_t0 = 0.0; + double local_KE_t0 = 0.0; // the number of materials specified by the user input @@ -114,39 +122,55 @@ void SGHRZ::execute(SimulationParameters_t& SimulationParameters, // extensive IE for(size_t mat_id=0; mat_id(time_2 - time_1).count(); - printf("\nCalculation time in seconds: %f \n", calc_time * 1e-9); + if(myrank==0){ + printf("\nCalculation time in seconds: %f \n", calc_time * 1e-9); + } // ---- Calculate energy tallies ---- double IE_tend = 0.0; double KE_tend = 0.0; double TE_tend = 0.0; + double local_IE_tend = 0.0; + double local_KE_tend = 0.0; // extensive IE for(size_t mat_id=0; mat_idsolver_id = solver_id_inp; - State.node.initialize(num_nodes, num_dim, SGHRZ_State::required_node_state); + State.node.initialize(mesh.all_node_map, num_dim, 
SGHRZ_State::required_node_state, mesh.node_map); State.GaussPoints.initialize(num_gauss_pts, 3, SGHRZ_State::required_gauss_pt_state); // note: dims is always 3 State.corner.initialize(num_corners, num_dim, SGHRZ_State::required_corner_state); + + //comms objects + node_velocity_comms = CommPlan(State.node.vel, State.node.local_vel, mesh.node_coords_comms); //copies MPI setup from coordinate comms since the node maps are the same + node_mass_comms = CommPlan(State.node.mass, State.node.local_mass, mesh.node_coords_comms); //copies MPI setup from coordinate comms since the node maps are the same // NOTE: Material points and material corners are initialize in sgh_setup after calculating the material->mesh maps } diff --git a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp index 2473a68b4..15d6bf00a 100644 --- a/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp +++ b/single-node-refactor/src/Solvers/SGH_solver_rz/src/sgh_setup_rz.cpp @@ -86,6 +86,8 @@ void SGHRZ::setup(SimulationParameters_t& SimulationParameters, { // add a flag on whether SGHRZ was set up, if(SGHRZ_setup_already==false) + //update node velocity on ghosts + node_velocity_comms.execute_comms(); const size_t num_mats = Materials.num_mats; // the number of materials on the mesh @@ -107,12 +109,9 @@ void SGHRZ::setup(SimulationParameters_t& SimulationParameters, } // for loop over mat_id - // set corner and node masses to zero init_corner_node_masses_zero_rz(mesh, State.node.mass, State.corner.mass); - - // 2D RZ // calculate the corner massess if 2D @@ -134,6 +133,9 @@ void SGHRZ::setup(SimulationParameters_t& SimulationParameters, State.node.mass, State.corner.mass); + //communicate node masses to ghosts + node_mass_comms.execute_comms(); + } // end SGHRZ setup From bca150e9ecd2e2fec83a2844d7e80a1024e1c3fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= 
<“adriandiaz1117@gmail.com”> Date: Mon, 22 Sep 2025 18:06:22 -0600 Subject: [PATCH 57/66] ENH: 2D sgh mpi enabled --- .../src/common/include/mesh_io.h | 992 ++++++++++++++---- 1 file changed, 799 insertions(+), 193 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index cca7a78a9..bf04d6a93 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1792,10 +1792,10 @@ class MeshBuilder { if (SimulationParameters.mesh_input.num_dims == 2) { if (SimulationParameters.mesh_input.type == mesh_input::Polar) { - //build_2d_polar(mesh, GaussPoints, node, corner, SimulationParameters); + build_2d_polar(mesh, GaussPoints, node, corner, SimulationParameters); } else if (SimulationParameters.mesh_input.type == mesh_input::Box) { - //build_2d_box(mesh, GaussPoints, node, corner, SimulationParameters); + build_2d_box(mesh, GaussPoints, node, corner, SimulationParameters); } else{ std::cout << "**** 2D MESH TYPE NOT SUPPORTED **** " << std::endl; @@ -1828,249 +1828,855 @@ class MeshBuilder /// \param Simulation parameters /// ///////////////////////////////////////////////////////////////////////////// - // void build_2d_box(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // SimulationParameters_t& SimulationParameters) const - // { - // printf("Creating a 2D box mesh \n"); - // const int num_dim = 2; + void build_2d_box(Mesh_t& mesh, + GaussPoint_t& GaussPoints, + node_t& node, + corner_t& corner, + SimulationParameters_t& SimulationParameters) const + { + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + /*currently we just build the global mesh data on rank 0 and then broadcast relevant data to each rank + before the global mesh data on rank 0 falls out of scope*/ + int global_num_nodes, global_num_elems; + CArrayKokkos read_buffer; + 
CArrayKokkos read_buffer_edof; + CArrayKokkos global_coords; + CArrayKokkos global_nodes_in_elem; + auto convert_point_number_in_quad = CArray(4); + convert_point_number_in_quad(0) = 0; + convert_point_number_in_quad(1) = 1; + convert_point_number_in_quad(2) = 3; + convert_point_number_in_quad(3) = 2; - // const double lx = SimulationParameters.mesh_input.length[0]; - // const double ly = SimulationParameters.mesh_input.length[1]; + int local_node_index, current_column_index; + int buffer_loop, buffer_iteration, buffer_iterations, dof_limit, scan_loop; + int negative_index_found = 0; + int global_negative_index_found = 0; - // const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; - // const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; + size_t read_index_start, node_rid, elem_gid; + long long int node_gid; + real_t dof_value; + real_t unit_scaling = 1; - // const int num_points_i = num_elems_i + 1; // num points in x - // const int num_points_j = num_elems_j + 1; // num points in y + const int num_dims = 2; + size_t num_nodes_in_elem = 1; + for (int dim = 0; dim < num_dims; dim++) { + num_nodes_in_elem *= 2; + } + if(myrank==0){ + printf("Creating a 2D box mesh \n"); + } - // const int num_nodes = num_points_i * num_points_j; + // SimulationParameters.mesh_input.length.update_host(); + const double lx = SimulationParameters.mesh_input.length[0]; + const double ly = SimulationParameters.mesh_input.length[1]; - // const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) - // const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) + // SimulationParameters.mesh_input.num_elems.update_host(); + const int num_elems_i = SimulationParameters.mesh_input.num_elems[0]; + const int num_elems_j = SimulationParameters.mesh_input.num_elems[1]; - // const int num_elems = num_elems_i * num_elems_j; + const int num_points_i = num_elems_i + 1; // num points in x + const int num_points_j = num_elems_j + 1; // num points in y - 
// std::vector origin(num_dim); - // // SimulationParameters.mesh_input.origin.update_host(); - // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + global_num_nodes = num_points_i * num_points_j; - // // --- 2D parameters --- - // // const int num_faces_in_elem = 4; // number of faces in elem - // // const int num_points_in_elem = 4; // number of points in elem - // // const int num_points_in_face = 2; // number of points in a face - // // const int num_edges_in_elem = 4; // number of edges in a elem - - // // --- mesh node ordering --- - // // Convert ijk index system to the finite element numbering convention - // // for vertices in elem - // auto convert_point_number_in_quad = CArray(4); - // convert_point_number_in_quad(0) = 0; - // convert_point_number_in_quad(1) = 1; - // convert_point_number_in_quad(2) = 3; - // convert_point_number_in_quad(3) = 2; - - // // intialize node variables - // mesh.initialize_nodes(num_nodes); - - // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dim, required_node_state); + const double dx = lx / ((double)num_elems_i); // len/(num_elems_i) + const double dy = ly / ((double)num_elems_j); // len/(num_elems_j) - // // --- Build nodes --- + global_num_elems = num_elems_i * num_elems_j; + + if(myrank==0){ + std::vector origin(num_dims); + // SimulationParameters.mesh_input.origin.update_host(); + for (int i = 0; i < num_dims; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } - // // populate the point data structures - // for (int j = 0; j < num_points_j; j++) { - // for (int i = 0; i < num_points_i; i++) { - // // global id for the point - // int node_gid = get_id(i, j, 0, num_points_i, num_points_j); - - // // store the point coordinates - // node.coords.host(node_gid, 0) = origin[0] + (double)i * dx; - // 
node.coords.host(node_gid, 1) = origin[1] + (double)j * dy; - // } // end for i - // } // end for j + // --- 3D parameters --- + // const int num_faces_in_elem = 6; // number of faces in elem + // const int num_points_in_elem = 8; // number of points in elem + // const int num_points_in_face = 4; // number of points in a face + // const int num_edges_in_elem = 12; // number of edges in a elem + + // node coords data on rank 0 for all global nodes + global_coords = CArrayKokkos(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); + // --- Build nodes --- - // node.coords.update_device(); + // populate the point data structures + for (int j = 0; j < num_points_j; j++) { + for (int i = 0; i < num_points_i; i++) { + // global id for the point + int node_gid = get_id(i, j, 0, num_points_i, num_points_j); - // // initialize elem variables - // mesh.initialize_elems(num_elems, num_dim); + // store the point coordinates + global_coords(node_gid, 0) = origin[0] + (double)i * dx; + global_coords(node_gid, 1) = origin[1] + (double)j * dy; + } // end for i + } // end for j - // // populate the elem center data structures - // for (int j = 0; j < num_elems_j; j++) { - // for (int i = 0; i < num_elems_i; i++) { - // // global id for the elem - // int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); - // // store the point IDs for this elem where the range is - // // (i:i+1, j:j+1 for a linear quad - // int this_point = 0; + // initialize elem variables + global_nodes_in_elem = CArrayKokkos(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); - // for (int jcount = j; jcount <= j + 1; jcount++) { - // for (int icount = i; icount <= i + 1; icount++) { - // // global id for the points - // int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); + // --- Build elems --- - // // convert this_point index to the FE index convention - // int this_index = convert_point_number_in_quad(this_point); + // populate the elem center data 
structures + for (int j = 0; j < num_elems_j; j++) { + for (int i = 0; i < num_elems_i; i++) { + // global id for the elem + int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); + + // store the point IDs for this elem where the range is + // (i:i+1, j:j+1, k:k+1) for a linear hexahedron + int this_point = 0; + for (int jcount = j; jcount <= j + 1; jcount++) { + for (int icount = i; icount <= i + 1; icount++) { + // global id for the points + int node_gid = get_id(icount, jcount, 0, + num_points_i, num_points_j); + + // convert this_point index to the FE index convention + int this_index = convert_point_number_in_quad(this_point); - // // store the points in this elem according the the finite - // // element numbering convention - // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // store the points in this elem according the the finite + // element numbering convention + global_nodes_in_elem(elem_gid, this_index) = node_gid; - // // increment the point counting index - // this_point = this_point + 1; - // } // end for icount - // } // end for jcount - // } // end for i - // } // end for j + // increment the point counting index + this_point = this_point + 1; + } // end for icount + } // end for jcount + } // end for i + } // end for j + } - // // update device side - // mesh.nodes_in_elem.update_device(); + //distribute partitioned data from the global mesh build data on rank 0 + size_t num_local_nodes; + DistributedMap node_map; + // read coords + read_index_start = 0; + + buffer_iterations = global_num_nodes / BUFFER_SIZE; + if (global_num_nodes % BUFFER_SIZE != 0) + { + buffer_iterations++; + } - // // intialize corner variables - // int num_corners = num_elems * mesh.num_nodes_in_elem; - // mesh.initialize_corners(num_corners); - // // corner.initialize(num_corners, num_dim); + read_buffer = CArrayKokkos(BUFFER_SIZE, num_dims); - // } // end build_2d_box + { //scoped so temp FArray data is auto deleted to save memory + //allocate 
pre-partition node coords using contiguous decomposition + //FArray type used since CArray type still doesnt support zoltan2 decomposition + DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); - ///////////////////////////////////////////////////////////////////////////// - /// - /// \fn build_2d_polar - /// - /// \brief Builds an unstructured 2D polar mesh - /// - /// \param Simulation mesh that is built - /// \param Element state data - /// \param Node state data - /// \param Corner state data - /// \param Simulation parameters - /// - ///////////////////////////////////////////////////////////////////////////// - // void build_2d_polar(Mesh_t& mesh, - // GaussPoint_t& GaussPoints, - // node_t& node, - // corner_t& corner, - // SimulationParameters_t& SimulationParameters) const - // { - // printf("Creating a 2D polar mesh \n"); + // construct contiguous parallel row map now that we know the number of nodes + node_map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); - // int num_dim = 2; + // set the vertices in the mesh read in + num_local_nodes = node_map.size(); + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) + { - // const double inner_radius = SimulationParameters.mesh_input.inner_radius; - // const double outer_radius = SimulationParameters.mesh_input.outer_radius; + for (int idim = 0; idim < num_dims; idim++) + { + // debug print + // std::cout<<" "<< substring < required_node_state = { node_state::coords }; + node_map = node_coords_distributed.pmap; + node.initialize(node_map, num_dims, required_node_state); + //copy coordinate data from repartitioned FArray into CArray + FOR_ALL(node_id, 0, node_map.size(), { + for(int idim = 0; idim < num_dims; idim++){ + node.coords(node_id,idim) = 
node_coords_distributed(node_id,idim); + } + }); + } - // const int num_nodes = num_points_i * num_points_j; + //initialize some mesh data + mesh.initialize_nodes(global_num_nodes); + num_local_nodes = node_map.size(); + mesh.num_local_nodes = num_local_nodes; + mesh.node_map = node_map; + mesh.num_dims = num_dims; + //node.coords.print(); + + // debug print of nodal data - // const double dx = (outer_radius - inner_radius) / ((double)num_elems_i); // len/(elems) - // const double dy = (end_angle - start_angle) / ((double)num_elems_j); // len/(elems) + // debug print nodal positions and indices + /* + std::cout << " ------------NODAL POSITIONS ON TASK " << myrank << " --------------"<getGlobalElement(inode) + 1 << " { "; + for (int istride = 0; istride < num_dims; istride++){ + std::cout << node_coords(inode,istride) << " , "; + } + std::cout << " }"<< std::endl; + } + */ - // const int num_elems = num_elems_i * num_elems_j; + // check that local assignments match global total - // std::vector origin(num_dim); + // read in element info (ensight file format is organized in element type sections) + // loop over this later for several element type sections + CArrayKokkos node_store(num_nodes_in_elem); - // for (int i = 0; i < num_dim; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + // broadcast number of elements + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); - // // --- 2D parameters --- - // // const int num_faces_in_elem = 4; // number of faces in elem - // // const int num_points_in_elem = 4; // number of points in elem - // // const int num_points_in_face = 2; // number of points in a face - // // const int num_edges_in_elem = 4; // number of edges in a elem - - // // --- mesh node ordering --- - // // Convert ijk index system to the finite element numbering convention - // // for vertices in elem - // auto convert_point_number_in_quad = CArray(4); - // convert_point_number_in_quad(0) = 0; - // 
convert_point_number_in_quad(1) = 1; - // convert_point_number_in_quad(2) = 3; - // convert_point_number_in_quad(3) = 2; - - // // intialize node variables - // mesh.initialize_nodes(num_nodes); - - // // initialize node state, for now, we just need coordinates, the rest will be initialize by the respective solvers - // std::vector required_node_state = { node_state::coords }; - // node.initialize(num_nodes, num_dim, required_node_state); + //initialize num elem in mesh struct - // // populate the point data structures - // for (int j = 0; j < num_points_j; j++) { - // for (int i = 0; i < num_points_i; i++) { - // // global id for the point - // int node_gid = get_id(i, j, 0, num_points_i, num_points_j); + if (myrank == 0) + { + std::cout << "before mesh initialization" << std::endl; + } - // double r_i = inner_radius + (double)i * dx; - // double theta_j = start_angle + (double)j * dy; + // read in element connectivity + // we're gonna reallocate for the words per line expected for the element connectivity + read_buffer_edof = CArrayKokkos(BUFFER_SIZE, num_nodes_in_elem); - // // store the point coordinates - // node.coords.host(node_gid, 0) = origin[0] + r_i * cos(theta_j); - // node.coords.host(node_gid, 1) = origin[1] + r_i * sin(theta_j); + // calculate buffer iterations to read number of lines + buffer_iterations = global_num_elems / BUFFER_SIZE; + int assign_flag; - // if(node.coords.host(node_gid, 0) < 0.0){ - // throw std::runtime_error("**** NODE RADIUS FOR RZ MESH MUST BE POSITIVE ****"); - // } + // dynamic buffer used to store elements before we know how many this rank needs + std::vector element_temp(BUFFER_SIZE * num_nodes_in_elem); + std::vector global_indices_temp(BUFFER_SIZE); + size_t buffer_max = BUFFER_SIZE * num_nodes_in_elem; + size_t indices_buffer_max = BUFFER_SIZE; + size_t num_elems = 0; - // } // end for i - // } // end for j + if (global_num_elems % BUFFER_SIZE != 0) + { + buffer_iterations++; + } + read_index_start = 0; + // std::cout 
<< "ELEMENT BUFFER ITERATIONS: " << buffer_iterations << std::endl; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { + read_buffer_edof(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_SIZE + buffer_loop, inode); + } + // std::cout <= buffer_max) + { + element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem); + buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem; + } + element_temp[(num_elems - 1) * num_nodes_in_elem + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + } + // assign global element id to temporary list + if (num_elems - 1 >= indices_buffer_max) + { + global_indices_temp.resize(num_elems - 1 + BUFFER_SIZE); + indices_buffer_max = num_elems - 1 + BUFFER_SIZE; + } + global_indices_temp[num_elems - 1] = elem_gid; + } + } + read_index_start += BUFFER_SIZE; + } + //set global and local shared element counts + mesh.global_num_elems = global_num_elems; - // // populate the elem center data structures - // for (int j = 0; j < num_elems_j; j++) { - // for (int i = 0; i < num_elems_i; i++) { - // // global id for the elem - // int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); + // construct partition mapping for shared elements on each process + DCArrayKokkos All_Element_Global_Indices(num_elems); + // copy temporary global indices storage to view storage + for (int ielem = 0; ielem < num_elems; ielem++) + { + All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; + } - // // store the point IDs for this elem where the range is - // // (i:i+1, 
j:j+1 for a linear quad - // int this_point = 0; + All_Element_Global_Indices.update_device(); - // for (int jcount = j; jcount <= j + 1; jcount++) { - // for (int icount = i; icount <= i + 1; icount++) { - // // global id for the points - // int node_gid = get_id(icount, jcount, 0, num_points_i, num_points_j); + // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) + DistributedMap element_map = DistributedMap(All_Element_Global_Indices); - // // convert this_point index to the FE index convention - // int this_index = convert_point_number_in_quad(this_point); + //initialize elem data structures + mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); - // // store the points in this elem according the the finite - // // element numbering convention - // mesh.nodes_in_elem.host(elem_gid, this_index) = node_gid; + // copy temporary element storage to distributed storage + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; - // // increment the point counting index - // this_point = this_point + 1; - // } // end for icount - // } // end for jcount - // } // end for i - // } // end for j + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) + nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + inode]; + } + } - // // update device side - // mesh.nodes_in_elem.update_device(); + nodes_in_elem.update_device(); + + // delete temporary element connectivity and index storage + //std::vector().swap(element_temp); + //std::vector().swap(global_indices_temp); - // // intialize corner variables - // int num_corners = num_elems * mesh.num_nodes_in_elem; - // mesh.initialize_corners(num_corners); - // // corner.initialize(num_corners, num_dim); + // initialize corner variables + size_t num_corners = 
num_elems * num_nodes_in_elem; + mesh.initialize_corners(num_corners); + + } // end build_2d_box + + ///////////////////////////////////////////////////////////////////////////// + /// + /// \fn build_2d_polar + /// + /// \brief Builds an unstructured 2D polar mesh + /// + /// \param Simulation mesh that is built + /// \param Element state data + /// \param Node state data + /// \param Corner state data + /// \param Simulation parameters + /// + ///////////////////////////////////////////////////////////////////////////// + + void build_2d_polar(Mesh_t& mesh, + GaussPoint_t& GaussPoints, + node_t& node, + corner_t& corner, + SimulationParameters_t& SimulationParameters) const + { + int myrank, nranks; + MPI_Comm_rank(MPI_COMM_WORLD,&myrank); + MPI_Comm_size(MPI_COMM_WORLD,&nranks); + /*currently we just build the global mesh data on rank 0 and then broadcast relevant data to each rank + before the global mesh data on rank 0 falls out of scope*/ + int global_num_nodes, global_num_elems; + CArrayKokkos read_buffer; + CArrayKokkos read_buffer_edof; + CArrayKokkos global_coords; + CArrayKokkos global_nodes_in_elem; + auto convert_point_number_in_quad = CArray(4); + convert_point_number_in_quad(0) = 0; + convert_point_number_in_quad(1) = 1; + convert_point_number_in_quad(2) = 3; + convert_point_number_in_quad(3) = 2; + + int local_node_index, current_column_index; + int buffer_loop, buffer_iteration, buffer_iterations, dof_limit, scan_loop; + int negative_index_found = 0; + int global_negative_index_found = 0; + + size_t read_index_start, node_rid, elem_gid; + long long int node_gid; + real_t dof_value; + real_t unit_scaling = 1; + + const int num_dims = 2; + size_t num_nodes_in_elem = 1; + for (int dim = 0; dim < num_dims; dim++) { + num_nodes_in_elem *= 2; + } + if(myrank==0){ + printf("Creating a 2D box mesh \n"); + } + + // SimulationParameters.mesh_input.length.update_host(); + const double inner_radius = SimulationParameters.mesh_input.inner_radius; + const double 
outer_radius = SimulationParameters.mesh_input.outer_radius; + + const double start_angle = PI / 180.0 * SimulationParameters.mesh_input.starting_angle; + const double end_angle = PI / 180.0 * SimulationParameters.mesh_input.ending_angle; + + const int num_elems_i = SimulationParameters.mesh_input.num_radial_elems; + const int num_elems_j = SimulationParameters.mesh_input.num_angular_elems; + + const int num_points_i = num_elems_i + 1; // num points in x + const int num_points_j = num_elems_j + 1; // num points in y + + const double dx = (outer_radius - inner_radius) / ((double)num_elems_i); // len/(elems) + const double dy = (end_angle - start_angle) / ((double)num_elems_j); // len/(elems) + + global_num_elems = num_elems_i * num_elems_j; + + global_num_nodes = num_points_i * num_points_j; + + if(myrank==0){ + std::vector origin(num_dims); + // SimulationParameters.mesh_input.origin.update_host(); + for (int i = 0; i < num_dims; i++) { origin[i] = SimulationParameters.mesh_input.origin[i]; } + + // --- 3D parameters --- + // const int num_faces_in_elem = 6; // number of faces in elem + // const int num_points_in_elem = 8; // number of points in elem + // const int num_points_in_face = 4; // number of points in a face + // const int num_edges_in_elem = 12; // number of edges in a elem + + // node coords data on rank 0 for all global nodes + global_coords = CArrayKokkos(global_num_nodes, num_dims, "global_mesh_build_node_coordinates"); + + // --- Build nodes --- + + // populate the point data structures + for (int j = 0; j < num_points_j; j++) { + for (int i = 0; i < num_points_i; i++) { + // global id for the point + int node_gid = get_id(i, j, 0, num_points_i, num_points_j); + + double r_i = inner_radius + (double)i * dx; + double theta_j = start_angle + (double)j * dy; + + // store the point coordinates + global_coords(node_gid, 0) = origin[0] + r_i * cos(theta_j); + global_coords(node_gid, 1) = origin[1] + r_i * sin(theta_j); + + if(global_coords(node_gid, 0) < 
0.0){ + throw std::runtime_error("**** NODE RADIUS FOR RZ MESH MUST BE POSITIVE ****"); + } + } // end for i + } // end for j + + + // initialize elem variables + global_nodes_in_elem = CArrayKokkos(global_num_elems, num_nodes_in_elem, "global_mesh_build_nodes_in_elem"); + + // --- Build elems --- + + // populate the elem center data structures + for (int j = 0; j < num_elems_j; j++) { + for (int i = 0; i < num_elems_i; i++) { + // global id for the elem + int elem_gid = get_id(i, j, 0, num_elems_i, num_elems_j); + + // store the point IDs for this elem where the range is + // (i:i+1, j:j+1, k:k+1) for a linear hexahedron + int this_point = 0; + for (int jcount = j; jcount <= j + 1; jcount++) { + for (int icount = i; icount <= i + 1; icount++) { + // global id for the points + int node_gid = get_id(icount, jcount, 0, + num_points_i, num_points_j); + + // convert this_point index to the FE index convention + int this_index = convert_point_number_in_quad(this_point); + + // store the points in this elem according the the finite + // element numbering convention + global_nodes_in_elem(elem_gid, this_index) = node_gid; + + // increment the point counting index + this_point = this_point + 1; + } // end for icount + } // end for jcount + } // end for i + } // end for j + } + + //distribute partitioned data from the global mesh build data on rank 0 + size_t num_local_nodes; + DistributedMap node_map; + // read coords + read_index_start = 0; + + buffer_iterations = global_num_nodes / BUFFER_SIZE; + if (global_num_nodes % BUFFER_SIZE != 0) + { + buffer_iterations++; + } + + read_buffer = CArrayKokkos(BUFFER_SIZE, num_dims); + + { //scoped so temp FArray data is auto deleted to save memory + //allocate pre-partition node coords using contiguous decomposition + //FArray type used since CArray type still doesnt support zoltan2 decomposition + DistributedDFArray node_coords_distributed(global_num_nodes, num_dims); + + // construct contiguous parallel row map now that we know 
the number of nodes + node_map = node_coords_distributed.pmap; + // map->describe(*fos,Teuchos::VERB_EXTREME); + + // set the vertices in the mesh read in + num_local_nodes = node_map.size(); + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) + { + + for (int idim = 0; idim < num_dims; idim++) + { + // debug print + // std::cout<<" "<< substring < required_node_state = { node_state::coords }; + node_map = node_coords_distributed.pmap; + node.initialize(node_map, num_dims, required_node_state); + //copy coordinate data from repartitioned FArray into CArray + FOR_ALL(node_id, 0, node_map.size(), { + for(int idim = 0; idim < num_dims; idim++){ + node.coords(node_id,idim) = node_coords_distributed(node_id,idim); + } + }); + } + + //initialize some mesh data + mesh.initialize_nodes(global_num_nodes); + num_local_nodes = node_map.size(); + mesh.num_local_nodes = num_local_nodes; + mesh.node_map = node_map; + mesh.num_dims = num_dims; + //node.coords.print(); + + // debug print of nodal data + + // debug print nodal positions and indices + /* + std::cout << " ------------NODAL POSITIONS ON TASK " << myrank << " --------------"<getGlobalElement(inode) + 1 << " { "; + for (int istride = 0; istride < num_dims; istride++){ + std::cout << node_coords(inode,istride) << " , "; + } + std::cout << " }"<< std::endl; + } + */ + + // check that local assignments match global total + + // read in element info (ensight file format is organized in element type sections) + // loop over this later for several element type sections + CArrayKokkos node_store(num_nodes_in_elem); + + // broadcast number of elements + MPI_Bcast(&global_num_elems, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD); + + //initialize num elem in mesh struct + + if (myrank == 0) + { + std::cout << "before mesh initialization" 
<< std::endl; + } + + // read in element connectivity + // we're gonna reallocate for the words per line expected for the element connectivity + read_buffer_edof = CArrayKokkos(BUFFER_SIZE, num_nodes_in_elem); + + // calculate buffer iterations to read number of lines + buffer_iterations = global_num_elems / BUFFER_SIZE; + int assign_flag; + + // dynamic buffer used to store elements before we know how many this rank needs + std::vector element_temp(BUFFER_SIZE * num_nodes_in_elem); + std::vector global_indices_temp(BUFFER_SIZE); + size_t buffer_max = BUFFER_SIZE * num_nodes_in_elem; + size_t indices_buffer_max = BUFFER_SIZE; + size_t num_elems = 0; + + if (global_num_elems % BUFFER_SIZE != 0) + { + buffer_iterations++; + } + read_index_start = 0; + // std::cout << "ELEMENT BUFFER ITERATIONS: " << buffer_iterations << std::endl; + for (buffer_iteration = 0; buffer_iteration < buffer_iterations; buffer_iteration++) + { + // pack buffer on rank 0 + if (myrank == 0 && buffer_iteration < buffer_iterations - 1) + { + for (buffer_loop = 0; buffer_loop < BUFFER_SIZE; buffer_loop++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { + read_buffer_edof(buffer_loop,inode) = global_nodes_in_elem(buffer_iteration * BUFFER_SIZE + buffer_loop, inode); + } + // std::cout <= buffer_max) + { + element_temp.resize((num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem); + buffer_max = (num_elems - 1) * num_nodes_in_elem + inode + BUFFER_SIZE * num_nodes_in_elem; + } + element_temp[(num_elems - 1) * num_nodes_in_elem + inode] = node_store(inode); + // std::cout << "VECTOR STORAGE FOR ELEM " << num_elems << " ON TASK " << myrank << " NODE " << inode+1 << " IS " << node_store(inode) + 1 << std::endl; + } + // assign global element id to temporary list + if (num_elems - 1 >= indices_buffer_max) + { + global_indices_temp.resize(num_elems - 1 + BUFFER_SIZE); + indices_buffer_max = num_elems - 1 + BUFFER_SIZE; + } + global_indices_temp[num_elems - 1] = 
elem_gid; + } + } + read_index_start += BUFFER_SIZE; + } + //set global and local shared element counts + mesh.global_num_elems = global_num_elems; + + // construct partition mapping for shared elements on each process + DCArrayKokkos All_Element_Global_Indices(num_elems); + // copy temporary global indices storage to view storage + for (int ielem = 0; ielem < num_elems; ielem++) + { + All_Element_Global_Indices.host(ielem) = global_indices_temp[ielem]; + } + + All_Element_Global_Indices.update_device(); + + // construct global map of local and shared elements (since different ranks can own the same elements due to the local node map) + DistributedMap element_map = DistributedMap(All_Element_Global_Indices); + + //initialize elem data structures + mesh.initialize_elems(num_elems, num_nodes_in_elem, element_map); + + // copy temporary element storage to distributed storage + DistributedDCArray nodes_in_elem = mesh.nodes_in_elem; + + for (int ielem = 0; ielem < num_elems; ielem++) + { + for (int inode = 0; inode < num_nodes_in_elem; inode++) + { //assign local indices to element-node connectivity (stores global indices until ghost maps are made later) + nodes_in_elem.host(ielem, inode) = element_temp[ielem * num_nodes_in_elem + inode]; + } + } + + nodes_in_elem.update_device(); + + // delete temporary element connectivity and index storage + //std::vector().swap(element_temp); + //std::vector().swap(global_indices_temp); + + // initialize corner variables + size_t num_corners = num_elems * num_nodes_in_elem; + mesh.initialize_corners(num_corners); - // } // end build_2d_box + } // end build_2d_polar ///////////////////////////////////////////////////////////////////////////// /// From 4f311d32579e527ac3d363d53b15588898fe5f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 22 Sep 2025 20:32:43 -0600 Subject: [PATCH 58/66] BUG: undefined behavior in vtu read --- .../src/common/include/mesh_io.h | 16 
+++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index bf04d6a93..1c4b79291 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -1051,6 +1051,8 @@ class MeshReader { buffer_iterations++; } + + read_index_start = 0; //first find the block with node coords data if(myrank==0){ @@ -1511,7 +1513,7 @@ class MeshReader //std::cout << "Type read size " << size << std::endl; // check that the element type is supported by Fierro - FOR_ALL (elem_gid, 0, mesh.num_elems, { + FOR_ALL (elem_gid, 0, num_elems, { //std::cout << "Element type is " << elem_types(elem_gid) << std::endl; if(elem_types(elem_gid) == element_types::linear_quad || elem_types(elem_gid) == element_types::linear_hex_ijk || @@ -1531,7 +1533,7 @@ class MeshReader CArrayKokkos convert_ensight_to_ijk(8, "convert_ensight_to_ijk"); // Convert the arbitrary order hex to a IJK mesh - DCArrayKokkos convert_pn_vtk_to_ijk(mesh.num_nodes_in_elem, "convert_pn_vtk_to_ijk"); + DCArrayKokkos convert_pn_vtk_to_ijk(num_nodes_in_elem, "convert_pn_vtk_to_ijk"); //build the connectivity for element type 12 // elem_types.host(0) @@ -1579,13 +1581,13 @@ class MeshReader }); // read the node ids in the element - FOR_ALL (elem_id, 0, mesh.num_elems, { + FOR_ALL (elem_id, 0, num_elems, { long long int temp[num_nodes_in_elem]; - for (size_t node_lid=0; node_lid Date: Wed, 24 Sep 2025 14:10:43 -0600 Subject: [PATCH 59/66] BUG: set 2D member variable --- single-node-refactor/src/common/include/mesh_io.h | 1 + 1 file changed, 1 insertion(+) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 1c4b79291..030134509 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -616,6 +616,7 @@ class MeshReader 
num_local_nodes = node_map.size(); mesh.num_local_nodes = num_local_nodes; mesh.node_map = node_map; + mesh.num_dims = num_dims; // debug print of nodal data From 5445fcc394dd7250d456cc8d0ed6e7026ea06995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Thu, 25 Sep 2025 20:36:54 -0600 Subject: [PATCH 60/66] ENH: re-add contact regression tests with vtk input --- .../standard_inputs/confined_preload.yaml | 2 +- .../standard_inputs/edge_flat_contact.yaml | 2 +- .../meshes/edge_flat_contact.vtk | 42 +++++++++++++++++ .../standard_inputs/meshes/preload.vtk | 46 +++++++++++++++++++ .../meshes/sie_expansion_contact.vtk | 46 +++++++++++++++++++ .../meshes/slanted_bounce_contact.vtk | 36 +++++++++++++++ .../meshes/slanted_impact_contact.vtk | 36 +++++++++++++++ .../sie_expansion_contact.yaml | 2 +- .../slanted_bounce_contact.yaml | 2 +- .../slanted_impact_contact.yaml | 2 +- .../standard_inputs/unconfined_preload.yaml | 2 +- .../regression_tests/test_refactor.py | 5 +- 12 files changed, 216 insertions(+), 7 deletions(-) create mode 100644 single-node-refactor/regression_tests/standard_inputs/meshes/edge_flat_contact.vtk create mode 100644 single-node-refactor/regression_tests/standard_inputs/meshes/preload.vtk create mode 100644 single-node-refactor/regression_tests/standard_inputs/meshes/sie_expansion_contact.vtk create mode 100644 single-node-refactor/regression_tests/standard_inputs/meshes/slanted_bounce_contact.vtk create mode 100644 single-node-refactor/regression_tests/standard_inputs/meshes/slanted_impact_contact.vtk diff --git a/single-node-refactor/regression_tests/standard_inputs/confined_preload.yaml b/single-node-refactor/regression_tests/standard_inputs/confined_preload.yaml index 57c43f9cd..52dca23fa 100755 --- a/single-node-refactor/regression_tests/standard_inputs/confined_preload.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/confined_preload.yaml @@ -10,7 +10,7 @@ dynamic_options: 
mesh_options: source: file - file_path: ./standard_inputs/meshes/preload.geo + file_path: ./standard_inputs/meshes/preload.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/standard_inputs/edge_flat_contact.yaml b/single-node-refactor/regression_tests/standard_inputs/edge_flat_contact.yaml index 8a3b931eb..06309a5c8 100644 --- a/single-node-refactor/regression_tests/standard_inputs/edge_flat_contact.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/edge_flat_contact.yaml @@ -10,7 +10,7 @@ dynamic_options: mesh_options: source: file - file_path: ./standard_inputs/meshes/edge_flat_contact.geo + file_path: ./standard_inputs/meshes/edge_flat_contact.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/standard_inputs/meshes/edge_flat_contact.vtk b/single-node-refactor/regression_tests/standard_inputs/meshes/edge_flat_contact.vtk new file mode 100644 index 000000000..8e748c358 --- /dev/null +++ b/single-node-refactor/regression_tests/standard_inputs/meshes/edge_flat_contact.vtk @@ -0,0 +1,42 @@ +# vtk DataFile Version 3.0 +Hexahedral element example +ASCII +DATASET UNSTRUCTURED_GRID + +POINTS 24 float +-0.500000 -2.000000 1.000000 +-0.500000 0.000000 1.000000 +-0.500000 2.000000 1.000000 +-0.500000 -2.000000 0.000000 +-0.500000 0.000000 0.000000 +-0.500000 2.000000 0.000000 +0.500000 -2.000000 1.000000 +0.500000 0.000000 1.000000 +0.500000 2.000000 1.000000 +0.500000 -2.000000 0.000000 +0.500000 0.000000 0.000000 +0.500000 2.000000 0.000000 +-0.500000 -1.000000 2.500000 +-0.500000 0.000000 2.500000 +-0.500000 1.000000 2.500000 +-0.500000 1.000000 3.500000 +-0.500000 0.000000 3.500000 +-0.500000 -1.000000 3.500000 +0.500000 -1.000000 2.500000 +0.500000 0.000000 2.500000 +0.500000 1.000000 2.500000 +0.500000 1.000000 3.500000 +0.500000 0.000000 3.500000 +0.500000 -1.000000 3.500000 + +CELLS 4 32 +8 6 7 10 9 0 1 4 3 +8 7 8 11 10 1 2 5 4 +8 12 13 16 17 18 19 22 23 +8 13 14 15 16 19 20 21 22 + 
+CELL_TYPES 4 +12 +12 +12 +12 \ No newline at end of file diff --git a/single-node-refactor/regression_tests/standard_inputs/meshes/preload.vtk b/single-node-refactor/regression_tests/standard_inputs/meshes/preload.vtk new file mode 100644 index 000000000..73d7e0a6a --- /dev/null +++ b/single-node-refactor/regression_tests/standard_inputs/meshes/preload.vtk @@ -0,0 +1,46 @@ +# vtk DataFile Version 3.0 +Hexahedral element example +ASCII +DATASET UNSTRUCTURED_GRID + +POINTS 26 float +1.000000 -1.000000 0.000000 +1.000000 0.000000 0.000000 +1.000000 1.000000 0.000000 +1.000000 1.000000 1.000000 +1.000000 0.000000 1.000000 +1.000000 -1.000000 1.000000 +0.000000 -1.000000 0.000000 +0.000000 0.000000 0.000000 +0.000000 1.000000 0.000000 +0.000000 1.000000 1.000000 +0.000000 0.000000 1.000000 +0.000000 -1.000000 1.000000 +-1.000000 -1.000000 0.000000 +-1.000000 0.000000 0.000000 +-1.000000 1.000000 0.000000 +-1.000000 1.000000 1.000000 +-1.000000 0.000000 1.000000 +-1.000000 -1.000000 1.000000 +-0.250000 -0.250000 0.500000 +-0.250000 0.250000 0.500000 +-0.250000 0.250000 1.500000 +-0.250000 -0.250000 1.500000 +0.250000 -0.250000 0.500000 +0.250000 0.250000 0.500000 +0.250000 0.250000 1.500000 +0.250000 -0.250000 1.500000 + +CELLS 5 40 +8 6 7 10 11 0 1 4 5 +8 7 8 9 10 1 2 3 4 +8 12 13 16 17 6 7 10 11 +8 13 14 15 16 7 8 9 10 +8 18 19 20 21 22 23 24 25 + +CELL_TYPES 5 +12 +12 +12 +12 +12 \ No newline at end of file diff --git a/single-node-refactor/regression_tests/standard_inputs/meshes/sie_expansion_contact.vtk b/single-node-refactor/regression_tests/standard_inputs/meshes/sie_expansion_contact.vtk new file mode 100644 index 000000000..6e552fffe --- /dev/null +++ b/single-node-refactor/regression_tests/standard_inputs/meshes/sie_expansion_contact.vtk @@ -0,0 +1,46 @@ +# vtk DataFile Version 3.0 +Hexahedral element example +ASCII +DATASET UNSTRUCTURED_GRID + +POINTS 26 float +1.000000 -1.000000 0.000000 +1.000000 0.000000 0.000000 +1.000000 1.000000 0.000000 +1.000000 
1.000000 1.000000 +1.000000 0.000000 1.000000 +1.000000 -1.000000 1.000000 +0.000000 -1.000000 0.000000 +0.000000 0.000000 0.000000 +0.000000 1.000000 0.000000 +0.000000 1.000000 1.000000 +0.000000 0.000000 1.000000 +0.000000 -1.000000 1.000000 +-1.000000 -1.000000 0.000000 +-1.000000 0.000000 0.000000 +-1.000000 1.000000 0.000000 +-1.000000 1.000000 1.000000 +-1.000000 0.000000 1.000000 +-1.000000 -1.000000 1.000000 +-0.250000 -0.250000 1.000010 +-0.250000 0.250000 1.000010 +-0.250000 0.250000 1.500000 +-0.250000 -0.250000 1.500000 +0.250000 -0.250000 1.000010 +0.250000 0.250000 1.000010 +0.250000 0.250000 1.500000 +0.250000 -0.250000 1.500000 + +CELLS 5 40 +8 6 7 10 11 0 1 4 5 +8 7 8 9 10 1 2 3 4 +8 12 13 16 17 6 7 10 11 +8 13 14 15 16 7 8 9 10 +8 18 19 20 21 22 23 24 25 + +CELL_TYPES 5 +12 +12 +12 +12 +12 \ No newline at end of file diff --git a/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_bounce_contact.vtk b/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_bounce_contact.vtk new file mode 100644 index 000000000..7669220f3 --- /dev/null +++ b/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_bounce_contact.vtk @@ -0,0 +1,36 @@ +# vtk DataFile Version 3.0 +Hexahedral element example +ASCII +DATASET UNSTRUCTURED_GRID + +POINTS 20 float +-0.500000 -2.000000 1.000000 +-0.500000 0.000000 2.000000 +-0.500000 2.000000 1.000000 +-0.500000 -2.000000 0.000000 +-0.500000 0.000000 1.000000 +-0.500000 2.000000 0.000000 +0.500000 -2.000000 1.000000 +0.500000 0.000000 2.000000 +0.500000 2.000000 1.000000 +0.500000 -2.000000 0.000000 +0.500000 0.000000 1.000000 +0.500000 2.000000 0.000000 +0.000000 1.000000 1.600000 +0.000000 2.000000 1.600000 +0.000000 2.000000 2.850000 +0.000000 1.000000 2.850000 +0.450000 1.000000 1.600000 +0.450000 2.000000 1.600000 +0.450000 2.000000 2.850000 +0.450000 1.000000 2.850000 + +CELLS 3 24 +8 6 7 10 9 0 1 4 3 +8 7 8 11 10 1 2 5 4 +8 12 13 14 15 16 17 18 19 + +CELL_TYPES 3 +12 +12 
+12 \ No newline at end of file diff --git a/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_impact_contact.vtk b/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_impact_contact.vtk new file mode 100644 index 000000000..cbdc76def --- /dev/null +++ b/single-node-refactor/regression_tests/standard_inputs/meshes/slanted_impact_contact.vtk @@ -0,0 +1,36 @@ +# vtk DataFile Version 3.0 +Hexahedral element example +ASCII +DATASET UNSTRUCTURED_GRID + +POINTS 20 float +-0.500000 -2.000000 1.000000 +-0.500000 0.000000 2.000000 +-0.500000 2.000000 1.000000 +-0.500000 -2.000000 0.000000 +-0.500000 0.000000 0.000000 +-0.500000 2.000000 0.000000 +0.500000 -2.000000 1.000000 +0.500000 0.000000 2.000000 +0.500000 2.000000 1.000000 +0.500000 -2.000000 0.000000 +0.500000 0.000000 0.000000 +0.500000 2.000000 0.000000 +-0.250000 0.000000 2.500000 +-0.250000 1.000000 2.500000 +-0.250000 1.000000 3.500000 +-0.250000 0.000000 3.500000 +0.250000 0.000000 2.500000 +0.250000 1.000000 2.500000 +0.250000 1.000000 3.500000 +0.250000 0.000000 3.500000 + +CELLS 3 24 +8 6 7 10 9 0 1 4 3 +8 7 8 11 10 1 2 5 4 +8 12 13 14 15 16 17 18 19 + +CELL_TYPES 3 +12 +12 +12 \ No newline at end of file diff --git a/single-node-refactor/regression_tests/standard_inputs/sie_expansion_contact.yaml b/single-node-refactor/regression_tests/standard_inputs/sie_expansion_contact.yaml index c488a26c2..b027afe6d 100755 --- a/single-node-refactor/regression_tests/standard_inputs/sie_expansion_contact.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/sie_expansion_contact.yaml @@ -10,7 +10,7 @@ dynamic_options: mesh_options: source: file - file_path: ./standard_inputs/meshes/sie_expansion_contact.geo + file_path: ./standard_inputs/meshes/sie_expansion_contact.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/standard_inputs/slanted_bounce_contact.yaml b/single-node-refactor/regression_tests/standard_inputs/slanted_bounce_contact.yaml 
index 757047b84..756371543 100644 --- a/single-node-refactor/regression_tests/standard_inputs/slanted_bounce_contact.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/slanted_bounce_contact.yaml @@ -10,7 +10,7 @@ dynamic_options: mesh_options: source: file - file_path: ./standard_inputs/meshes/slanted_bounce_contact.geo + file_path: ./standard_inputs/meshes/slanted_bounce_contact.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/standard_inputs/slanted_impact_contact.yaml b/single-node-refactor/regression_tests/standard_inputs/slanted_impact_contact.yaml index a28b388ac..df1d18652 100644 --- a/single-node-refactor/regression_tests/standard_inputs/slanted_impact_contact.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/slanted_impact_contact.yaml @@ -10,7 +10,7 @@ dynamic_options: mesh_options: source: file - file_path: ./standard_inputs/meshes/slanted_impact_contact.geo + file_path: ./standard_inputs/meshes/slanted_impact_contact.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/standard_inputs/unconfined_preload.yaml b/single-node-refactor/regression_tests/standard_inputs/unconfined_preload.yaml index 24f7b4faf..8ded950df 100755 --- a/single-node-refactor/regression_tests/standard_inputs/unconfined_preload.yaml +++ b/single-node-refactor/regression_tests/standard_inputs/unconfined_preload.yaml @@ -10,7 +10,7 @@ dynamic_options: mesh_options: source: file - file_path: ./standard_inputs/meshes/preload.geo + file_path: ./standard_inputs/meshes/preload.vtk num_dims: 3 #mesh_options: diff --git a/single-node-refactor/regression_tests/test_refactor.py b/single-node-refactor/regression_tests/test_refactor.py index b20999910..4dec6a3ce 100644 --- a/single-node-refactor/regression_tests/test_refactor.py +++ b/single-node-refactor/regression_tests/test_refactor.py @@ -25,7 +25,10 @@ "Sedov", "Sod_X", "Sod_Y", "Sod_Z", "Sedov_Erosion", \ "Sedov_rz_polar", \ 
"Pressure_bc_box","vtu_read","SGTM_cooling_cube", \ - "lin_vol_frac_two_mat", "Bending-3D-plate", "Vel_bc_box"] + "lin_vol_frac_two_mat", "Bending-3D-plate", "Vel_bc_box", \ + "slanted_bounce_contact", "slanted_impact_contact", \ + "sie_expansion_contact", "confined_preload", "unconfined_preload", \ + "edge_flat_contact"] # Extract data from txt file def extract_state_data(filename): From be6f86cdb52d64b2c18a7a4cbb5c87680e934566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 26 Sep 2025 12:30:07 -0600 Subject: [PATCH 61/66] BUG: build cores env variable --- single-node-refactor/scripts/trilinos-install.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single-node-refactor/scripts/trilinos-install.sh b/single-node-refactor/scripts/trilinos-install.sh index 1589d0f56..97960e8ff 100644 --- a/single-node-refactor/scripts/trilinos-install.sh +++ b/single-node-refactor/scripts/trilinos-install.sh @@ -168,7 +168,7 @@ cmake "${cmake_options[@]}" -B "${TRILINOS_BUILD_DIR}" -S "${TRILINOS_SOURCE_DIR # Build Trilinos echo "Building Trilinos..." -make -C "${TRILINOS_BUILD_DIR}" -j${MATAR_BUILD_CORES} +make -C "${TRILINOS_BUILD_DIR}" -j${FIERRO_BUILD_CORES} # Install Trilinos echo "Installing Trilinos..." 
From d1415905867a77f2d016b458cd449cc0548c6593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 26 Sep 2025 12:35:51 -0600 Subject: [PATCH 62/66] BUG: build cores env variable --- single-node-refactor/scripts/cmake_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/single-node-refactor/scripts/cmake_build.sh b/single-node-refactor/scripts/cmake_build.sh index 8fdfa8784..09b724078 100755 --- a/single-node-refactor/scripts/cmake_build.sh +++ b/single-node-refactor/scripts/cmake_build.sh @@ -48,6 +48,6 @@ echo "CMake Options: ${cmake_options[@]}" cmake "${cmake_options[@]}" -B "${SGH_BUILD_DIR}" -S "${SGH_BASE_DIR}" # Build SGH -make -C "${SGH_BUILD_DIR}" -j${SGH_BUILD_CORES} +make -C "${SGH_BUILD_DIR}" -j${FIERRO_BUILD_CORES} cd $basedir From ed8cee99dd7090f5818d72a61f180aa7dcdd61d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Fri, 26 Sep 2025 19:50:15 -0600 Subject: [PATCH 63/66] BUG: device updates --- single-node-refactor/src/common/include/mesh.h | 4 ++-- single-node-refactor/src/common/include/mesh_io.h | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index c47150584..e10bf8b90 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -417,7 +417,7 @@ struct Mesh_t // set nodes per element for (int node_lid = 0; node_lid < num_nodes_in_elem; node_lid++) { - node_gid = nodes_in_elem(cell_rid, node_lid); //nodes in elem still stores global indices + node_gid = nodes_in_elem.host(cell_rid, node_lid); //nodes in elem still stores global indices if (!node_map.isProcessGlobalIndex(node_gid)) { ghost_node_set.insert(node_gid); @@ -509,7 +509,7 @@ struct Mesh_t for (int lnode = 0; lnode < num_nodes_in_elem; lnode++) { - node_gid = 
nodes_in_elem(ielem, lnode); + node_gid = nodes_in_elem.host(ielem, lnode); if (ghost_node_map.isProcessGlobalIndex(node_gid)) { local_node_index = ghost_node_map.getLocalIndex(node_gid); diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 030134509..840eadb67 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -610,7 +610,7 @@ class MeshReader } }); } - node.coords.update_device(); + node.coords.update_host(); //initialize some mesh data mesh.initialize_nodes(global_num_nodes); num_local_nodes = node_map.size(); @@ -1156,6 +1156,7 @@ class MeshReader } } + node_coords_distributed.update_device(); // repartition node distribution node_coords_distributed.repartition_vector(); //get map from repartitioned Farray and feed it into distributed CArray type; FArray data will be discared after scope @@ -1583,11 +1584,11 @@ class MeshReader // read the node ids in the element FOR_ALL (elem_id, 0, num_elems, { - long long int temp[num_nodes_in_elem]; - for (size_t node_lid=0; node_lid(host_local_element_map, num_scalar_vars); + DistributedCArray elem_fields = DistributedCArray(host_local_element_map, num_scalar_vars); int elem_switch = 1; DCArrayKokkos speed(num_elems, "speed"); From 6477c7102922a5467342d83ee83aaa303055ec90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sat, 27 Sep 2025 12:24:44 -0600 Subject: [PATCH 64/66] BUG: writer gpu bugs --- .../src/common/include/mesh.h | 2 +- .../src/common/include/mesh_io.h | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh.h b/single-node-refactor/src/common/include/mesh.h index e10bf8b90..8552af0f3 100644 --- a/single-node-refactor/src/common/include/mesh.h +++ b/single-node-refactor/src/common/include/mesh.h @@ -603,7 +603,7 @@ struct Mesh_t 
element_map = DistributedMap(Initial_Element_Global_Indices); //redefine nodes_in_elem so partition map of the distributed array is synchronized with permuted dual view contents - DistributedDCArray nodes_in_elem_temp(element_map, num_nodes_in_elem); + DistributedDCArray nodes_in_elem_temp(element_map, num_nodes_in_elem, "nodes_in_elem"); //nodes_in_elem_temp.replace_kokkos_dual_view(nodes_in_elem.get_kokkos_dual_view()); //nodes_in_elem.print(); std::cout << "NUM ELEMS " << num_elems << " NUM NODES IN ELEM " << num_nodes_in_elem << std::endl; diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 840eadb67..34d072bf2 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -4226,7 +4226,7 @@ class MeshWriter //convert local node ids in nodes in elem so they correspond to nonoverlap map for (size_t elem_id = 0; elem_id < num_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - nonoverlap_nodes_in_elem(elem_id, node_lid) = nonoverlap_elem_node_map.getLocalIndex(all_node_map.getGlobalIndex(mesh.nodes_in_elem(elem_id, node_lid))); + nonoverlap_nodes_in_elem.host(elem_id, node_lid) = nonoverlap_elem_node_map.getLocalIndex(all_node_map.getGlobalIndex(mesh.nodes_in_elem.host(elem_id, node_lid))); } } // end for elem_gid @@ -4318,7 +4318,7 @@ class MeshWriter // global_indices_of_local_mat_elems(ielem) = mesh.element_map(State.MaterialToMeshMaps.elem_in_mat_elem(mat_id, ielem)); // }); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem(mat_id, ielem)); + global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); } global_indices_of_local_mat_elems.update_device(); DistributedMap mat_elem_map = 
DistributedMap(global_indices_of_local_mat_elems); @@ -4370,7 +4370,7 @@ class MeshWriter //convert mesh.nodes_in_elem stores local indices and we communicated these in, convert to global for (size_t elem_id = 0; elem_id < num_mat_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - mat_nodes_in_mat_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mat_nodes_in_mat_elem(elem_id, node_lid)); + mat_nodes_in_mat_elem.host(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mat_nodes_in_mat_elem.host(elem_id, node_lid)); } } // end for elem_gid @@ -4406,7 +4406,7 @@ class MeshWriter //convert mat_nodes_in_mat_elem so it uses contiguous local node ids for this mat portion of the mesh for (size_t elem_id = 0; elem_id < num_mat_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - mat_nodes_in_mat_elem(elem_id, node_lid) = mat_node_map.getLocalIndex(mat_nodes_in_mat_elem(elem_id, node_lid)); + mat_nodes_in_mat_elem.host(elem_id, node_lid) = mat_node_map.getLocalIndex(mat_nodes_in_mat_elem.host(elem_id, node_lid)); } } // end for elem_gid @@ -5155,12 +5155,12 @@ class MeshWriter //host version of local element map for argument compatibility HostDistributedMap host_local_element_map; - DCArrayKokkos global_indices_of_local_elements(mesh.num_local_elems); + DCArrayKokkos global_indices_of_local_elements(mesh.num_local_elems, "global_indices_of_local_elements"); for(int ielem = 0; ielem < mesh.num_local_elems; ielem++){ global_indices_of_local_elements(ielem) = mesh.local_element_map.getGlobalIndex(ielem); } host_local_element_map = HostDistributedMap(global_indices_of_local_elements); - DistributedCArray elem_fields = DistributedCArray(host_local_element_map, num_scalar_vars); + DistributedCArray elem_fields = DistributedCArray(host_local_element_map, num_scalar_vars, "elem_fields"); int elem_switch = 1; DCArrayKokkos speed(num_elems, "speed"); @@ -5235,17 +5235,17 @@ 
class MeshWriter collective_elem_map = HostDistributedMap(mesh.global_num_elems, num_collective_elem_indices); //collective vector and comms to the collective vector for elem fields - DistributedCArray collective_elem_fields(collective_elem_map, num_scalar_vars); + DistributedCArray collective_elem_fields(collective_elem_map, num_scalar_vars, "collective_elem_fields"); HostCommPlanLR collective_elem_comms(collective_elem_fields, elem_fields); collective_elem_comms.execute_comms(); //host of node in elem for Trilinos template argument compatibility - DistributedCArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem); + DistributedCArray host_local_nodes_in_elem(host_local_element_map, mesh.num_nodes_in_elem, "host_local_nodes_in_elem"); //convert nodes in elem back to global (convert back to local after we've collected global ids in collective vector) for (size_t elem_id = 0; elem_id < mesh.num_local_elems; elem_id++) { for (int node_lid = 0; node_lid < mesh.num_nodes_in_elem; node_lid++) { - host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem(elem_id, node_lid)); + host_local_nodes_in_elem(elem_id, node_lid) = mesh.all_node_map.getGlobalIndex(mesh.local_nodes_in_elem.host(elem_id, node_lid)); } } // end for elem_gid @@ -5258,14 +5258,14 @@ class MeshWriter //NODE DATA COLLECTION //host version of local node map for argument compatibility HostDistributedMap host_node_map; - DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes); + DCArrayKokkos global_indices_of_local_nodes(mesh.num_local_nodes, "global_indices_of_local_nodes"); for(int inode = 0; inode < mesh.num_local_nodes; inode++){ global_indices_of_local_nodes(inode) = mesh.node_map.getGlobalIndex(inode); } host_node_map = HostDistributedMap(global_indices_of_local_nodes); // save the vertex vector fields to an array for exporting to graphics files - DistributedCArray vec_fields(host_node_map, num_vec_vars, 3); + 
DistributedCArray vec_fields(host_node_map, num_vec_vars, 3, "vec_fields"); for (size_t node_gid = 0; node_gid < mesh.num_local_nodes; node_gid++) { // position, var 0 @@ -5308,7 +5308,7 @@ class MeshWriter collective_node_map = HostDistributedMap(mesh.global_num_nodes, num_collective_node_indices); //collective vector and comms to the collective vector for node fields - DistributedCArray collective_vec_fields(collective_node_map, num_vec_vars, 3); + DistributedCArray collective_vec_fields(collective_node_map, num_vec_vars, 3, "collective_vec_fields"); HostCommPlanLR collective_node_comms(collective_vec_fields, vec_fields); collective_node_comms.execute_comms(); From 2ac1f0bcaf877f25fbe2331d3894d5640885f219 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Sun, 28 Sep 2025 00:44:00 -0600 Subject: [PATCH 65/66] BUG: vtu write with gpu --- single-node-refactor/src/common/include/mesh_io.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/single-node-refactor/src/common/include/mesh_io.h b/single-node-refactor/src/common/include/mesh_io.h index 34d072bf2..d4feb4f20 100644 --- a/single-node-refactor/src/common/include/mesh_io.h +++ b/single-node-refactor/src/common/include/mesh_io.h @@ -4313,12 +4313,12 @@ class MeshWriter //set global element indices on this rank for this mat DistributedMap element_map = mesh.element_map; - DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems); + DCArrayKokkos global_indices_of_local_mat_elems(num_mat_local_elems, " global_indices_of_local_mat_elems"); // FOR_ALL(ielem, 0, num_mat_local_elems,{ // global_indices_of_local_mat_elems(ielem) = mesh.element_map(State.MaterialToMeshMaps.elem_in_mat_elem(mat_id, ielem)); // }); for(int ielem = 0; ielem < num_mat_local_elems; ielem++){ - global_indices_of_local_mat_elems(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); + 
global_indices_of_local_mat_elems.host(ielem) = mesh.element_map.getGlobalIndex(State.MaterialToMeshMaps.elem_in_mat_elem.host(mat_id, ielem)); } global_indices_of_local_mat_elems.update_device(); DistributedMap mat_elem_map = DistributedMap(global_indices_of_local_mat_elems); @@ -7383,7 +7383,7 @@ class MeshWriter // create a Map for ghost node indices mat_node_indices = DCArrayKokkos(num_mat_nodes, "mat_nodes"); while (it != mat_node_set.end()) { - mat_node_indices(ighost++) = *it; + mat_node_indices.host(ighost++) = *it; it++; } mat_node_indices.update_device(); From ebe8ece11a50a061ffdeef82e4b717d9a89fdc8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CAdrian-Diaz=E2=80=9D?= <“adriandiaz1117@gmail.com”> Date: Mon, 29 Sep 2025 00:06:02 -0600 Subject: [PATCH 66/66] BUG: remove change intended for other branch --- .../Kinetic_Energy_Minimize_Shape_Opt.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h index fe98ee64f..1151f47b1 100644 --- a/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h +++ b/src/Parallel-Solvers/Parallel-Explicit/Topology_Optimization/Kinetic_Energy_Minimize_Shape_Opt.h @@ -533,16 +533,13 @@ typedef MV::dual_view_type dual_vec_array; } } } - + // cut out the node_gids for this element ViewCArrayKokkos elem_node_gids(&nodes_in_elem(elem_id, 0), 8); // gradients of the element volume FEM_SGH_->get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); - // gradients of the element volume - FEM_SGH_->get_vol_hex_ugradient(volume_gradients, elem_id, node_coords, elem_node_gids, rk_level); - for (int inode = 0; inode < num_nodes_in_elem; inode++) { for(int idim = 0; idim < num_dim; idim++){ // compute gradient of local element contribution to v^t*M*v product