From 0e1ecf1457f6055521e8dc0e0b78572977c00c8f Mon Sep 17 00:00:00 2001
From: James Le Houx
Date: Fri, 29 May 2026 07:02:36 +0000
Subject: [PATCH] session: NULL-terminate argv passed to amrex::Initialize
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

POSIX and MPI both require argv to be NULL-terminated
(argv[argc] == nullptr). My previous commit constructed argv from a
vector and forgot the terminator. OpenMPI's opal_argv_join (called
inside MPI_Init) iterates argv looking for NULL — without it, it reads
past the vector into uninitialised memory and segfaults intermittently
depending on heap layout (subprocess happened to get NULL by luck,
Jupyter kernel did not).

Stack trace from the failing case:
  [0] opal_argv_join+0x35
  [1] ompi_mpi_init+0xb5e
  [2] MPI_Init+0x72
  [3] amrex::ParallelDescriptor::StartParallel
  [4] amrex::Initialize
---
 python/bindings/module.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/bindings/module.cpp b/python/bindings/module.cpp
index 8b757a1..a1bc1cc 100644
--- a/python/bindings/module.cpp
+++ b/python/bindings/module.cpp
@@ -59,10 +59,15 @@ static void init_amrex() {
         "amrex.the_arena_init_size=0",
     };
     std::vector<char*> argv_ptrs;
+    argv_ptrs.reserve(argv_storage.size() + 1);
     for (auto& s : argv_storage) {
         argv_ptrs.push_back(s.data());
     }
-    int argc = static_cast<int>(argv_ptrs.size());
+    // POSIX/MPI convention: argv must be NULL-terminated. Without
+    // this, opal_argv_join (called by MPI_Init) reads past the end
+    // and segfaults intermittently depending on heap layout.
+    argv_ptrs.push_back(nullptr);
+    int argc = static_cast<int>(argv_storage.size());
     char** argv = argv_ptrs.data();
     amrex::Initialize(argc, argv);
 }