From 8ae3385bfe00eb2163cf1a07e66be77f72603adc Mon Sep 17 00:00:00 2001 From: Benjamin Brock Date: Tue, 17 Mar 2026 15:04:24 +0000 Subject: [PATCH 1/2] ishmem: add explicit init_attr device selection --- README.md | 4 +- docs/source/execution_model.rst | 5 +- docs/source/library_setup_exit_query.rst | 8 +- examples/3_library_apis.cpp | 6 +- src/accelerator.cpp | 112 ++++++++++++++++++----- src/accelerator.h | 6 +- src/ishmem.cpp | 2 +- src/ishmemx.h | 2 + src/memory.cpp | 2 +- src/on_queue.h | 3 + 10 files changed, 115 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index b2822e1..3cedcd2 100644 --- a/README.md +++ b/README.md @@ -146,8 +146,8 @@ ISHMEM_RUNTIME= mpiexec.hydra -n 2 -hosts ./scripts - *Note:* Current supported launchers include: MPI process launchers (i.e. `mpiexec`, `mpiexec.hydra`, `mpirun`, etc.), Slurm (i.e. `srun`, `salloc`, etc.), and PBS (i.e. `qsub`). -- *Note:* Intel® SHMEM execution model requires applications to use a 1:1 mapping between PEs and GPU devices. Attempting to run an application without the `ishmrun` launch script may result in failure if this mapping is not maintained. - - For further details on device selection, please see [the ONEAPI_DEVICE_SELECTOR](https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#oneapi_device_selector). +- *Note:* Intel® SHMEM execution model requires applications to use a 1:1 mapping between PEs and GPU devices. The default `ishmrun` launcher maintains that mapping by restricting each PE to a single visible GPU device. When multiple GPUs remain visible to a PE, set `ishmemx_attr_t.device_id` explicitly before calling `ishmemx_init_attr`. + - For further details on launcher-based device selection, please see [the ONEAPI_DEVICE_SELECTOR](https://github.com/intel/llvm/blob/sycl/sycl/doc/EnvironmentVariables.md#oneapi_device_selector). 3. Validate the application ran successfully; example output: diff --git a/docs/source/execution_model.rst b/docs/source/execution_model.rst index f9bfdcf..06bd01b 100644 --- a/docs/source/execution_model.rst +++ b/docs/source/execution_model.rst @@ -21,7 +21,9 @@ library`. that assigns the environment variable **ZE_AFFINITY_MASK** so that each PE is assigned a single SYCL device. Usage of this script is described in Section :ref:`Compiling and Running - Programs`. + Programs`. When multiple GPUs remain + visible to a PE, applications must select one explicitly with + ``ishmemx_attr_t.device_id`` before calling ``ishmemx_init_attr``. .. note:: Intel® Data Center GPU Max Series devices utilize a multi-tile architecture (as of Intel® SHMEM v1.0.0 with 1 or 2 tiles). By default, @@ -110,4 +112,3 @@ operations, see :ref:`Memory Ordering`. .. segment. .. For example, objects located in the symmetric data segment and objects .. located in the symmetric heap can be provided as arguments to the same OpenSHMEM operation. - diff --git a/docs/source/library_setup_exit_query.rst b/docs/source/library_setup_exit_query.rst index 3374fd1..222acb7 100644 --- a/docs/source/library_setup_exit_query.rst +++ b/docs/source/library_setup_exit_query.rst @@ -58,6 +58,7 @@ OpenSHMEM, MPI, or PMI. .. c:var:: ishmemx_runtime_type_t runtime .. c:var:: bool initialize_runtime = true .. c:var:: bool gpu = true + .. c:var:: int device_id = -1 .. c:var:: void *mpi_comm **Description:** @@ -68,7 +69,11 @@ library. By default, the parallel runtime is initialized by Intel® SHMEM (**initialize_runtime** default is ``true``). The **gpu** boolean indicates whether to use GPU memory for the symmetric -heap (default is ``true``). **mpi_comm** is a pointer to the corresponding +heap (default is ``true``). ``device_id`` selects the visible GPU ordinal to +use when multiple GPU devices are visible to a PE. The default value of ``-1`` +preserves the traditional single-visible-device behavior: Intel® SHMEM will +auto-select the device only when exactly one GPU is visible. **mpi_comm** is a +pointer to the corresponding MPI communicator for representing ``ISHMEM_TEAM_WORLD`` when used with ``ISHMEM_RUNTIME_MPI`` (default is ``MPI_COMM_WORLD``). @@ -283,4 +288,3 @@ character. If the **name** memory buffer is provided with size less than ISHMEM_MAX_NAME_LEN, behavior is undefined. For a given library implementation, the vendor string returned is consistent with the library constant ISHMEM_VENDOR_STRING. - diff --git a/examples/3_library_apis.cpp b/examples/3_library_apis.cpp index a25836a..33758b2 100644 --- a/examples/3_library_apis.cpp +++ b/examples/3_library_apis.cpp @@ -16,8 +16,10 @@ int main() << std::endl; /* Initialize ISHMEM - * The ISHMEM launch script will set things up so that ishmem uses - * the same GPU device as the SYCL queue above + * The default ishmrun launch script will set things up so that ishmem uses + * the same GPU device as the SYCL queue above. When multiple GPUs are + * visible to each PE, use ishmemx_attr_t.device_id to select the device + * explicitly before calling ishmemx_init_attr(). */ ishmem_init(); diff --git a/src/accelerator.cpp b/src/accelerator.cpp index 26f0274..158a95f 100644 --- a/src/accelerator.cpp +++ b/src/accelerator.cpp @@ -6,6 +6,7 @@ #include "accelerator.h" #include #include +#include /* TODO: Workaround to resolve compiler limitation. Need to be fixed later */ #if __INTEL_CLANG_COMPILER <= 20210400 @@ -15,12 +16,21 @@ #endif namespace { + struct ishmemi_visible_gpu_t { + ze_driver_handle_t driver = nullptr; + ze_device_handle_t device = nullptr; + ze_device_properties_t properties = {}; + uint32_t driver_idx = 0; + }; + /* L0 driver */ ze_driver_handle_t *all_drivers = nullptr; ze_device_handle_t **all_devices = nullptr; uint32_t driver_count = 0; uint32_t driver_idx = 0; bool driver_found = false; + std::vector visible_gpus; + int selected_device_id = -1; /* L0 device */ ze_device_properties_t device_properties = {}; @@ -95,10 +105,30 @@ static inline uint32_t get_next_link_index() return index; } +sycl::device ishmemi_get_selected_sycl_device() +{ + return sycl::make_device(ishmemi_gpu_device); +} + +void ishmemi_validate_queue_device(const sycl::queue &q) +{ + try { + auto queue_device = + sycl::get_native(q.get_device()); + if (queue_device != ishmemi_gpu_device) { + RAISE_ERROR_MSG( + "Queue device does not match the selected ISHMEM device. Set " + "ishmemx_attr_t.device_id to the queue device ordinal.\n"); + } + } catch (const sycl::exception &) { + RAISE_ERROR_MSG("Queue device is not a Level Zero GPU device\n"); + } +} + int ishmemi_accelerator_preinit() { int ret = 0; - uint32_t i; + uint32_t i, j; uint32_t device_count = 0; ze_init_flag_t flags = ZE_INIT_FLAG_GPU_ONLY; @@ -134,44 +164,44 @@ int ishmemi_accelerator_preinit() ZE_CHECK(zeDriverGet(&driver_count, all_drivers)); ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); - /* Parse the drivers for a suitable driver */ + visible_gpus.clear(); + + /* Parse the drivers for visible GPU devices */ for (i = 0; i < driver_count; i++) { device_count = 0; ZE_CHECK(zeDeviceGet(all_drivers[i], &device_count, nullptr)); ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); if (device_count == 0) continue; - /* Ensure a single device is detected */ - ISHMEM_CHECK_GOTO_MSG(device_count != 1, fn_fail, "Detected more than one device\n"); all_devices[i] = (ze_device_handle_t *) ::malloc(device_count * sizeof(ze_device_handle_t)); - ISHMEM_CHECK_GOTO_MSG(all_devices == nullptr, fn_fail, + ISHMEM_CHECK_GOTO_MSG(all_devices[i] == nullptr, fn_fail, "Allocation of all_drivers[%d] failed\n", i); ZE_CHECK(zeDeviceGet(all_drivers[i], &device_count, all_devices[i])); ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); - ZE_CHECK(zeDeviceGetProperties(all_devices[i][0], &device_properties)); - ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); - - if (ZE_DEVICE_TYPE_GPU == device_properties.type && !driver_found) { - ishmemi_gpu_driver = all_drivers[i]; - driver_idx = i; - driver_found = true; + for (j = 0; j < device_count; ++j) { + ze_device_properties_t props = {}; + ZE_CHECK(zeDeviceGetProperties(all_devices[i][j], &props)); + ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); + + if (ZE_DEVICE_TYPE_GPU == props.type) { + visible_gpus.push_back({ + .driver = all_drivers[i], + .device = all_devices[i][j], + .properties = props, + .driver_idx = i, + }); + } } } - if (!driver_found) { + if (visible_gpus.empty()) { ISHMEM_ERROR_MSG("No ZE driver found for GPU\n"); ret = ISHMEMI_NO_DEVICES; goto fn_fail; } - /* Create the ZE context */ - ishmemi_ze_context_desc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; - - ZE_CHECK(zeContextCreate(ishmemi_gpu_driver, &ishmemi_ze_context_desc, &ishmemi_ze_context)); - ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); - fn_exit: ishmemi_accelerator_preinitialized = true; return ret; @@ -181,7 +211,7 @@ int ishmemi_accelerator_preinit() goto fn_exit; } -int ishmemi_accelerator_init() +int ishmemi_accelerator_init(const ishmemx_attr_t *attr) { int ret = 0; uint32_t i, j; @@ -192,9 +222,37 @@ int ishmemi_accelerator_init() ret = ishmemi_accelerator_preinit(); ISHMEMI_CHECK_RESULT(ret, 0, fn_exit); - if (driver_found) { - /* Set the default GPU */ - ishmemi_gpu_device = all_devices[driver_idx][0]; + if (!ishmemi_accelerator_initialized) { + ISHMEM_CHECK_GOTO_MSG(attr == nullptr, fn_fail, + "Accelerator initialization requires non-null attributes\n"); + ISHMEM_CHECK_GOTO_MSG(attr->device_id < -1, fn_fail, + "Invalid device_id %d provided in ishmemx_attr_t\n", + attr->device_id); + + if (attr->device_id == -1) { + ISHMEM_CHECK_GOTO_MSG( + visible_gpus.size() != 1, fn_fail, + "Detected %zu visible GPU devices. Set ishmemx_attr_t.device_id to select one.\n", + visible_gpus.size()); + selected_device_id = 0; + } else { + ISHMEM_CHECK_GOTO_MSG( + static_cast(attr->device_id) >= visible_gpus.size(), fn_fail, + "Requested device_id %d is out of range for %zu visible GPU devices\n", + attr->device_id, visible_gpus.size()); + selected_device_id = attr->device_id; + } + + const auto &selected_device = visible_gpus[static_cast(selected_device_id)]; + ishmemi_gpu_driver = selected_device.driver; + ishmemi_gpu_device = selected_device.device; + device_properties = selected_device.properties; + driver_idx = selected_device.driver_idx; + driver_found = true; + + ishmemi_ze_context_desc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; + ZE_CHECK(zeContextCreate(ishmemi_gpu_driver, &ishmemi_ze_context_desc, &ishmemi_ze_context)); + ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); /* Discover command queue groups */ ZE_CHECK( @@ -289,10 +347,11 @@ int ishmemi_accelerator_init() ISHMEMI_CHECK_RESULT(ret, 0, fn_fail); fn_exit: - ishmemi_accelerator_initialized = true; + ishmemi_accelerator_initialized = (ret == 0); return ret; fn_fail: ishmemi_accelerator_fini(); + if (!ret) ret = 1; goto fn_exit; } @@ -326,12 +385,17 @@ int ishmemi_accelerator_fini(void) ISHMEMI_FREE(::free, all_devices[i]); ISHMEMI_FREE(::free, all_devices); ISHMEMI_FREE(::free, all_drivers); + visible_gpus.clear(); ishmemi_accelerator_preinitialized = false; ishmemi_accelerator_initialized = false; driver_found = false; driver_idx = 0; driver_count = 0; + selected_device_id = -1; + ishmemi_gpu_driver = nullptr; + ishmemi_gpu_device = nullptr; + device_properties = {}; if (ishmemi_ze_context) { ZE_CHECK(zeContextDestroy(ishmemi_ze_context)); diff --git a/src/accelerator.h b/src/accelerator.h index 9512e9e..fa138b9 100644 --- a/src/accelerator.h +++ b/src/accelerator.h @@ -59,12 +59,16 @@ static inline void ishmemi_print_device_properties(const ze_device_properties_t /* Initialize accelerator */ int ishmemi_accelerator_preinit(void); -int ishmemi_accelerator_init(void); +int ishmemi_accelerator_init(const ishmemx_attr_t *attr); /* Finalize accelerator */ int ishmemi_accelerator_fini(void); int ishmemi_accelerator_postfini(void); +/* Selected-device helpers */ +sycl::device ishmemi_get_selected_sycl_device(); +void ishmemi_validate_queue_device(const sycl::queue &q); + /* Query allocation memory type */ int ishmemi_get_memory_type(const void *ptr, ze_memory_type_t *type); diff --git a/src/ishmem.cpp b/src/ishmem.cpp index 44b6571..42d8d76 100644 --- a/src/ishmem.cpp +++ b/src/ishmem.cpp @@ -280,7 +280,7 @@ static void ishmemi_init(ishmemx_attr_t *attr, bool user_attr) ishmemi_cpu_info->n_pes = ishmemi_n_pes; if (attr->gpu) { - ret = ishmemi_accelerator_init(); + ret = ishmemi_accelerator_init(attr); if (ret == ISHMEMI_NO_DEVICE_ACCESS) { attr->gpu = false; /* TODO need to enable SHARED HEAP config */ diff --git a/src/ishmemx.h b/src/ishmemx.h index de87a1c..e988287 100644 --- a/src/ishmemx.h +++ b/src/ishmemx.h @@ -25,6 +25,8 @@ typedef struct ishmemx_attr_t { bool initialize_runtime = true; /* By default, gpu is used */ bool gpu = true; + /* By default, select the only visible GPU device */ + int device_id = -1; /* By default, the base team/comm is uninitialized, representing the default global team/comm */ union { /* TODO: add support for user-provided shmem_team as global team */ diff --git a/src/memory.cpp b/src/memory.cpp index 4f35408..b650be4 100644 --- a/src/memory.cpp +++ b/src/memory.cpp @@ -98,7 +98,7 @@ int ishmemi_memory_init() /* SYCL queue to initialize global_info */ try { - sycl::queue q; + sycl::queue q(ishmemi_get_selected_sycl_device()); q.copy(&ishmemi_gpu_info, global_info).wait_and_throw(); } catch (...) { ret = -1; diff --git a/src/on_queue.h b/src/on_queue.h index 2fe3e6a..ba02181 100644 --- a/src/on_queue.h +++ b/src/on_queue.h @@ -5,6 +5,7 @@ #ifndef ISHMEM_ON_QUEUE_H #define ISHMEM_ON_QUEUE_H +#include "accelerator.h" #include struct ishmemi_on_queue_map_entry_t { @@ -38,6 +39,8 @@ class ishmemi_on_queue_map : public std::map Date: Tue, 17 Mar 2026 15:04:43 +0000 Subject: [PATCH 2/2] ishmem: add device selection regression tests --- scripts/ctest/all_visible_wrapper | 8 ++ test/cmake/common.cmake | 1 + test/unit/CMakeLists.txt | 18 +++- test/unit/init_attr_device_id.cpp | 113 +++++++++++++++++++++ test/unit/init_attr_device_id_required.cpp | 53 ++++++++++ 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100755 scripts/ctest/all_visible_wrapper create mode 100644 test/unit/init_attr_device_id.cpp create mode 100644 test/unit/init_attr_device_id_required.cpp diff --git a/scripts/ctest/all_visible_wrapper b/scripts/ctest/all_visible_wrapper new file mode 100755 index 0000000..90e59a4 --- /dev/null +++ b/scripts/ctest/all_visible_wrapper @@ -0,0 +1,8 @@ +#!/bin/bash + +export ZE_FLAT_DEVICE_HIERARCHY=FLAT +export SYCL_DEVICE_FILTER=:gpu +unset ZE_AFFINITY_MASK +unset ONEAPI_DEVICE_SELECTOR + +exec "$@" diff --git a/test/cmake/common.cmake b/test/cmake/common.cmake index 8f56d60..5c71f98 100644 --- a/test/cmake/common.cmake +++ b/test/cmake/common.cmake @@ -88,6 +88,7 @@ endif() list(APPEND ISHMEM_TEST_INCLUDE_DIRS "${ISHMEM_INCLUDE}" + "${ISHMEM_ROOT_DIR}/src" "${ISHMEM_TEST_ROOT_DIR}/include" "${CMAKE_CURRENT_BINARY_DIR}/include") diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 632f119..a4ff106 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -29,7 +29,9 @@ target_link_libraries(ishmem-test-common-unit PUBLIC ${ISHMEM_TEST_LINK_LIBS}) # Setup default test values set(ISHMEM_PE_COUNTS_UNIT_TESTS "2" CACHE STRING "Number of PEs to use for each test") -set(ISHMEM_NON_STANDARD_CTEST_FILES "") +set(ISHMEM_NON_STANDARD_CTEST_FILES + init_attr_device_id + init_attr_device_id_required) set(ISHMEM_SKIP_CTEST_FILES "") set(ISHMEM_TESTER_MODES host_device_device device) @@ -104,6 +106,8 @@ set(ISHMEM_TESTER_ON_QUEUE_TESTS wait_until_some wait_until_some_vector) +set(ISHMEM_ALL_VISIBLE_RUN_SCRIPT "${SCRIPTS_DIR}/ctest/all_visible_wrapper") + enable_testing() # ------------------------------------------------------------------- @@ -130,6 +134,18 @@ foreach(TEST_SOURCE_FILE ${TEST_SOURCE_FILES}) target_link_libraries(${EXE} PRIVATE ishmem-test-common-unit) endforeach() +foreach (N ${ISHMEM_PE_COUNTS_UNIT_TESTS}) + add_test(NAME init_attr_device_id-flat-${N} COMMAND ${CTEST_WRAPPER} ${N} + ${ISHMEM_ALL_VISIBLE_RUN_SCRIPT} ./init_attr_device_id${CMAKE_EXECUTABLE_SUFFIX}) + set_tests_properties(init_attr_device_id-flat-${N} PROPERTIES SKIP_RETURN_CODE 77) +endforeach() + +add_test(NAME init_attr_device_id_required-flat COMMAND ${CMAKE_COMMAND} -E env + ZE_FLAT_DEVICE_HIERARCHY=FLAT + SYCL_DEVICE_FILTER=:gpu + ${CMAKE_CURRENT_BINARY_DIR}/init_attr_device_id_required${CMAKE_EXECUTABLE_SUFFIX}) +set_tests_properties(init_attr_device_id_required-flat PROPERTIES SKIP_RETURN_CODE 77) + # ------------------------------------------------------------------- # Add ctests diff --git a/test/unit/init_attr_device_id.cpp b/test/unit/init_attr_device_id.cpp new file mode 100644 index 0000000..19ff538 --- /dev/null +++ b/test/unit/init_attr_device_id.cpp @@ -0,0 +1,113 @@ +/* Copyright (C) 2025 Intel Corporation + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include +#include +#include + +namespace { +std::vector get_visible_level_zero_gpus() +{ + std::vector devices; + + for (const auto &platform : sycl::platform::get_platforms()) { + if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) continue; + + for (const auto &device : platform.get_devices()) { + if (device.is_gpu()) { + devices.push_back(device); + } + } + } + + return devices; +} + +int get_local_rank() +{ + constexpr const char *env_names[] = { + "MPI_LOCALRANKID", + "OMPI_COMM_WORLD_LOCAL_RANK", + "PMI_LOCAL_RANK", + "SLURM_LOCALID", + }; + + for (const char *name : env_names) { + const char *value = std::getenv(name); + if (value != nullptr) { + return std::atoi(value); + } + } + + return 0; +} +} // namespace + +int main() +{ + validate_runtime(); + + auto devices = get_visible_level_zero_gpus(); + if (devices.size() < 2) { + std::cout << "Skipping explicit device_id test because fewer than 2 visible Level Zero GPUs " + "were detected" + << std::endl; + return 77; + } + + int local_rank = get_local_rank(); + if ((local_rank < 0) || (static_cast(local_rank) >= devices.size())) { + std::cerr << "Invalid local rank " << local_rank << " for " << devices.size() + << " visible GPU devices" << std::endl; + return EXIT_FAILURE; + } + + ishmemx_attr_t attr; + attr.initialize_runtime = true; + attr.runtime = ishmemi_test_runtime->get_type(); + attr.device_id = local_rank; + ishmemx_init_attr(&attr); + + int my_pe = ishmem_my_pe(); + int npes = ishmem_n_pes(); + int peer = (my_pe + 1) % npes; + + sycl::queue q(devices[static_cast(local_rank)]); + + std::cout << "PE " << my_pe << " selected device_id " << local_rank << ": " + << q.get_device().get_info() << std::endl; + + int *source = (int *) ishmem_malloc(sizeof(int)); + CHECK_ALLOC(source); + int *target = (int *) ishmem_malloc(sizeof(int)); + CHECK_ALLOC(target); + int *host_value = sycl::malloc_host(1, q); + CHECK_ALLOC(host_value); + + q.fill(source, my_pe, 1).wait_and_throw(); + q.fill(target, -1, 1).wait_and_throw(); + ishmem_barrier_all(); + + auto e = ishmemx_int_get_on_queue(target, source, 1, peer, q); + e.wait_and_throw(); + ishmemx_quiet_on_queue(q).wait_and_throw(); + q.copy(target, host_value, 1).wait_and_throw(); + + int rc = EXIT_SUCCESS; + if (*host_value != peer) { + std::cerr << "PE " << my_pe << " expected " << peer << " but received " << *host_value + << std::endl; + rc = EXIT_FAILURE; + } else if (my_pe == 0) { + std::cout << "Test Passed" << std::endl; + } + + ishmem_barrier_all(); + sycl::free(host_value, q); + ishmem_free(target); + ishmem_free(source); + ishmem_finalize(); + + return rc; +} diff --git a/test/unit/init_attr_device_id_required.cpp b/test/unit/init_attr_device_id_required.cpp new file mode 100644 index 0000000..938f93e --- /dev/null +++ b/test/unit/init_attr_device_id_required.cpp @@ -0,0 +1,53 @@ +/* Copyright (C) 2025 Intel Corporation + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "accelerator.h" +#include +#include +#include + +namespace { +std::vector get_visible_level_zero_gpus() +{ + std::vector devices; + + for (const auto &platform : sycl::platform::get_platforms()) { + if (platform.get_backend() != sycl::backend::ext_oneapi_level_zero) continue; + + for (const auto &device : platform.get_devices()) { + if (device.is_gpu()) { + devices.push_back(device); + } + } + } + + return devices; +} +} // namespace + +int main() +{ + auto devices = get_visible_level_zero_gpus(); + if (devices.size() < 2) { + std::cout << "Skipping device_id-required test because fewer than 2 visible Level Zero " + "GPUs were detected" + << std::endl; + return 77; + } + + ishmemx_attr_t attr; + int ret = ishmemi_accelerator_init(&attr); + + if (ret == 0) { + std::cerr << "Expected accelerator initialization to fail when multiple GPUs are visible " + "and device_id is not set" + << std::endl; + ishmemi_accelerator_fini(); + return EXIT_FAILURE; + } + + std::cout << "Detected required explicit device selection with " << devices.size() + << " visible GPUs" << std::endl; + return EXIT_SUCCESS; +}