From e3bab68e34ee5385a2faac2772af0431a21f8f7d Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Wed, 14 Jan 2026 14:54:31 +0000 Subject: [PATCH 1/2] Add support for GCMC sampling on the OpenCL platform. --- src/somd2/runner/_base.py | 5 +- src/somd2/runner/_repex.py | 108 +++++++++++++++++++++++++++---------- 2 files changed, 84 insertions(+), 29 deletions(-) diff --git a/src/somd2/runner/_base.py b/src/somd2/runner/_base.py index fa857c7..87f0d0c 100644 --- a/src/somd2/runner/_base.py +++ b/src/somd2/runner/_base.py @@ -535,8 +535,8 @@ def __init__(self, system, config): # GCMC specific validation. if self._config.gcmc: - if self._config.platform != "cuda": - msg = "GCMC simulations require the CUDA platform." + if self._config.platform not in ["cuda", "opencl"]: + msg = "GCMC simulations require the CUDA or OpenCL platform." _logger.error(msg) raise ValueError(msg) @@ -747,6 +747,7 @@ def __init__(self, system, config): "tolerance": self._config.gcmc_tolerance, "restart": self._is_restart, "overwrite": self._config.overwrite, + "platform": config.platform, "no_logger": True, } else: diff --git a/src/somd2/runner/_repex.py b/src/somd2/runner/_repex.py index 4f28c3f..e6bd4f5 100644 --- a/src/somd2/runner/_repex.py +++ b/src/somd2/runner/_repex.py @@ -329,15 +329,15 @@ def _create_dynamics( # Work out the memory used by this dynamics object and GCMC sampler. mem_used = used_mem - used_mem_before - # Work out the estimate for all replicas on this device. - est_total = mem_used * contexts_per_device[device] + # Work out the estimated total after all replicas have been created. + est_total = mem_used * contexts_per_device[device] + used_mem_before # If this exceeds the total memory, raise an error. if est_total > total_mem: msg = ( f"Not enough memory on device {device} for all assigned replicas. " - f"Estimated memory usage: {est_total / 1e9:.2f} GB, " - f"Available memory: {total_mem / 1e9:.2f} GB." + f"Estimated memory usage: {est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." ) _logger.error(msg) raise MemoryError(msg) @@ -347,8 +347,15 @@ def _create_dynamics( _logger.warning( f"Device {device} will have less than 20% free memory " f"after creating all assigned replicas. " - f"{est_total / 1e9:.2f} GB, " - f"Available memory: {total_mem / 1e9:.2f} GB." + f"{est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." + ) + + else: + _logger.info( + f"Estimated memory usage on device {device} after creating all replicas: " + f"{est_total / (1024**3):.2f} GB, " + f"Available memory: {total_mem / (1024**3):.2f} GB." ) _logger.info( @@ -515,34 +522,78 @@ def get_swaps(self): return self._num_swaps @staticmethod - def _check_device_memory(index): + def _check_device_memory(device_index=0): """ - Check the memory usage of the specified CUDA device. + Check the memory usage of the specified GPU device. Parameters ---------- index: int - The index of the CUDA device. + The index of the GPU device. """ - try: - from pynvml import ( - nvmlInit, - nvmlShutdown, - nvmlDeviceGetHandleByIndex, - nvmlDeviceGetMemoryInfo, - ) + import pyopencl as cl - nvmlInit() - handle = nvmlDeviceGetHandleByIndex(index) - info = nvmlDeviceGetMemoryInfo(handle) - result = (info.used, info.free, info.total) - nvmlShutdown() - except Exception as e: - msg = f"Could not determine memory usage for device {index}: {e}" + # Get the device. + platforms = cl.get_platforms() + all_devices = [] + for platform in platforms: + try: + devices = platform.get_devices(device_type=cl.device_type.GPU) + all_devices.extend(devices) + except: + continue + + if device_index >= len(all_devices): + msg = f"Device index {device_index} out of range. Found {len(all_devices)} GPU(s)." _logger.error(msg) + raise IndexError(msg) - return result + device = all_devices[device_index] + total = device.global_mem_size + + # NVIDIA: Use pynvml + if "NVIDIA" in device.vendor: + try: + import pynvml + + pynvml.nvmlInit() + + # Find matching device by name + device_count = pynvml.nvmlDeviceGetCount() + for i in range(device_count): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + + if name in device.name or device.name in name: + memory = pynvml.nvmlDeviceGetMemoryInfo(handle) + pynvml.nvmlShutdown() + return (memory.used, memory.free, memory.total) + + pynvml.nvmlShutdown() + except Exception as e: + msg = f"Could not get NVIDIA GPU memory info for device {device_index}: {e}" + _logger.error(msg) + raise RuntimeError(msg) from e + + # AMD: Use OpenCL extension + elif "AMD" in device.vendor or "Advanced Micro Devices" in device.vendor: + try: + free_memory_info = device.get_info(0x4038) + free_kb = ( + free_memory_info[0] + if isinstance(free_memory_info, list) + else free_memory_info + ) + free = free_kb * 1024 + used = total - free + return (used, free, total) + except Exception as e: + msg = ( + f"Could not get AMD GPU memory info for device {device_index}: {e}" + ) + _logger.error(msg) + raise RuntimeError(msg) from e class RepexRunner(_RunnerBase): @@ -582,9 +633,12 @@ def __init__(self, system, config): # Call the base class constructor. super().__init__(system, config) - # Make sure we're using the CUDA platform. - if self._config.platform != "cuda": - msg = "Currently replica exchange simulations can only be run on the CUDA platform." + # Make sure we're using the CUDA or OpenCL platform. + if self._config.platform not in ["cuda", "opencl"]: + msg = ( + "Currently replica exchange simulations can only be " + "run on the CUDA and OpenCL platforms." + ) _logger.error(msg) raise ValueError(msg) From 4fa35410504a36f19d3ab1f808dd4c34f05ea427 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Thu, 15 Jan 2026 16:24:50 +0000 Subject: [PATCH 2/2] Update GCMC platform requirement. --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab45be7..f8174c8 100644 --- a/README.md +++ b/README.md @@ -145,13 +145,13 @@ somd2 --help | grep -A2 ' --gcmc' ``` > [!NOTE] -> GCMC is currently only supported when using the CUDA platform and isn't -> available on macOS, where the `pycuda` package is not available. +> GCMC is only supported when using the CUDA or OpenCL platforms. -Make sure that `nvcc` is in your `PATH`. If you require a different `nvcc` to that -provided by conda, you can set the `PYCUDA_NVCC` environment variable to point -to the desired `nvcc` binary. Depending on your setup, you may also need to install -the `cuda-nvvm` package from `conda-forge`. +When using the CUDA platform, make sure that `nvcc` is in your `PATH`. If you +require a different `nvcc` to that provided by conda, you can set the +`PYCUDA_NVCC` environment variable to point to the desired `nvcc` binary. +Depending on your setup, you may also need to install the `cuda-nvvm` package +from `conda-forge`. ## Analysis