diff --git a/.github/workflows/pip_install.yml b/.github/workflows/pip_install.yml index 9a40b133..0986d54a 100644 --- a/.github/workflows/pip_install.yml +++ b/.github/workflows/pip_install.yml @@ -30,9 +30,9 @@ jobs: - name: Install ReFrame run: | - python -m pip install --user reframe-hpc + python -m pip install --user reframe-hpc==4.9.3 # remove hpctestlib directory, which is automatically installed with reframe-hpc - pip show reframe-hpc | grep Location | cut -d ' ' -f 2 | xargs -I {} rm -r {}/hpctestlib + pip show reframe-hpc | grep Location | cut -d ' ' -f 2 | xargs -I {} rm -rf {}/hpctestlib - name: Install EESSI test suite with 'pip install' run: | diff --git a/config/aws_mc.py b/config/aws_mc.py index 602d2cca..4e8a6fb5 100644 --- a/config/aws_mc.py +++ b/config/aws_mc.py @@ -12,7 +12,7 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES @@ -112,7 +112,9 @@ FEATURES.CPU ] + list(SCALES.keys()), 'prepare_cmds': [ - common_eessi_init(), + # This system doesn't have an lmod installation by default, so source one from EESSI + 'source /cvmfs/software.eessi.io/2025.06/init/lmod/bash', + 'module unload EESSI', # Required when using srun as launcher with --export=NONE in partition access, in order to ensure job # steps inherit environment. 
It doesn't hurt to define this even if srun is not used 'export SLURM_EXPORT_ENV=ALL' diff --git a/config/bsc_marenostrum5.py b/config/bsc_marenostrum5.py index f8bb68e5..56a19ae1 100644 --- a/config/bsc_marenostrum5.py +++ b/config/bsc_marenostrum5.py @@ -1,6 +1,6 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import DEVICE_TYPES, EXTRAS, FEATURES, GPU_VENDORS, SCALES @@ -30,7 +30,6 @@ 'env_vars': [], 'prepare_cmds': [ "module unuse /apps/GPP/modulefiles/applications", - common_eessi_init(), 'export OMPI_MCA_pml=ucx', # Work around "Failed to modify UD QP to INIT on mlx5_0: Operation not permitted" issue # until we can resolve this through an LMOD hook in host_injections. @@ -60,7 +59,6 @@ 'env_vars': [], 'prepare_cmds': [ "module unuse /apps/GPP/modulefiles/applications", - common_eessi_init(), 'export OMPI_MCA_pml=ucx', # Work around "Failed to modify UD QP to INIT on mlx5_0: Operation not permitted" issue # until we can resolve this through an LMOD hook in host_injections. 
diff --git a/config/github_actions.py b/config/github_actions.py index adb92e3e..3bf0c878 100644 --- a/config/github_actions.py +++ b/config/github_actions.py @@ -16,7 +16,6 @@ 'name': 'default', 'scheduler': 'local', 'launcher': 'local', - 'environs': ['default'], 'features': [FEATURES.CPU] + list(SCALES.keys()), 'processor': { 'num_cpus': 2, diff --git a/config/it4i_karolina.py b/config/it4i_karolina.py index f0e32204..27380a93 100644 --- a/config/it4i_karolina.py +++ b/config/it4i_karolina.py @@ -15,7 +15,7 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES @@ -42,7 +42,6 @@ 'name': 'qcpu', 'scheduler': 'slurm', 'prepare_cmds': [ - common_eessi_init(), # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', # Needed when using srun launcher @@ -77,7 +76,6 @@ # 'name': 'qgpu', # 'scheduler': 'slurm', # 'prepare_cmds': [ - # common_eessi_init(), # # Pass job environment variables like $PATH, etc., into job steps # 'export SLURM_EXPORT_ENV=ALL', # # Needed when using srun launcher diff --git a/config/izum_vega.py b/config/izum_vega.py index 09057afd..09d6a39d 100644 --- a/config/izum_vega.py +++ b/config/izum_vega.py @@ -15,7 +15,7 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES @@ -37,7 +37,6 @@ 'name': 'cpu', 'scheduler': 'slurm', 'prepare_cmds': [ - common_eessi_init(), # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', # Needed when 
using srun launcher @@ -66,7 +65,6 @@ 'name': 'gpu', 'scheduler': 'slurm', 'prepare_cmds': [ - common_eessi_init(), # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', # Needed when using srun launcher diff --git a/config/macc_deucalion.py b/config/macc_deucalion.py index a81b27f2..3e6f1af8 100644 --- a/config/macc_deucalion.py +++ b/config/macc_deucalion.py @@ -1,6 +1,6 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, set_common_required_config) from eessi.testsuite.constants import EXTRAS, FEATURES, SCALES @@ -26,7 +26,6 @@ # bypass CPU autodetection for now aarch64/a64fx, # see https://github.com/EESSI/software-layer/pull/608 'export EESSI_SOFTWARE_SUBDIR_OVERRIDE=aarch64/a64fx', - common_eessi_init(), # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=HOME,PATH,LD_LIBRARY_PATH,PYTHONPATH', ], diff --git a/config/settings_example.py b/config/settings_example.py index 4057bd67..da918483 100644 --- a/config/settings_example.py +++ b/config/settings_example.py @@ -41,7 +41,12 @@ 'launcher': 'mpirun', 'access': ['-p cpu', '--export=None'], 'prepare_cmds': [ - common_eessi_init(), + # If your system doesn't have an Lmod installation by default on the batch nodes, + # uncommenting the following two lines will use one from EESSI. Note that it is up to you + # to pick a version for EESSI from which you'd like to use the lmod installation - it's + # hard-coded to 2025.06 here. 
+ # 'source /cvmfs/software.eessi.io/2025.06/init/lmod/bash', + # 'module unload EESSI', # Pass job environment variables like $PATH, etc., into job steps 'export SLURM_EXPORT_ENV=ALL', ], diff --git a/config/surf_snellius.py b/config/surf_snellius.py index 3df247f7..b4f385bc 100644 --- a/config/surf_snellius.py +++ b/config/surf_snellius.py @@ -15,7 +15,7 @@ import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, common_logging_config, set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES @@ -42,7 +42,6 @@ { 'name': 'rome', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p rome', '--export=None'], 'max_jobs': 120, @@ -59,12 +58,6 @@ { 'name': 'genoa', 'scheduler': 'slurm', - 'prepare_cmds': [ - # EESSI init script (for now) falls back to zen3, since the zen4 is incomplete - # But, we want to really test the zen4 branch on these nodes - 'export EESSI_SOFTWARE_SUBDIR_OVERRIDE=x86_64/amd/zen4', - common_eessi_init() - ], 'launcher': 'mpirun', 'access': ['-p genoa', '--export=None'], 'max_jobs': 120, @@ -81,7 +74,6 @@ { 'name': 'gpu_A100', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p gpu_a100', '--export=None'], 'max_jobs': 60, @@ -106,7 +98,6 @@ { 'name': 'gpu_H100', 'scheduler': 'slurm', - 'prepare_cmds': [common_eessi_init()], 'launcher': 'mpirun', 'access': ['-p gpu_h100', '--export=None'], 'max_jobs': 60, diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 4dcd078d..67d23e6f 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -22,11 +22,11 @@ # ``` import os -from eessi.testsuite.common_config import (common_eessi_init, common_general_config, common_logging_config, +from eessi.testsuite.common_config import (common_general_config, 
common_logging_config, get_sbatch_account, set_common_required_config) from eessi.testsuite.constants import EXTRAS, DEVICE_TYPES, FEATURES, GPU_VENDORS, SCALES -hortense_access = ['--export=NONE', '--get-user-env=60L'] +hortense_access = ['--export=NONE', '--get-user-env'] # Note that we rely on the SBATCH_ACCOUNT environment variable to be specified # From ReFrame 4.8.1 we can no longer rely on SBATCH_ACCOUNT completely @@ -46,9 +46,10 @@ post_init = 'unset SLURM_EXPORT_ENV' launcher = "mpirun" -eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) -if eessi_cvmfs_repo is not None: - prepare_eessi_init = "module --force purge" +eessi_modulepath = '/cvmfs/software.eessi.io/init/modules' +modulepaths = os.getenv('MODULEPATH', '').split(':') +if eessi_modulepath in modulepaths: + prepare_eessi_init = f"module --force purge && module use {eessi_modulepath}" mpi_module = "env/vsc/dodrio/{}" else: prepare_eessi_init = "" @@ -70,7 +71,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [ prepare_eessi_init, - common_eessi_init(), post_init, ], 'access': hortense_access + ['--partition=cpu_rome_rhel9'], @@ -81,6 +81,7 @@ 'descr': 'CPU nodes (AMD Rome, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, + 'environs': ['default'], 'modules': [mpi_module.format('cpu_rome_rhel9')], 'features': [ FEATURES.CPU, @@ -96,7 +97,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [ prepare_eessi_init, - common_eessi_init(), post_init, ], 'access': hortense_access + ['--partition=cpu_rome_512_rhel9'], @@ -107,6 +107,7 @@ 'descr': 'CPU nodes (AMD Rome, 512GiB RAM)', 'max_jobs': 20, 'launcher': launcher, + 'environs': ['default'], 'modules': [mpi_module.format('cpu_rome_512_rhel9')], 'features': [ FEATURES.CPU, @@ -122,7 +123,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [ prepare_eessi_init, - common_eessi_init(), post_init, ], 'access': hortense_access + ['--partition=cpu_milan_rhel9'], @@ -133,6 +133,7 @@ 'descr': 'CPU nodes (AMD Milan, 256GiB RAM)', 'max_jobs': 20, 'launcher': launcher, + 'environs': 
['default'], 'modules': [mpi_module.format('cpu_milan_rhel9')], 'features': [ FEATURES.CPU, @@ -148,7 +149,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [ prepare_eessi_init, - common_eessi_init(), post_init, ], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], @@ -159,6 +159,7 @@ 'descr': 'GPU nodes (A100 40GB)', 'max_jobs': 20, 'launcher': launcher, + 'environs': ['default'], 'modules': [mpi_module.format('gpu_rome_a100_40')], 'features': [ FEATURES.GPU, @@ -182,7 +183,6 @@ 'scheduler': 'slurm', 'prepare_cmds': [ prepare_eessi_init, - common_eessi_init(), post_init, ], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], @@ -193,6 +193,7 @@ 'descr': 'GPU nodes (A100 80GB)', 'max_jobs': 20, 'launcher': launcher, + 'environs': ['default'], 'modules': [mpi_module.format('gpu_rome_a100_80')], 'features': [ FEATURES.GPU, @@ -214,11 +215,15 @@ ] }, ], + 'environments': [ + { + 'name': 'default', + }, + ], 'general': [ { 'remote_detect': True, 'purge_environment': True, - 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config() } ], diff --git a/eessi/testsuite/common_config.py b/eessi/testsuite/common_config.py index 44fbe0dd..a7728021 100644 --- a/eessi/testsuite/common_config.py +++ b/eessi/testsuite/common_config.py @@ -42,8 +42,11 @@ def set_common_required_config(site_configuration: dict, set_memory: bool = True :param site_configuration: site configuration dictionary :param set_memory: whether to set memory resources """ - environments = [{'name': 'default'}] - environs = ['default'] + environments = [ + {'name': 'EESSI-2023.06', 'modules': ['EESSI/2023.06']}, + {'name': 'EESSI-2025.06', 'modules': ['EESSI/2025.06']}, + ] + environs = ['EESSI-2023.06', 'EESSI-2025.06'] use_nodes_option = True if set_memory: resources_memory = [{ @@ -58,16 +61,23 @@ def set_common_required_config(site_configuration: dict, set_memory: bool = True }] if 'environments' in site_configuration and 
site_configuration['environments'] != environments: - getlogger().info(f"Changing environments in site config to {environments}") - site_configuration['environments'] = environments + msg = f"Appending environments {environments} to the environments already present in the site_configuration" + msg += f" ({site_configuration['environments']})" + getlogger().info(msg) + site_configuration['environments'].extend(environments) + else: + site_configuration['environments'] = environments for system in site_configuration.get('systems', []): for partition in system.get('partitions', []): # Set or overwrite the partition environment if 'environs' in partition and partition['environs'] != environs: - getlogger().info( - f"Changing environs in site config to {environs} for {system['name']}:{partition['name']}") - partition['environs'] = environs + msg = f"Appending environs {environs} to the existing environs ({partition['environs']})" + msg += f" for {system['name']}:{partition['name']}" + getlogger().info(msg) + partition['environs'].extend(environs) + else: + partition['environs'] = environs # Set or overwrite the 'use_nodes_option' scheduler option, if this is a SLURM-like scheduler if partition['scheduler'] in ['slurm', 'squeue']: @@ -159,48 +169,16 @@ def common_general_config(prefix=None): def common_eessi_init(eessi_version=None): """ - Returns the full path that should be sourced to initialize the EESSI environment for a given version of EESSI. - If no eessi_version is passed, the EESSI_VERSION environment variable is read. - If that is also not defined, default behaviour is to use `latest`. - :param eessi_version: version of EESSI that should be sourced (e.g. '2023.06' or 'latest') [optional] + Deprecated - print warning with suggested change. 
""" - # Check which EESSI_CVMFS_REPO we are running under - eessi_cvmfs_repo = os.getenv('EESSI_CVMFS_REPO', None) - - if eessi_cvmfs_repo is None: - getlogger().warning(' '.join([ - "Environment variable 'EESSI_CVMFS_REPO' is not defined.", - "If you plan to use the EESSI software stack,", - "make sure to initialize the EESSI environment before running the test suite.", - ])) - return '' - - eessi_init = [] - pilot_repo = '/cvmfs/pilot.eessi-hpc.org' - - if eessi_cvmfs_repo == pilot_repo: - eessi_init.append('export EESSI_FORCE_PILOT=1') - if eessi_version is None: - # Try also EESSI_VERSION for backwards compatibility with previous common_eessi_init implementation - eessi_version = os.getenv('EESSI_PILOT_VERSION', os.getenv('EESSI_VERSION', 'latest')) - else: - # software.eessi.io, or another where we assume the same variable names to be used - if eessi_version is None: - eessi_version = os.getenv('EESSI_VERSION', None) - # Without EESSI_VERSION, we don't know what to do. There is no default/latest version - # So, report error - if eessi_version is None: - err_msg = "Environment variable 'EESSI_VERSION' was not found." - err_msg += " Did you initialize the EESSI environment before running the test suite?" - raise ValueError(err_msg) - - if eessi_cvmfs_repo == pilot_repo and eessi_version == 'latest': - version_string = eessi_version - else: - version_string = f'versions/{eessi_version}' - - eessi_init.append(f'source {eessi_cvmfs_repo}/{version_string}/init/bash') - return ' && '.join(eessi_init) + getlogger().warning(' '.join([ + 'common_eessi_init() is deprecated, you should replace the prepare_cmds in your ReFrame configuration.' + ' On systems that have a module command available, you should no longer need any prepare_cmds.' 
+ " On systems that don't have a module command available, you need something like" + " 'prepare_cmds' : ['source /cvmfs/software.eessi.io/2025.06/init/lmod/bash && module unload EESSI']" + " in order to use the Lmod from the EESSI compatibility layer (but not yet have an EESSI version loaded)" + ])) + return 'source /cvmfs/software.eessi.io/2025.06/init/lmod/bash && module unload EESSI' def get_sbatch_account(): diff --git a/eessi/testsuite/eessi_mixin.py b/eessi/testsuite/eessi_mixin.py index 314ad45e..7c345fb9 100644 --- a/eessi/testsuite/eessi_mixin.py +++ b/eessi/testsuite/eessi_mixin.py @@ -9,9 +9,11 @@ from reframe.core.pipeline import RegressionMixin as RegressionTestPlugin from reframe.utility.sanity import make_performance_function import reframe.utility.sanity as sn +from reframe.core.runtime import valid_sysenv_comb +from reframe import VERSION as reframe_version from eessi.testsuite import check_process_binding, hooks -from eessi.testsuite.constants import COMPUTE_UNITS, DEVICE_TYPES, SCALES, TAGS +from eessi.testsuite.constants import COMPUTE_UNITS, DEVICE_TYPES, INVALID_SYSTEM, SCALES, TAGS from eessi.testsuite.utils import EESSIError, log, log_once from eessi.testsuite import __version__ as testsuite_version @@ -36,7 +38,7 @@ class EESSI_Mixin(RegressionTestPlugin): That definition needs to be done 'on time', i.e. early enough in the execution of the ReFrame pipeline. 
Here, we list which class attributes must be defined by the child class, and by (the end of) what phase: - - Init phase: device_type, scale, module_name, bench_name + - Init phase: device_type, scale, module_info, bench_name - Setup phase: compute_unit, required_mem_per_node The child class may also overwrite the following attributes: @@ -80,13 +82,13 @@ class EESSI_Mixin(RegressionTestPlugin): # Note that the error for an empty parameter is a bit unclear for ReFrame 4.6.2, but that will hopefully improve # see https://github.com/reframe-hpc/reframe/issues/3254 - # If that improves: uncomment the following to force the user to set module_name - # module_name = parameter() + # If that improves: uncomment the following to force the user to set module_info + # module_info = parameter() def __init_subclass__(cls, **kwargs): " set default values for built-in ReFrame attributes " super().__init_subclass__(**kwargs) - cls.valid_prog_environs = ['default'] + cls.valid_prog_environs = ['*'] cls.valid_systems = ['*'] if not cls.time_limit: cls.time_limit = '1h' @@ -137,7 +139,7 @@ def mark_all_files_readonly(self): def EESSI_mixin_validate_init(self): """Check that all variables that have to be set for subsequent hooks in the init phase have been set""" # List which variables we will need/use in the run_after('init') hooks - var_list = ['device_type', 'scale', 'module_name', 'measure_memory_usage'] + var_list = ['device_type', 'scale', 'module_info', 'measure_memory_usage'] for var in var_list: if not hasattr(self, var): msg = "The variable '%s' should be defined in any test class that inherits" % var @@ -149,7 +151,6 @@ def EESSI_mixin_validate_init(self): self.EESSI_mixin_validate_item_in_list('device_type', DEVICE_TYPES[:]) self.EESSI_mixin_validate_item_in_list('scale', SCALES.keys()) self.EESSI_mixin_validate_item_in_list('valid_systems', [['*']]) - self.EESSI_mixin_validate_item_in_list('valid_prog_environs', [['default']]) @run_after('init') def 
EESSI_mixin_run_after_init(self): @@ -162,11 +163,6 @@ def EESSI_mixin_run_after_init(self): # Filter on which scales are supported by the partitions defined in the ReFrame configuration hooks.filter_supported_scales(self) - hooks.set_modules(self) - - if self.require_buildenv_module: - hooks.add_buildenv_module(self) - thread_binding = self.thread_binding.lower() if thread_binding in ('true', 'compact'): hooks.set_compact_thread_binding(self) @@ -174,7 +170,48 @@ def EESSI_mixin_run_after_init(self): err_msg = f"Invalid thread_binding value '{thread_binding}'. Valid values: 'true', 'compact', or 'false'." raise EESSIError(err_msg) - hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) + # Unpack module_info + syspart, env, mod = self.module_info + self.valid_prog_environs = [env] + self.module_name = mod + + # Set module_names + hooks.set_module_names(self) + + # Add buildenv module if requested + if self.require_buildenv_module: + hooks.add_buildenv_module(self) + + # Set modules + hooks.set_modules(self) + + # Checks reframe version, and if newer or equal to 4.10, use the new functionality + # that allows combining sys:part notation with +feat. + syspart_feat_supported = False + try: + import semver + if semver.VersionInfo.parse(reframe_version) >= semver.VersionInfo.parse("4.10.0"): + syspart_feat_supported = True + except ImportError: + pass + + # If we use reframe 4.10.0 or later, we can just set the valid system and the hook will + # append any relevant features. Otherwise, as a fallback, we set the features first + # (by calling the hook), then check if the sys:part combination from the find_modules triplet + # is in the list of valid combinations for the given features + if syspart_feat_supported: + self.valid_systems = [syspart] + hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) + else: + # Filter by device type. E.g. 
add features based on whether CUDA appears in the module name + hooks.filter_valid_systems_by_device_type(self, required_device_type=self.device_type) + + # Check if partitions returned by find_modules satisfy the current features/extras in valid_systems + valid_partitions = [part.fullname for part in valid_sysenv_comb(self.valid_systems, env)] + if syspart in valid_partitions: + self.valid_systems = [syspart] + else: + self.valid_systems = [INVALID_SYSTEM] # Set scales as tags hooks.set_tag_scale(self) diff --git a/eessi/testsuite/hooks.py b/eessi/testsuite/hooks.py index 919a63c7..e218d5f8 100644 --- a/eessi/testsuite/hooks.py +++ b/eessi/testsuite/hooks.py @@ -15,7 +15,7 @@ select_matching_modules) # global variables -_buildenv_modules = [] +_buildenv_module_infos = [] def _set_job_resources(test: rfm.RegressionTest): @@ -454,7 +454,7 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str): return # test.valid_systems wasn't set yet, so set it - if len(test.valid_systems) == 0 or test.valid_systems == [INVALID_SYSTEM]: + if test.valid_systems == [INVALID_SYSTEM]: # test.valid_systems is empty or invalid, meaning all tests are filtered out. 
This hook shouldn't change that return # test.valid_systems still at default value, so overwrite @@ -517,7 +517,7 @@ def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_devic # Change test.valid_systems accordingly: _set_or_append_valid_systems(test, valid_systems) - log(f'valid_systems set to {test.valid_systems}') + log(f'valid_systems set to {test.valid_systems} for device type {required_device_type}') def filter_valid_systems_for_offline_partitions(test: rfm.RegressionTest): @@ -642,11 +642,9 @@ def req_memory_per_node(test: rfm.RegressionTest, app_mem_req: float): rflog.getlogger().warning(msg) -def set_modules(test: rfm.RegressionTest): +def set_module_names(test: rfm.RegressionTest): """ - Set modules test parameter via module_name, which can be a string or a list of strings - Skip current test if any of the module names is not present in the list of modules, - specified with --setvar modules=. + Set module_names via module_name, which can be a string or a list of strings """ if not test.module_name: return @@ -656,14 +654,24 @@ def set_modules(test: rfm.RegressionTest): test.module_names = test.module_name else: raise TypeError(f'module_name is a {type(test.module_name).__name__}, should be string, list, or tuple') + + log(f'module_names set to {test.module_names}') + + +def set_modules(test: rfm.RegressionTest): + """ + If any of the module names is not present in the list of modules, + (specified with --setvar modules=), then skip current test. 
+ Otherwise, set modules test parameter equal to module_names + """ if test.modules: for name in test.module_names: if name not in test.modules: - test.valid_systems = [] + test.valid_systems = [INVALID_SYSTEM] log(f'module {name} not in {test.modules}, valid_systems set to {test.valid_systems}') + return test.modules = test.module_names - log(f'modules set to {test.modules}') def set_tag_scale(test: rfm.RegressionTest): @@ -826,58 +834,67 @@ def extract_memory_usage(self): def add_buildenv_module(test: rfm.RegressionTest, index=-1): """ - Add a buildenv module that matches the reference module to the list of modules + Add a buildenv module that matches the reference module to the list of module names Arguments: - test: ReFrame test to which this hook should apply - - index: module index in test.modules to take as the reference (default is last); - note that the reference module’s toolchain should not be at the system - level: otherwise only buildenv modules at the system level can be added + - index: module index in test.module_names to take as the reference (default is last) + Note that the reference module’s toolchain should not be at the system + level: otherwise only buildenv modules at the system level can be added. Requirements: - recent enough easybuild python package - a matching default buildenv module (e.g. 
buildenv/default-foss-2024a) available on the system """ - for mod in test.modules: + if test.valid_systems == [INVALID_SYSTEM]: + return + + for mod in test.module_names: if mod.split('/')[0] == 'buildenv': # buildenv module already in the list return # get list of buildenv modules on the system # make global to avoid calculating _buildenv_modules multiple times - global _buildenv_modules - if not _buildenv_modules: - _buildenv_modules = set(find_modules('buildenv')) + global _buildenv_module_infos + if _buildenv_module_infos == []: + _buildenv_module_infos = set(find_modules('buildenv')) to_remove = [] - for mod in _buildenv_modules: - mod_parts = split_module(mod) + for mod_info in _buildenv_module_infos: + mod_parts = split_module(mod_info[2]) if mod_parts[4] or mod_parts[1] != 'default': # only consider default buildenv modules without versionsuffixes - to_remove.append(mod) + to_remove.append(mod_info) - _buildenv_modules = [x for x in _buildenv_modules if x not in to_remove] + _buildenv_module_infos = [x for x in _buildenv_module_infos if x not in to_remove] - if not _buildenv_modules: + if not _buildenv_module_infos: + # set to False so we don't try to find them again + _buildenv_module_infos = False msg = 'No default buildenv modules without versionsuffixes found on the system.' log(msg) test.valid_systems = [INVALID_SYSTEM] return - ref_module = test.modules[index] - matching_modules = select_matching_modules(list(_buildenv_modules), ref_module) + syspart, env, _ = test.module_info + # only consider the buildenv modules with corresponding system:partition and programming environment + buildenv_mod_infos = [x for x in _buildenv_module_infos if x[0] == syspart and x[1] == env] + + ref_mod_info = (syspart, env, test.module_names[index]) - if not matching_modules: - msg = f'No matching buildenv module for {ref_module} found on the system.' 
+ matching_mod_infos = select_matching_modules(list(buildenv_mod_infos), ref_mod_info) + if not matching_mod_infos: + msg = f'No matching buildenv module for {ref_mod_info} found on the system.' log(msg) test.valid_systems = [INVALID_SYSTEM] return - if len(matching_modules) > 1: - msg = f'Multiple matching buildenv modules found, will use the first one: {_buildenv_modules}.' + if len(matching_mod_infos) > 1: + msg = f'Multiple matching buildenv modules found, will use the first one: {matching_mod_infos[0]}.' log(msg) - buildenv_mod = matching_modules[0] + buildenv_mod = matching_mod_infos[0][2] # insert to keep the most important module last - test.modules.insert(0, buildenv_mod) - log(f'Module {buildenv_mod} added to list of modules') + test.module_names.insert(0, buildenv_mod) + log(f'module_names set to {test.module_names}') diff --git a/eessi/testsuite/tests/apps/MetalWalls.py b/eessi/testsuite/tests/apps/MetalWalls.py index cd648a3a..53da636b 100644 --- a/eessi/testsuite/tests/apps/MetalWalls.py +++ b/eessi/testsuite/tests/apps/MetalWalls.py @@ -46,7 +46,7 @@ class EESSI_MetalWalls_MW(MetalWallsCheck, EESSI_Mixin): # input files are downloaded readonly_files = [''] - module_name = parameter(find_modules('MetalWalls')) + module_info = parameter(find_modules('MetalWalls')) # For now, MetalWalls is being build for CPU targets only # compute_device = parameter([DEVICE_TYPES.CPU, DEVICE_TYPES.GPU]) device_type = parameter([DEVICE_TYPES.CPU]) diff --git a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py index 32ae0994..7a80a565 100644 --- a/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py +++ b/eessi/testsuite/tests/apps/PyTorch/PyTorch_torchvision.py @@ -15,7 +15,7 @@ class EESSI_PyTorch_torchvision(rfm.RunOnlyRegressionTest, EESSI_Mixin): nn_model = parameter(['vgg16', 'resnet50', 'resnet152', 'densenet121', 'mobilenet_v3_large']) parallel_strategy = parameter([None, 'ddp']) # Both 
torchvision and PyTorch-bundle modules have everything needed to run this test - module_name = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle'))) + module_info = parameter(chain(find_modules('torchvision'), find_modules('PyTorch-bundle'))) executable = 'python' time_limit = '30m' readonly_files = ['get_free_socket.py', 'pytorch_synthetic_benchmark.py'] diff --git a/eessi/testsuite/tests/apps/QuantumESPRESSO.py b/eessi/testsuite/tests/apps/QuantumESPRESSO.py index c26c8026..495b0c0e 100644 --- a/eessi/testsuite/tests/apps/QuantumESPRESSO.py +++ b/eessi/testsuite/tests/apps/QuantumESPRESSO.py @@ -40,7 +40,7 @@ @rfm.simple_test class EESSI_QuantumESPRESSO_PW(QEspressoPWCheck, EESSI_Mixin): time_limit = '30m' - module_name = parameter(find_modules('QuantumESPRESSO')) + module_info = parameter(find_modules('QuantumESPRESSO')) # For now, QE is built for CPU targets only device_type = parameter([DEVICE_TYPES.CPU]) readonly_files = [''] diff --git a/eessi/testsuite/tests/apps/cp2k/cp2k.py b/eessi/testsuite/tests/apps/cp2k/cp2k.py index 53de55d5..78e8605d 100644 --- a/eessi/testsuite/tests/apps/cp2k/cp2k.py +++ b/eessi/testsuite/tests/apps/cp2k/cp2k.py @@ -17,7 +17,7 @@ class EESSI_CP2K(rfm.RunOnlyRegressionTest, EESSI_Mixin): ('QS/H2O-512', -8808.1439, 1e-4), ], fmt=lambda x: x[0], loggable=True) - module_name = parameter(find_modules('CP2K')) + module_info = parameter(find_modules('CP2K')) scale = parameter(SCALES.keys()) executable = 'cp2k.popt' diff --git a/eessi/testsuite/tests/apps/espresso/espresso.py b/eessi/testsuite/tests/apps/espresso/espresso.py index d1e9a56c..d231b9ae 100644 --- a/eessi/testsuite/tests/apps/espresso/espresso.py +++ b/eessi/testsuite/tests/apps/espresso/espresso.py @@ -36,7 +36,7 @@ def filter_scales(): class EESSI_ESPRESSO_base(rfm.RunOnlyRegressionTest): - module_name = parameter(find_modules('^ESPResSo$')) + module_info = parameter(find_modules('^ESPResSo$')) device_type = DEVICE_TYPES.CPU compute_unit = 
COMPUTE_UNITS.CPU time_limit = '300m' diff --git a/eessi/testsuite/tests/apps/gromacs.py b/eessi/testsuite/tests/apps/gromacs.py index 5f4fd9b2..33fae4b3 100644 --- a/eessi/testsuite/tests/apps/gromacs.py +++ b/eessi/testsuite/tests/apps/gromacs.py @@ -49,7 +49,7 @@ def set_device_type(self): class EESSI_GROMACS(EESSI_GROMACS_base, EESSI_Mixin): scale = parameter(SCALES.keys()) time_limit = '30m' - module_name = parameter(find_modules('GROMACS')) + module_info = parameter(find_modules('GROMACS')) # input files are downloaded readonly_files = [''] # executable_opts in addition to those set by the hpctestlib diff --git a/eessi/testsuite/tests/apps/lammps/lammps.py b/eessi/testsuite/tests/apps/lammps/lammps.py index ba6e3e32..f5990b52 100644 --- a/eessi/testsuite/tests/apps/lammps/lammps.py +++ b/eessi/testsuite/tests/apps/lammps/lammps.py @@ -53,7 +53,7 @@ class EESSI_LAMMPS_base(rfm.RunOnlyRegressionTest): device_type = parameter([DEVICE_TYPES.CPU, DEVICE_TYPES.GPU]) # Parameterize over all modules that start with LAMMPS - module_name = parameter(find_modules('LAMMPS')) + module_info = parameter(find_modules('LAMMPS')) all_readonly_files = True is_ci_test = True @@ -240,7 +240,7 @@ class EESSI_LAMMPS_ALL_balance_staggered_global_base(EESSI_LAMMPS_base): sourcesdir = 'src/ALL+OBMD' # This requires a LAMMPS with ALL functionality, i.e. only select modules with ALL in the versionsuffix - module_name = parameter(find_modules(r'LAMMPS\/.*-.*ALL', name_only=False)) + module_info = parameter(find_modules(r'LAMMPS\/.*-.*ALL', name_only=False)) @deferrable def check_number_neighbors(self): @@ -366,7 +366,7 @@ class EESSI_LAMMPS_ALL_OBMD_simulation_staggered_global(EESSI_LAMMPS_base, EESSI executable = 'lmp -in in.simulation.staggered.global' # This requires a LAMMPS with ALL+OMBD functionality, i.e. 
only select modules with -ALL_OBMD versionsuffix - module_name = parameter(find_modules(r'LAMMPS\/.*-.*ALL.*OBMD', name_only=False)) + module_info = parameter(find_modules(r'LAMMPS\/.*-.*ALL.*OBMD', name_only=False)) @sanity_function def assert_sanity(self): @@ -405,7 +405,7 @@ class EESSI_LAMMPS_OBMD_simulation(EESSI_LAMMPS_base, EESSI_Mixin): # This requires a LAMMPS with OBMD functionality, i.e. only select modules with -OBMD versionsuffix # We _could_ remove the '-' and '$' to also match e.g. ALL_OBMD - module_name = parameter(find_modules(r'LAMMPS\/.*-.*OBMD', name_only=False)) + module_info = parameter(find_modules(r'LAMMPS\/.*-.*OBMD', name_only=False)) @sanity_function def assert_sanity(self): diff --git a/eessi/testsuite/tests/apps/lbmpy-pssrt/lbmpy-pssrt.py b/eessi/testsuite/tests/apps/lbmpy-pssrt/lbmpy-pssrt.py index 1125721b..5b1d0570 100644 --- a/eessi/testsuite/tests/apps/lbmpy-pssrt/lbmpy-pssrt.py +++ b/eessi/testsuite/tests/apps/lbmpy-pssrt/lbmpy-pssrt.py @@ -48,7 +48,7 @@ class EESSI_lbmpy_pssrt(rfm.RunOnlyRegressionTest, EESSI_Mixin): launcher = 'local' # no MPI module is loaded in this test - module_name = parameter(find_modules('lbmpy-pssrt')) + module_info = parameter(find_modules('lbmpy-pssrt')) readonly_files = ['mixing_layer_2D.py'] diff --git a/eessi/testsuite/tests/apps/lpc3d/lpc3d.py b/eessi/testsuite/tests/apps/lpc3d/lpc3d.py index ed972a41..b3250450 100644 --- a/eessi/testsuite/tests/apps/lpc3d/lpc3d.py +++ b/eessi/testsuite/tests/apps/lpc3d/lpc3d.py @@ -44,7 +44,7 @@ class EESSI_LPC3D(rfm.RunOnlyRegressionTest, EESSI_Mixin): launcher = 'local' # no MPI module is loaded in this test - module_name = parameter(find_modules('LPC3D')) + module_info = parameter(find_modules('LPC3D')) readonly_files = ['lattice_gas.inpt', 'pore_dens_freq_2neg.txt', 'psd.txt'] diff --git a/eessi/testsuite/tests/apps/numpy/numpy.py b/eessi/testsuite/tests/apps/numpy/numpy.py index 8532fa80..82b0229d 100644 --- a/eessi/testsuite/tests/apps/numpy/numpy.py +++ 
b/eessi/testsuite/tests/apps/numpy/numpy.py @@ -24,7 +24,7 @@ class EESSI_NumPy(rfm.RunOnlyRegressionTest, EESSI_Mixin): executable = './np_ops.py' time_limit = '30m' readonly_files = ['np_ops.py'] - module_name = parameter(find_modules('SciPy-bundle')) + module_info = parameter(find_modules('SciPy-bundle')) device_type = DEVICE_TYPES.CPU compute_unit = COMPUTE_UNITS.NODE scale = parameter([ diff --git a/eessi/testsuite/tests/apps/openfoam/openfoam.py b/eessi/testsuite/tests/apps/openfoam/openfoam.py index ad646893..2ede4cbd 100644 --- a/eessi/testsuite/tests/apps/openfoam/openfoam.py +++ b/eessi/testsuite/tests/apps/openfoam/openfoam.py @@ -87,7 +87,7 @@ class EESSI_OPENFOAM_LID_DRIVEN_CAVITY_64M(rfm.RunOnlyRegressionTest, EESSI_Mixi time_limit = '120m' readonly_files = [''] device_type = parameter([DEVICE_TYPES.CPU]) - module_name = parameter(find_modules('OpenFOAM/v', name_only=False)) + module_info = parameter(find_modules('OpenFOAM/v', name_only=False)) valid_systems = ['*'] scale = parameter(filter_scales_64M()) @@ -191,7 +191,7 @@ class EESSI_OPENFOAM_LID_DRIVEN_CAVITY_8M(rfm.RunOnlyRegressionTest, EESSI_Mixin time_limit = '60m' readonly_files = [''] device_type = parameter([DEVICE_TYPES.CPU]) - module_name = parameter(find_modules('OpenFOAM/v', name_only=False)) + module_info = parameter(find_modules('OpenFOAM/v', name_only=False)) valid_systems = ['*'] scale = parameter(filter_scales_8M()) @@ -304,7 +304,7 @@ class EESSI_OPENFOAM_LID_DRIVEN_CAVITY_1M(rfm.RunOnlyRegressionTest, EESSI_Mixin time_limit = '60m' readonly_files = [''] device_type = parameter([DEVICE_TYPES.CPU]) - module_name = parameter(find_modules('OpenFOAM/v', name_only=False)) + module_info = parameter(find_modules('OpenFOAM/v', name_only=False)) valid_systems = ['*'] scale = parameter(filter_scales_1M()) is_ci_test = True diff --git a/eessi/testsuite/tests/apps/osu.py b/eessi/testsuite/tests/apps/osu.py index 9a15b229..7ee8e712 100644 --- a/eessi/testsuite/tests/apps/osu.py +++ 
b/eessi/testsuite/tests/apps/osu.py @@ -57,7 +57,7 @@ def filter_scales_coll(): class EESSI_OSU_Base(osu_benchmark): """ base class for OSU tests """ time_limit = '30m' - module_name = parameter(find_modules('OSU-Micro-Benchmarks')) + module_info = parameter(find_modules('OSU-Micro-Benchmarks')) used_cpus_per_task = 1 # reset num_tasks_per_node from the hpctestlib: we handle it ourselves diff --git a/eessi/testsuite/tests/apps/tensorflow/tensorflow.py b/eessi/testsuite/tests/apps/tensorflow/tensorflow.py index a3b68bbe..09070f0d 100644 --- a/eessi/testsuite/tests/apps/tensorflow/tensorflow.py +++ b/eessi/testsuite/tests/apps/tensorflow/tensorflow.py @@ -19,7 +19,7 @@ class EESSI_TensorFlow(rfm.RunOnlyRegressionTest, EESSI_Mixin): # Parameterize over all modules that start with TensorFlow - module_name = parameter(find_modules('TensorFlow')) + module_info = parameter(find_modules('TensorFlow')) # Make CPU and GPU versions of this test device_type = parameter([DEVICE_TYPES.CPU, DEVICE_TYPES.GPU]) diff --git a/eessi/testsuite/tests/apps/walberla/walberla.py b/eessi/testsuite/tests/apps/walberla/walberla.py index 8112d8c2..9fbdcc85 100644 --- a/eessi/testsuite/tests/apps/walberla/walberla.py +++ b/eessi/testsuite/tests/apps/walberla/walberla.py @@ -44,7 +44,7 @@ class EESSI_WALBERLA_BACKWARD_FACING_STEP(rfm.RunOnlyRegressionTest, EESSI_Mixin time_limit = '30m' readonly_files = [''] device_type = parameter([DEVICE_TYPES.CPU]) - module_name = parameter(find_modules('waLBerla')) + module_info = parameter(find_modules('waLBerla')) valid_systems = ['*'] scale = parameter(filter_scales()) diff --git a/eessi/testsuite/tests/libs/blas/blas.py b/eessi/testsuite/tests/libs/blas/blas.py index c4a02dae..e27b71a0 100644 --- a/eessi/testsuite/tests/libs/blas/blas.py +++ b/eessi/testsuite/tests/libs/blas/blas.py @@ -24,7 +24,7 @@ import reframe as rfm from reframe.core.builtins import parameter, run_after, run_before, sanity_function -import reframe.core.logging as rflog +from 
reframe.core.logging import getlogger import reframe.utility.sanity as sn from eessi.testsuite.constants import COMPUTE_UNITS, DEVICE_TYPES, SCALES @@ -42,27 +42,31 @@ def multi_thread_scales(): def get_blas_modules(blas_name): """ - Find available blas_name modules and (latest) matching BLIS module + Find available blas_name module_infos with (most recent) matching BLIS module - Returns: a list of lists: each inner list has the blas_name module as first item, - and the matching BLIS module as second item. The BLIS module must - be second to avoid segmentation fault for AOCL-BLAS + Returns: If blas_name is 'BLIS', returns the BLIS module_infos. + Otherwise, returns a list of tuples (syspart, env, [blas, blis]): + each inner list has the `blas_name` module as first item, and the + matching BLIS module as second item. The BLIS module must be + second to avoid segmentation fault for AOCL-BLAS. """ - blas_modules = list(find_modules(rf'{blas_name}$')) + blas_module_infos = list(find_modules(rf'^{blas_name}$')) if blas_name == 'BLIS': - return [[x] for x in blas_modules] + return [(x, y, [z]) for x, y, z in blas_module_infos] ml_lists = [] - blis_modules = list(find_modules('BLIS$')) + blis_module_infos = list(find_modules(r'^BLIS$')) - for mod in blas_modules: - matching_blises = sorted(select_matching_modules(blis_modules, mod)) - if not matching_blises: - msg = f'Skipping BLAS module {mod}: no matching BLIS module found.' - rflog.getlogger().warning(msg) + for blas_mod_info in blas_module_infos: + matching_blis_infos = select_matching_modules(blis_module_infos, blas_mod_info) + if not matching_blis_infos: + msg = f'Skipping BLAS module info {blas_mod_info}: no matching BLIS module found.' 
+ getlogger().warning(msg) continue - blis = matching_blises[-1] - ml_lists.append([mod, blis]) + # assume the last BLIS module is the most recent one (find_modules sorts them) + blis = matching_blis_infos[-1][-1] + syspart, env, blas = blas_mod_info + ml_lists.append((syspart, env, [blas, blis])) return ml_lists @@ -72,20 +76,23 @@ def get_imkl_modules(): Find available imkl modules and (latest) BLIS module Only imkl modules with SYSTEM toolchain are used - Returns: a list of lists: each inner list has the imkl module as first item, - and the latest BLIS module as second item. + Returns: a list of tuples (syspart, env, [imkl, blis]): + each inner list has the imkl module as first item, + and the most recent BLIS module as second item. """ ml_lists = [] - blises = sorted(find_modules(r'BLIS$')) - if not blises: + blis_module_infos = list(find_modules(r'^BLIS$')) + if not blis_module_infos: log('no BLIS module found') return ml_lists - blis = blises[-1] + blis = blis_module_infos[-1][-1] - imkls = list(find_modules(r'imkl/[^-]*$', name_only=False)) - for imkl in imkls: - ml_lists.append([imkl, blis]) + # skip imkl modules with a toolchain other than SYSTEM (i.e. 
no toolchain in the module version) + imkl_module_infos = list(find_modules(r'^imkl/[^-]*$', name_only=False)) + for imkl_mod_info in imkl_module_infos: + syspart, env, imkl = imkl_mod_info + ml_lists.append((syspart, env, [imkl, blis])) return ml_lists @@ -169,7 +176,7 @@ class EESSI_BLAS_OpenBLAS_mt(EESSI_BLAS_base, EESSI_Mixin): "multi-threaded OpenBLAS test" scale = multi_thread_scales() - module_name = parameter(get_blas_modules('OpenBLAS')) + module_info = parameter(get_blas_modules('OpenBLAS')) flexiblas_blas_lib = 'openblas' tags = {'openblas'} is_ci_test = True @@ -181,7 +188,7 @@ class EESSI_BLAS_AOCLBLAS_mt(EESSI_BLAS_base, EESSI_Mixin): "multi-threaded AOCL-BLAS test" scale = multi_thread_scales() - module_name = parameter(get_blas_modules('AOCL-BLAS')) + module_info = parameter(get_blas_modules('AOCL-BLAS')) flexiblas_blas_lib = 'aocl_mt' tags = {'aocl-blas'} thread_binding = 'compact' @@ -192,17 +199,18 @@ class EESSI_BLAS_imkl_mt(EESSI_BLAS_base, EESSI_Mixin): "multi-threaded imkl test" scale = multi_thread_scales() - module_name = parameter(get_imkl_modules()) + module_info = parameter(get_imkl_modules()) flexiblas_blas_lib = 'imkl' tags = {'imkl'} thread_binding = 'compact' +@rfm.simple_test class EESSI_BLAS_BLIS_mt(EESSI_BLAS_base, EESSI_Mixin): "multi-threaded BLIS test" scale = multi_thread_scales() - module_name = parameter(get_blas_modules('BLIS')) + module_info = parameter(get_blas_modules('BLIS')) flexiblas_blas_lib = 'blis' tags = {'blis'} thread_binding = 'compact' diff --git a/eessi/testsuite/utils.py b/eessi/testsuite/utils.py index ef32b7c0..ff551947 100644 --- a/eessi/testsuite/utils.py +++ b/eessi/testsuite/utils.py @@ -6,23 +6,25 @@ import os import re import sys -from typing import Iterator, List +from typing import Iterator, List, Tuple import reframe as rfm from reframe.core.exceptions import ReframeFatalError from reframe.core.logging import getlogger import reframe.core.runtime as rt from reframe.frontend.printer import 
PrettyPrinter +from reframe.utility import typecheck as typ from eessi.testsuite.constants import DEVICE_TYPES printer = PrettyPrinter() # global variables -_available_modules = [] _eb_is_available = False _eb_avail_warning_is_printed = False _unique_msg_ids = [] +_modules_cache = {} +_mod_hierarchies = {} try: from easybuild.framework.easyconfig.easyconfig import get_toolchain_hierarchy @@ -103,6 +105,7 @@ def split_module(module: str) -> tuple: 2) there are no hyphens in the version, toolchain_name, or toolchain_version unless exceptions are added Exceptions: - toolchain_name 'intel-compilers' + - toolchain_name 'llvm-compilers' Arguments: - module: the full module name @@ -115,8 +118,8 @@ def split_module(module: str) -> tuple: if len(parts) == 1: # system toolchain, no versionsuffix parts.extend(['system', 'system']) - # special casing intel-compilers: - if parts[1] == 'intel' and parts[2] == 'compilers': + # special casing intel-compilers and llvm-compilers: + if parts[1] in ['intel', 'llvm'] and parts[2] == 'compilers': parts = [parts[0], '-'.join(parts[1:3])] + parts[3:] tcname = parts[1] @@ -128,83 +131,77 @@ def split_module(module: str) -> tuple: return name, version, tcname, tcversion, versionsuffix -def get_avail_modules() -> List[str]: - "get all available modules in the system" - # use global to avoid recalculating the list of available modules multiple times - global _available_modules - if not _available_modules: - ms = rt.runtime().modules_system - # Returns e.g. ['Bison/', 'Bison/3.7.6-GCCcore-10.3.0', 'BLIS/', 'BLIS/0.8.1-GCC-10.3.0'] - _available_modules = ms.available_modules('') - # Exclude anything without version, i.e. ending with / (e.g. Bison/) - _available_modules = [mod for mod in _available_modules if not mod.endswith('/')] - log(f"Total number of available modules: {len(_available_modules)}") - if not _available_modules: - msg = 'No available modules found on the system.' 
- raise EESSIError(msg) - return _available_modules - - -def find_modules(regex: str, name_only=True) -> Iterator[str]: +def find_modules(regex: str, environ_mapping=None, name_only=True) -> Iterator[Tuple[str, str, str]]: """ - Return all modules matching the regular expression regex. Note that since we use re.search, - a module matches if the regex matches the module name at any place. I.e. the match does - not have to be at the start of the smodule name + Similar to reframe.utility.find_modules: + - adds caching, so that we don't have to do repeated module avail calls + - adds the name_only argument Arguments: - - regex: a regular expression - - name_only: regular expressions will only be matched on the module name, not the version (default: True). - - Note: the name_only feature assumes anything after the last forward '/' is the version, - and strips that before doing a match. - - Example + - regex: regular expression used to find matching modules + - environ_mapping: dict mapping module name regex patterns to environment names (default: None, no filtering) + - name_only: whether to match only the name of the module, not the version + """ - Suppose we have the following modules on a system: + if not isinstance(regex, str): + raise TypeError("'regex' argument must be a string") - gompic/2022a - gompi/2022a - CGAL/4.14.3-gompi-2022a + if (environ_mapping is not None and not isinstance(environ_mapping, typ.Dict[str, str])): + raise TypeError("'environ_mapping' argument must be of type Dict[str,str]") - The following calls would return the following respective modules + if name_only: + # Remove trailing slashes from the regex (in case the caller forgot) + regex = regex.rstrip('/') - find_modules('gompi') => [gompic/2022a, gompi/2022a] - find_modules('gompi$') => [gompi/2022a] - find_modules('gompi', name_only = False) => [gompic/2022a, gompi/2022a, CGAL/4.14.3-gompi-2022a] - find_modules('^gompi', name_only = False) => [gompic/2022a, gompi/2022a] - find_modules('^gompi/', name_only = False) => [gompi/2022a] -
find_modules('-gompi-2022a', name_only = False) => [CGAL/4.14.3-gompi-2022a] + regex_re = re.compile(regex) + name_only_re = re.compile(r'/[^/]*$') - """ + def _is_valid_for_env(mod, env): + if environ_mapping is None: + return True - if not isinstance(regex, str): - raise TypeError("'substr' argument must be a string") - - seen = set() - dupes = [] - for mod in get_avail_modules(): - # The thing we yield should always be the original module name (orig_mod), including version - orig_mod = mod - if name_only: - # Remove trailing slashes from the regex (in case the callee forgot) - regex = regex.rstrip('/') - # Remove part after the last forward slash, as we assume this is the version - mod = re.sub('/[^/]*$', '', mod) - # Match the actual regular expression - log(f"Matching module {mod} with regex {regex}") - if re.search(regex, mod): - log("Match!") - if orig_mod in seen: - dupes.append(orig_mod) - else: - seen.add(orig_mod) - yield orig_mod - - if dupes: - err_msg = "EESSI test-suite cannot handle duplicate modules. " - err_msg += "Please make sure that only one is available on your system. 
" - err_msg += f"The following modules have a duplicate on your system: {dupes}" - raise ValueError(err_msg) + for patt, mapped_env in environ_mapping.items(): + if re.match(patt, mod) and env == mapped_env: + return True + + return False + + ms = rt.runtime().modules_system + current_system = rt.runtime().system + snap0 = rt.snapshot() + + for part in current_system.partitions: + if part.fullname not in _modules_cache: + _modules_cache[part.fullname] = {} + for env in part.environs: + rt.loadenv(part.local_env, env) + if env.name not in _modules_cache[part.fullname]: + log(f'Getting available modules for ({part.fullname}, {env.name})') + available_modules = sorted(ms.available_modules()) + _modules_cache[part.fullname][env.name] = [mod for mod in available_modules if not mod.endswith('/')] + snap0.restore() + seen = set() + dupes = [] + for mod in _modules_cache[part.fullname][env.name]: + modmod = mod + if name_only: + modmod = name_only_re.sub('', mod) + if regex_re.search(modmod): + log(f"Matched module {mod} with regex {regex}") + if mod in seen: + dupes.append(mod) + else: + seen.add(mod) + if _is_valid_for_env(mod, env.name): + yield (part.fullname, env.name, mod) + + if dupes: + err_msg = ( + "EESSI test-suite cannot handle duplicate modules. " + "Please make sure that only one is available on your system. " + f"The following modules have a duplicate on your system: {sorted(dupes)}" + ) + raise ValueError(err_msg) def get_tc_hierarchy(tcdict): @@ -227,40 +224,50 @@ def get_tc_hierarchy(tcdict): _eb_avail_warning_is_printed = True -def select_matching_modules(modules: List[str], ref_module: str) -> List[str]: +def select_matching_modules(module_infos: List[tuple], ref_module_info: tuple) -> List[tuple]: """ - Return from a list of modules all modules that match the + Return, from a list of module_info tuples, all module_info tuples with modules that match the toolchain of a reference module. 
Arguments: - - modules: list of modules from which a selection is made - - ref_module: the reference module + - module_infos: list of module info tuples from which a selection is made + - ref_module_info: the reference module info Requirements: - recent enough easybuild Python package """ - selected_mods = [] + selected_mod_infos = [] + + ref_syspart, ref_env, ref_mod = ref_module_info - ref_tcname, ref_tcversion = split_module(ref_module)[2:4] + ref_tcname, ref_tcversion = split_module(ref_mod)[2:4] ref_tcdict = {'name': ref_tcname, 'version': ref_tcversion} ref_hierarchy = get_tc_hierarchy(ref_tcdict) if not ref_hierarchy: return [] - for mod in modules: + for mod_info in module_infos: + syspart, env, mod = mod_info + # only select the ones that have the same system:partition and environment as the reference + if syspart != ref_syspart or env != ref_env: + continue + mod_tcname, mod_tcversion = split_module(mod)[2:4] mod_tcdict = {'name': mod_tcname, 'version': mod_tcversion} - mod_hierarchy = get_tc_hierarchy(mod_tcdict) + if mod not in _mod_hierarchies: + _mod_hierarchies[mod] = get_tc_hierarchy(mod_tcdict) + + mod_hierarchy = _mod_hierarchies[mod] if not mod_hierarchy: return [] # toolchain hierarchy does not contain super-toolchains, only sub-toolchains if ref_tcdict in mod_hierarchy or mod_tcdict in ref_hierarchy: - selected_mods.append(mod) + selected_mod_infos.append(mod_info) - return selected_mods + return selected_mod_infos def check_proc_attribute_defined(test: rfm.RegressionTest, attribute) -> bool: