From add7d893280ac8eb5b79276d448e50dc114f2c0c Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Sat, 13 May 2023 18:27:31 +0200 Subject: [PATCH 01/13] add missing cryptography dependency --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0e05c9e..649c378 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ psutil requests rdflib timeout-decorator +cryptography From 87f1b3575a6f961b65a78cefa3fd79bce3bebee8 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Sat, 13 May 2023 19:24:44 +0200 Subject: [PATCH 02/13] fix freedesktop os warning --- bench_executor/collector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bench_executor/collector.py b/bench_executor/collector.py index da101d2..a295bf7 100644 --- a/bench_executor/collector.py +++ b/bench_executor/collector.py @@ -324,7 +324,10 @@ def __init__(self, case_name: str, results_run_path: str, system_os_version = 'UNKNOWN' try: system_os_name = platform.freedesktop_os_release()['NAME'] - system_os_version = platform.freedesktop_os_release()['VERSION'] + try: + system_os_version = platform.freedesktop_os_release()['VERSION_ID'] + except KeyError: + system_os_version = platform.freedesktop_os_release()['VERSION'] except (OSError, KeyError): self._logger.warning('Cannot extract Freedesktop OS release data') system_hostname = platform.node() From b8adc98e65858af72101de133a72ca26a3a5b2d9 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Sat, 13 May 2023 14:22:36 +0200 Subject: [PATCH 03/13] make pipeline file configurable (--metadata file.json) --- bench_executor/executor.py | 7 +++++-- exectool | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/bench_executor/executor.py b/bench_executor/executor.py index 009207e..0a19c84 100644 --- a/bench_executor/executor.py +++ b/bench_executor/executor.py @@ -39,7 +39,7 @@ class Executor: """ def __init__(self, main_directory: str, verbose: bool = False, - progress_cb=_progress_cb): + progress_cb=_progress_cb, metadata_filename=METADATA_FILE): """Create an instance of the Executor class. Parameters @@ -51,6 +51,8 @@ def __init__(self, main_directory: str, verbose: bool = False, process_cb : function Callback to call when a step is completed of the case. By default, a dummy callback is provided if the argument is missing. + metadata_filename : str + File name to look for step definitions. By default, metadata.json """ self._main_directory = os.path.abspath(main_directory) self._schema = {} @@ -58,6 +60,7 @@ def __init__(self, main_directory: str, verbose: bool = False, self._class_module_mapping: Dict[str, Any] = {} self._verbose = verbose self._progress_cb = progress_cb + self._metadata_filename = metadata_filename self._logger = Logger(__name__, self._main_directory, self._verbose) self._init_resources() @@ -553,7 +556,7 @@ def list(self) -> list: for directory in glob(self._main_directory): for root, dirs, files in os.walk(directory): for file in files: - if os.path.basename(file) == METADATA_FILE: + if os.path.basename(file) == self._metadata_filename: path = os.path.join(root, file) with open(path, 'r') as f: data = json.load(f) diff --git a/exectool b/exectool index 76012d3..18f2f1b 100755 --- a/exectool +++ b/exectool @@ -416,6 +416,10 @@ if __name__ == '__main__': parser.add_argument('--wait-for-user', dest='wait_for_user', help='Show a prompt when a step is executed before ' 'going to the next one', action='store_true') + parser.add_argument('--metadata', dest='metadata_filename', default='metadata.json', + help='File name with pipeline steps, ' + 'defaults to metadata.json (RMLMapper sample pipeline)', + type=str) args = parser.parse_args() # Resolve path @@ -433,6 +437,7 @@ if __name__ == '__main__': print(f'{parser.prog} {VERSION}') print(f'Command: {args.command}') print(f'Root directory: {main_directory}') + print(f'Metadata filename: {args.metadata_filename}') print(f'Verbose enabled: {args.verbose}') print(f'Number of runs: {args.number_of_runs}') print(f'Measurement sample interval: {args.interval}s') @@ -459,7 +464,7 @@ if __name__ == '__main__': except FileNotFoundError: pass - e = Executor(main_directory, verbose=args.verbose, progress_cb=progress_cb) + e = Executor(main_directory, verbose=args.verbose, progress_cb=progress_cb, metadata_filename=args.metadata_filename) if args.command == 'list': print_cases(e) From c47007f7e03bb8c1aff5664bd085e2a3473ee8ff Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 24 Apr 2024 15:20:52 +0200 Subject: [PATCH 04/13] add working_dir parameter --- bench_executor/container.py | 18 ++++++++++++------ bench_executor/docker.py | 8 ++++++-- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/bench_executor/container.py b/bench_executor/container.py index d947a51..cc7947e 100644 --- a/bench_executor/container.py +++ b/bench_executor/container.py @@ -96,7 +96,7 @@ def name(self) -> str: """The pretty name of the container""" return self._name - def run(self, command: str = '', detach=True) -> bool: + def run(self, command: str = '', *, working_dir=None, detach=True) -> bool: """Run the container. This is used for containers which are long running to provide services @@ -107,6 +107,8 @@ def run(self, command: str = '', detach=True) -> bool: command : str The command to execute in the container, optionally and defaults to no command. + working_dir : str + Set a working directory in the container (optional) detach : bool If the container may run in the background, default True. @@ -119,7 +121,7 @@ def run(self, command: str = '', detach=True) -> bool: v = self._volumes self._started, self._container_id = \ self._docker.run(self._container_name, command, self._name, detach, - self._ports, NETWORK_NAME, e, v) + self._ports, NETWORK_NAME, e, v, working_dir) if not self._started: self._logger.error(f'Starting container "{self._name}" failed!') @@ -155,7 +157,7 @@ def exec(self, command: str) -> Tuple[bool, List[str]]: return False, logs - def run_and_wait_for_log(self, log_line: str, command: str = '') -> bool: + def run_and_wait_for_log(self, log_line: str, command: str = '', *, working_dir=None) -> bool: """Run the container and wait for a log line to appear. This blocks until the container's log contains the `log_line`. @@ -167,13 +169,15 @@ def run_and_wait_for_log(self, log_line: str, command: str = '') -> bool: command : str The command to execute in the container, optionally and defaults to no command. + working_dir : str + Set a working directory in the container (optional) Returns ------- success : bool Whether the container exited with status code 0 or not. """ - if not self.run(command): + if not self.run(command, working_dir=working_dir): self._logger.error(f'Command "{command}" failed') return False @@ -212,7 +216,7 @@ def run_and_wait_for_log(self, log_line: str, command: str = '') -> bool: self._logger.error(line) return False - def run_and_wait_for_exit(self, command: str = '') -> bool: + def run_and_wait_for_exit(self, command: str = '', *, working_dir=None) -> bool: """Run the container and wait for exit This blocks until the container exit and gives a status code. @@ -222,13 +226,15 @@ def run_and_wait_for_exit(self, command: str = '') -> bool: command : str The command to execute in the container, optionally and defaults to no command. + working_dir : str + Set a working directory in the container (optional) Returns ------- success : bool Whether the container exited with status code 0 or not. """ - if not self.run(command): + if not self.run(command, working_dir=working_dir): return False if self._container_id is None: diff --git a/bench_executor/docker.py b/bench_executor/docker.py index 5843ce7..3390b3d 100644 --- a/bench_executor/docker.py +++ b/bench_executor/docker.py @@ -9,7 +9,7 @@ import json import subprocess from time import sleep -from typing import List, Tuple +from typing import List, Tuple, Optional from bench_executor.logger import Logger @@ -145,7 +145,7 @@ def pull(self, image: str) -> bool: def run(self, image: str, command: str, name: str, detach: bool, ports: dict, network: str, environment: dict, - volumes: List[str], must_pull: bool = True) -> Tuple[bool, str]: + volumes: List[str], workdir: Optional[str], must_pull: bool = True) -> Tuple[bool, str]: """Start a Docker container. Parameters @@ -166,6 +166,8 @@ def run(self, image: str, command: str, name: str, detach: bool, Environment variables to set. volumes : List[str] Volumes to mount on the container from the host. + workdir : str + Working directory for the container. must_pull: bool Whether the image should be pulled first, default is True. @@ -206,6 +208,8 @@ def run(self, image: str, command: str, name: str, detach: bool, for volume in volumes: cmd += f' -v "{volume}"' cmd += f' --network "{network}"' + if workdir is not None: + cmd += f' --workdir "{workdir}"' cmd += f' {image} {command}' self._logger.debug(f'Starting Docker container: {cmd}') status_code, container_id = subprocess.getstatusoutput(cmd) From ad9ca40c1d2dc2a5f1fb2a37311163180d1a4fa6 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 24 Apr 2024 20:04:52 +0200 Subject: [PATCH 05/13] add rpt executor --- bench_executor/rpt.py | 78 +++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 1 + 2 files changed, 79 insertions(+) create mode 100644 bench_executor/rpt.py diff --git a/bench_executor/rpt.py b/bench_executor/rpt.py new file mode 100644 index 0000000..b347ed5 --- /dev/null +++ b/bench_executor/rpt.py @@ -0,0 +1,78 @@ +""" +RPT is a general purpose RDF tool + +**Website**: https://github.com/SmartDataAnalytics/RdfProcessingToolkit + +""" + +VERSION='1.9.8-SNAPSHOT' +TIMEOUT = 6 * 3600 # 6 hours + +import os +import shlex +from timeout_decorator import timeout, TimeoutError # type: ignore +from bench_executor.container import Container +from bench_executor.logger import Logger + + +class Rpt(Container): + """RPT container for executing rmltk, sansa etc.""" + + _INSTANCES = 0 + + def __init__(self, data_path: str, config_path: str, directory: str, + verbose: bool, expect_failure: bool = False): + self._instance = Rpt._INSTANCES + Rpt._INSTANCES = Rpt._INSTANCES + 1 + + self._data_path = os.path.abspath(data_path) + self._config_path = os.path.abspath(config_path) + self._logger = Logger(__name__ + '.' + str(self._instance), directory, verbose) + self._verbose = verbose + + os.makedirs(os.path.join(self._data_path, 'rpt'), exist_ok=True) + super().__init__(f'aksw/rpt:{VERSION}', 'rpt' + '-' + str(self._instance), + self._logger, expect_failure=expect_failure, + volumes=[f'{self._data_path}/rpt:/data', + f'{self._data_path}/shared:/data/shared']) + + @timeout(TIMEOUT) + def _execute_with_timeout(self, arguments: list, *, working_dir=None) -> bool: + """Execute a mapping with a provided timeout. + + Returns + ------- + success : bool + Whether the execution was successfull or not. + """ + return self.run_and_wait_for_exit(' '.join(map(shlex.quote, arguments)), + working_dir=working_dir) + + def execute(self, command, arguments=None, working_dir='/data/shared') -> bool: + """Execute rpt with given arguments. + + Parameters + ---------- + command : str + Command to run + arguments : list + Arguments to supply to rpt. + + Returns + ------- + success : bool + Whether the execution succeeded or not. + """ + if arguments is None: + arguments = [] + self._logger.debug(f'{self._instance}: Calling rpt {command} with {arguments!r}') + try: + result = self._execute_with_timeout([*command.split(' '), *arguments], + working_dir=working_dir) + self.stop() + return result + except TimeoutError: + msg = f'{self._instance}: Timeout ({TIMEOUT}s) reached for rpt' + self._logger.warning(msg) + + return False diff --git a/requirements.txt b/requirements.txt index 649c378..fe439b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ requests rdflib timeout-decorator cryptography +pyyaml From a9d5de2eb853f981e2942bdd578402166d70ea51 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Sat, 13 May 2023 18:20:40 +0200 Subject: [PATCH 06/13] add rmltk template and rpt-arq config --- .gitmodules | 3 +++ rmltk-templates | 1 + 2 files changed, 4 insertions(+) create mode 100644 .gitmodules create mode 160000 rmltk-templates diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..90030fb --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "rmltk-templates"] + path = rmltk-templates + url = https://github.com/SimonBin/kgc-challenge-tool-template.git diff --git a/rmltk-templates b/rmltk-templates new file mode 160000 index 0000000..a99c5a3 --- /dev/null +++ b/rmltk-templates @@ -0,0 +1 @@ +Subproject commit a99c5a3610448697b4e562fd87f9f0e9754b9996 From a7ea2d15591e22f2e4829823f4be7ee5816ec37d Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Thu, 25 Apr 2024 18:07:07 +0200 Subject: [PATCH 07/13] test single name config --- bench_executor/rpt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench_executor/rpt.py b/bench_executor/rpt.py index b347ed5..68064ed 100644 --- a/bench_executor/rpt.py +++ b/bench_executor/rpt.py @@ -31,7 +31,7 @@ def __init__(self, data_path: str, config_path: str, directory: str, self._verbose = verbose os.makedirs(os.path.join(self._data_path, 'rpt'), exist_ok=True) - super().__init__(f'aksw/rpt:{VERSION}', 'rpt' + '-' + str(self._instance), + super().__init__(f'aksw/rpt:{VERSION}', 'rpt-kgcc', self._logger, expect_failure=expect_failure, volumes=[f'{self._data_path}/rpt:/data', f'{self._data_path}/shared:/data/shared']) From b4c102ac92843d317d673e1211da5e26d752c690 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Tue, 30 Apr 2024 15:52:47 +0200 Subject: [PATCH 08/13] add environment support to bench_executor/container run method --- bench_executor/container.py | 10 ++++++---- bench_executor/rpt.py | 15 +++++++++------ rmltk-templates | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/bench_executor/container.py b/bench_executor/container.py index cc7947e..5295f6e 100644 --- a/bench_executor/container.py +++ b/bench_executor/container.py @@ -96,7 +96,7 @@ def name(self) -> str: """The pretty name of the container""" return self._name - def run(self, command: str = '', *, working_dir=None, detach=True) -> bool: + def run(self, command: str = '', *, working_dir=None, detach=True, environment=None) -> bool: """Run the container. This is used for containers which are long running to provide services @@ -117,7 +117,9 @@ def run(self, command: str = '', *, working_dir=None, detach=True) -> bool: success : bool Whether running the container was successfull or not. """ - e = self._environment + if environment is None: + environment = {} + e = {**self._environment, **environment} v = self._volumes self._started, self._container_id = \ self._docker.run(self._container_name, command, self._name, detach, @@ -216,7 +218,7 @@ def run_and_wait_for_log(self, log_line: str, command: str = '', *, working_dir= self._logger.error(line) return False - def run_and_wait_for_exit(self, command: str = '', *, working_dir=None) -> bool: + def run_and_wait_for_exit(self, command: str = '', *, working_dir=None, environment=None) -> bool: """Run the container and wait for exit This blocks until the container exit and gives a status code. @@ -234,7 +236,7 @@ def run_and_wait_for_exit(self, command: str = '', *, working_dir=None) -> bool: success : bool Whether the container exited with status code 0 or not. """ - if not self.run(command, working_dir=working_dir): + if not self.run(command, working_dir=working_dir, environment=environment): return False if self._container_id is None: diff --git a/bench_executor/rpt.py b/bench_executor/rpt.py index 68064ed..d6f166a 100644 --- a/bench_executor/rpt.py +++ b/bench_executor/rpt.py @@ -34,10 +34,11 @@ def __init__(self, data_path: str, config_path: str, directory: str, super().__init__(f'aksw/rpt:{VERSION}', 'rpt-kgcc', self._logger, expect_failure=expect_failure, volumes=[f'{self._data_path}/rpt:/data', - f'{self._data_path}/shared:/data/shared']) + f'{self._data_path}/shared:/data/shared', + f'{self._data_path}/tmp:/tmp']) @timeout(TIMEOUT) - def _execute_with_timeout(self, arguments: list, *, working_dir=None) -> bool: + def _execute_with_timeout(self, arguments: list, *, working_dir=None, environment=None) -> bool: """Execute a mapping with a provided timeout. Returns @@ -46,9 +47,10 @@ def _execute_with_timeout(self, arguments: list, *, working_dir=None) -> bool: Whether the execution was successfull or not. """ return self.run_and_wait_for_exit(' '.join(map(shlex.quote, arguments)), - working_dir=working_dir) + working_dir=working_dir, + environment=environment) - def execute(self, command, arguments=None, working_dir='/data/shared') -> bool: + def execute(self, command, arguments=None, environment=None, working_dir='/data/shared') -> bool: """Execute rpt with given arguments. Parameters @@ -65,10 +67,11 @@ def execute(self, command, arguments=None, working_dir='/data/shared') -> bool: """ if arguments is None: arguments = [] - self._logger.debug(f'{self._instance}: Calling rpt {command} with {arguments!r}') + self._logger.debug(f'{self._instance}: Calling rpt {command} with {arguments!r} and {environment!r}') try: result = self._execute_with_timeout([*command.split(' '), *arguments], - working_dir=working_dir) + working_dir=working_dir, + environment=environment) self.stop() return result except TimeoutError: diff --git a/rmltk-templates b/rmltk-templates index a99c5a3..ded0faa 160000 --- a/rmltk-templates +++ b/rmltk-templates @@ -1 +1 @@ -Subproject commit a99c5a3610448697b4e562fd87f9f0e9754b9996 +Subproject commit ded0faa42a52b39ea11b254e6be6e70ff73d6584 From a8c30325b8f18189611d0178c8deaa8a546df463 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Tue, 30 Apr 2024 18:37:23 +0200 Subject: [PATCH 09/13] Implement global_environment key in metadata.json to configure java memory in a single place --- bench_executor/container.py | 22 +++++++++++++++++++++- bench_executor/data/metadata.schema | 4 ++++ bench_executor/executor.py | 6 ++++-- bench_executor/mysql.py | 9 +++++++-- bench_executor/postgresql.py | 9 +++++++-- bench_executor/rmlmapper.py | 5 ++++- bench_executor/rpt.py | 5 ++++- bench_executor/virtuoso.py | 9 +++++++-- rmltk-templates | 2 +- 9 files changed, 59 insertions(+), 12 deletions(-) diff --git a/bench_executor/container.py b/bench_executor/container.py index 5295f6e..57c28fd 100644 --- a/bench_executor/container.py +++ b/bench_executor/container.py @@ -119,7 +119,27 @@ def run(self, command: str = '', *, working_dir=None, detach=True, environment=N """ if environment is None: environment = {} - e = {**self._environment, **environment} + + def merge_env(e1, e2): + r = {} + for key in set(e1.keys()).union(e2.keys()): + if key in e2: + in_e1 = key in e1 + is_arr = isinstance(e2[key], list) or (in_e1 and isinstance(e1[key], list)) + if in_e1 and (is_arr or key == "JDK_JAVA_OPTIONS"): + if is_arr: + r[key] = [*e1[key], *e2[key]] + else: + r[key] = f'{e1[key]} {e2[key]}' + else: + r[key] = e2[key] + else: + r[key] = e1[key] + if isinstance(r[key], list): + r[key] = ' '.join(r[key]) + return r + + e = merge_env(self._environment, environment) v = self._volumes self._started, self._container_id = \ self._docker.run(self._container_name, command, self._name, detach, diff --git a/bench_executor/data/metadata.schema b/bench_executor/data/metadata.schema index 7f9de04..04dc2df 100644 --- a/bench_executor/data/metadata.schema +++ b/bench_executor/data/metadata.schema @@ -16,6 +16,10 @@ "description": "Short description of the case", "type": "string" }, + "global_environment": { + "description": "Variables to set in the environment to pass to the containers", + "type": "object" + }, "steps": { "description": "Short description of the case", "type": "array", diff --git a/bench_executor/executor.py b/bench_executor/executor.py index 0a19c84..e287895 100644 --- a/bench_executor/executor.py +++ b/bench_executor/executor.py @@ -396,7 +396,8 @@ def run(self, case: dict, interval: float, module = self._class_module_mapping[step['resource']] resource = getattr(module, step['resource'])(data_path, CONFIG_DIR, directory, - self._verbose, False) + self._verbose, False, + environment=data.get('global_environment')) if hasattr(resource, 'initialization'): if not resource.initialization(): self._logger.error('Failed to initialize resource ' @@ -419,7 +420,8 @@ def run(self, case: dict, interval: float, resource = getattr(module, step['resource'])(data_path, CONFIG_DIR, directory, self._verbose, - expect_failure) + expect_failure, + environment=data.get('global_environment')) active_resources.append(resource) # Containers may need to start up first before executing a command diff --git a/bench_executor/mysql.py b/bench_executor/mysql.py index faf5f16..1cf3d3e 100644 --- a/bench_executor/mysql.py +++ b/bench_executor/mysql.py @@ -29,7 +29,7 @@ class MySQL(Container): """MySQL container for executing SQL queries.""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): """Creates an instance of the MySQL class. Parameters @@ -44,6 +44,8 @@ def __init__(self, data_path: str, config_path: str, directory: str, Enable verbose logs. expect_failure : bool If we expect a failure or not. + environment : dict + Additional environment variables to use in the container. """ self._data_path = os.path.abspath(data_path) self._config_path = os.path.abspath(config_path) @@ -54,11 +56,14 @@ def __init__(self, data_path: str, config_path: str, directory: str, os.makedirs(tmp_dir, exist_ok=True) os.makedirs(os.path.join(self._data_path, 'mysql'), exist_ok=True) + if environment is None: + environment = {} super().__init__(f'kgconstruct/mysql:v{VERSION}', 'MySQL', self._logger, expect_failure=expect_failure, ports={PORT: PORT}, - environment={'MYSQL_ROOT_PASSWORD': 'root', + environment={**environment, + 'MYSQL_ROOT_PASSWORD': 'root', 'MYSQL_DATABASE': 'db'}, volumes=[f'{self._data_path}/shared/:/data/shared', f'{self._config_path}/mysql/' diff --git a/bench_executor/postgresql.py b/bench_executor/postgresql.py index 5710212..8b6c6d5 100644 --- a/bench_executor/postgresql.py +++ b/bench_executor/postgresql.py @@ -31,7 +31,7 @@ class PostgreSQL(Container): """PostgreSQL container for executing SQL queries""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): """Creates an instance of the PostgreSQL class. Parameters @@ -46,6 +46,8 @@ def __init__(self, data_path: str, config_path: str, directory: str, Enable verbose logs. expect_failure : bool If a failure is expected. + environment : dict + Additional environment variables to use in the container. """ self._data_path = os.path.abspath(data_path) self._config_path = os.path.abspath(config_path) @@ -57,10 +59,13 @@ def __init__(self, data_path: str, config_path: str, directory: str, os.makedirs(os.path.join(self._data_path, 'postgresql'), exist_ok=True) self._tables: List[str] = [] + if environment is None: + environment = {} super().__init__(f'blindreviewing/postgresql:v{VERSION}', 'PostgreSQL', self._logger, ports={PORT: PORT}, - environment={'POSTGRES_PASSWORD': PASSWORD, + environment={**environment, + 'POSTGRES_PASSWORD': PASSWORD, 'POSTGRES_USER': USER, 'POSTGRES_DB': DB, 'PGPASSWORD': PASSWORD, diff --git a/bench_executor/rmlmapper.py b/bench_executor/rmlmapper.py index 8367485..9e862ec 100644 --- a/bench_executor/rmlmapper.py +++ b/bench_executor/rmlmapper.py @@ -23,7 +23,7 @@ class RMLMapper(Container): """RMLMapper container for executing R2RML and RML mappings.""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): """Creates an instance of the RMLMapper class. Parameters @@ -38,6 +38,8 @@ def __init__(self, data_path: str, config_path: str, directory: str, Enable verbose logs. expect_failure : bool If a failure is expected, default False. + environment : dict + Additional environment variables to use in the container. """ self._data_path = os.path.abspath(data_path) self._config_path = os.path.abspath(config_path) @@ -47,6 +49,7 @@ def __init__(self, data_path: str, config_path: str, directory: str, os.makedirs(os.path.join(self._data_path, 'rmlmapper'), exist_ok=True) super().__init__(f'kgconstruct/rmlmapper:v{VERSION}', 'RMLMapper', self._logger, expect_failure=expect_failure, + environment=environment, volumes=[f'{self._data_path}/rmlmapper:/data', f'{self._data_path}/shared:/data/shared']) diff --git a/bench_executor/rpt.py b/bench_executor/rpt.py index d6f166a..68eecec 100644 --- a/bench_executor/rpt.py +++ b/bench_executor/rpt.py @@ -21,10 +21,12 @@ class Rpt(Container): _INSTANCES = 0 def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): self._instance = Rpt._INSTANCES Rpt._INSTANCES = Rpt._INSTANCES + 1 + if environment is None: + environment = {} self._data_path = os.path.abspath(data_path) self._config_path = os.path.abspath(config_path) self._logger = Logger(__name__ + '.' + str(self._instance), directory, verbose) @@ -33,6 +35,7 @@ def __init__(self, data_path: str, config_path: str, directory: str, os.makedirs(os.path.join(self._data_path, 'rpt'), exist_ok=True) super().__init__(f'aksw/rpt:{VERSION}', 'rpt-kgcc', self._logger, expect_failure=expect_failure, + environment=environment, volumes=[f'{self._data_path}/rpt:/data', f'{self._data_path}/shared:/data/shared', f'{self._data_path}/tmp:/tmp']) diff --git a/bench_executor/virtuoso.py b/bench_executor/virtuoso.py index 8db5f7e..7e8f21f 100644 --- a/bench_executor/virtuoso.py +++ b/bench_executor/virtuoso.py @@ -43,7 +43,7 @@ class Virtuoso(Container): """Virtuoso container to execute SPARQL queries""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): """Creates an instance of the Virtuoso class. Parameters @@ -58,6 +58,8 @@ def __init__(self, data_path: str, config_path: str, directory: str, Enable verbose logs. expect_failure : bool If a failure is expected, default False. + environment : dict + Additional environment variables to use in the container. """ self._data_path = os.path.abspath(data_path) self._config_path = os.path.abspath(config_path) @@ -71,7 +73,10 @@ def __init__(self, data_path: str, config_path: str, directory: str, * NUMBER_OF_BUFFERS_PER_GB) max_dirty_buffers = int(psutil.virtual_memory().total / (10**9) * MAX_DIRTY_BUFFERS_PER_GB) - environment = {'DBA_PASSWORD': PASSWORD, + if environment is None: + environment = {} + environment = {**environment, + 'DBA_PASSWORD': PASSWORD, 'VIRT_SPARQL_ResultSetMaxRows': MAX_ROWS, 'VIRT_SPARQL_MaxQueryExecutionTime': QUERY_TIMEOUT, 'VIRT_SPARQL_ExecutionTimeout': QUERY_TIMEOUT, diff --git a/rmltk-templates b/rmltk-templates index ded0faa..ec030e1 160000 --- a/rmltk-templates +++ b/rmltk-templates @@ -1 +1 @@ -Subproject commit ded0faa42a52b39ea11b254e6be6e70ff73d6584 +Subproject commit ec030e131efcc49cdbf60b9d41e80fb5dce3a706 From 27b3879cdb71ff6abe673667f44bb9ff7e07c35f Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 15 May 2024 13:38:55 +0200 Subject: [PATCH 10/13] add missing environment key to Validate tool resource --- bench_executor/query.py | 2 +- bench_executor/validate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench_executor/query.py b/bench_executor/query.py index 70ba141..f1310fd 100644 --- a/bench_executor/query.py +++ b/bench_executor/query.py @@ -18,7 +18,7 @@ class Query(): """Execute a query on a SPARQL endpoint.""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool = False): + verbose: bool, expect_failure: bool = False, environment=None): """Creates an instance of the Query class. Parameters diff --git a/bench_executor/validate.py b/bench_executor/validate.py index 52ae702..1f91f36 100644 --- a/bench_executor/validate.py +++ b/bench_executor/validate.py @@ -13,7 +13,7 @@ class Validate(): """Validate the RDF graph by comparing it with an expected graph""" def __init__(self, data_path: str, config_path: str, directory: str, - verbose: bool, expect_failure: bool): + verbose: bool, expect_failure: bool, environment=None): """Creates an instance of the Validate class. Parameters From 9d7db18e9888c5cc44a144452df792372d8f1a56 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 15 May 2024 14:17:53 +0200 Subject: [PATCH 11/13] update 2024 challenge file to version 1.0.0, contains some fixes --- exectool | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exectool b/exectool index 18f2f1b..6e1f4fe 100755 --- a/exectool +++ b/exectool @@ -23,7 +23,7 @@ EXIT_CODE_INTERRUPTED = -7 EXIT_CODE_DOWNLOAD_FAILURE = -8 KGC_CHALLENGE_2023_URL = 'https://zenodo.org/record/7689310/files/challenge.tar.gz?download=1' # noqa: E501 KGC_CHALLENGE_2023_FILE_NAME = 'eswc-kgc-challenge-2023.tar.gz' -KGC_CHALLENGE_2024_URL = 'https://zenodo.org/record/10721875/files/challenge.tar.gz?download=1' # noqa: E501 +KGC_CHALLENGE_2024_URL = 'https://zenodo.org/record/10973433/files/challenge.tar.gz?download=1' # noqa: E501 KGC_CHALLENGE_2024_FILE_NAME = 'eswc-kgc-challenge-2024.tar.gz' DOWNLOAD_DIR = 'downloads' CHUNK_SIZE = 8192 From 9315921c1d812aa202394c76818ca5ffdea4b402 Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 15 May 2024 14:21:55 +0200 Subject: [PATCH 12/13] add config for rml2exec on track1/rml-core --- rmltk-templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmltk-templates b/rmltk-templates index ec030e1..ff12eca 160000 --- a/rmltk-templates +++ b/rmltk-templates @@ -1 +1 @@ -Subproject commit ec030e131efcc49cdbf60b9d41e80fb5dce3a706 +Subproject commit ff12ecacde2fddbd26c9cf984cdae958e094ba54 From d92b1d73d57c04017f2e5cbcb067bbac51f385eb Mon Sep 17 00:00:00 2001 From: Simon Bin Date: Wed, 15 May 2024 19:36:57 +0200 Subject: [PATCH 13/13] add expect_failure property to YAML template and config file --- rmltk-templates | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rmltk-templates b/rmltk-templates index ff12eca..c1158c2 160000 --- a/rmltk-templates +++ b/rmltk-templates @@ -1 +1 @@ -Subproject commit ff12ecacde2fddbd26c9cf984cdae958e094ba54 +Subproject commit c1158c23e475bb9e6a453588b732a93a58bae896