Switch from Multi Threading to Multi Processing (MetOffice#206)

r-sharp · web-flow · commit fef68435d735 · 2026-03-11T11:50:00.000Z
* Yet another weird round of having to repeatedly accept the same changes which git / VS Code are marking as in conflict.
Annoyingly, this seems to have introduced a bug as well.

* Fixing issue with external runners not establishing properly.

* Fixing an error that seems to have crept in during merging main and resolving clashes. But looking at it, I really can't fathom how.

* Where do these line length errors in the linter(s) come from - I haven't edited this file...

* quick "improvement" to the error reporting in one of the tests

* Why am I constantly having to re-implement fixes/tidying I'm sure I've done before?

* And now ruff_format wants to change them back again...

* Fix issue with "TLD" being repeated for paths when using --fullcheck

* Undoing auto formatting again as github CI doesn't agree with it (picard.gif)

* Remove the thread lock (not threading any more) to allow TestResult to be pickled

* 1st draft switching to multiprocessing.

* Functioning multiprocessing version using ProcessPoolExecutor

* Tidy and reduce default max-workers to 2

* Remove extraneous print call that merging main seems to have reinstated.
diff --git a/script_umdp3_checker/umdp3_checker_rules.py b/script_umdp3_checker/umdp3_checker_rules.py
@@ -10,7 +10,6 @@
 """
 
 import re
-import threading
 from typing import List, Dict
 from fortran_keywords import fortran_keywords
 from search_lists import (
@@ -65,7 +64,6 @@ class UMDP3Checker:
 
     def __init__(self):
         self._extra_error_info = {}
-        self._lock = threading.Lock()
         """
     TODO: The Perl version had a dodgy looking subroutine to calculate
         this, but I can't find where it was called from within the files in
@@ -81,8 +79,7 @@ def reset_extra_error_information(self):
         Appears to be used 'between' blocks of tests such as those on diffs and
         those on full files.
         """
-        with self._lock:
-            self._extra_error_info = {}
+        self._extra_error_info = {}
 
     def get_extra_error_information(self) -> Dict:
         """
@@ -94,16 +91,14 @@ def get_extra_error_information(self) -> Dict:
         actual failures and not just the count. However, this information
         doesn't seem to be output as yet and will need implementing.
         """
-        with self._lock:
-            return self._extra_error_info.copy()
+        return self._extra_error_info.copy()
 
     def add_extra_error(self, key: str, value: str = ""):
         """Add extra error information to the dictionary"""
         """
     TODO: The usefulness of the information added has not been assessed,
         nor does it appear to be reported as yet."""
-        with self._lock:
-            self._extra_error_info[key] = value
+        self._extra_error_info[key] = value
 
     def add_error_log(
         self, error_log: Dict, key: str = "no key", value: int = 0
@@ -188,7 +183,6 @@ def capitulated_keywords(self, lines: List[str]) -> TestResult:
             failure_count=failures,
             passed=(failures == 0),
             output=f"Checked {line_count} lines, found {failures} failures.",
-            # errors=self.get_extra_error_information()
             errors=error_log,
         )
 
diff --git a/script_umdp3_checker/umdp3_conformance.py b/script_umdp3_checker/umdp3_conformance.py
@@ -202,11 +202,6 @@ def create_external_runners(
     ) -> "StyleChecker":
         """Create a StyleChecker instance filtering files from a full list."""
         filtered_files = cls.filter_files(all_files, file_extensions)
-        print(
-            f"Creating external runners for {name} with {len(commands)} "
-            f"commands and {len(filtered_files)} files to check from a "
-            f"total of {len(all_files)} files."
-        )
         check_functions = {}
         for command in commands:
             external_opname = f"External_operation_{command[0]}"
@@ -285,7 +280,7 @@ class ConformanceChecker:
     def __init__(
         self,
         checkers: List[StyleChecker],
-        max_workers: int = 8,
+        max_workers: int = 2,
     ):
         self.checkers = checkers
         self.max_workers = max_workers
@@ -324,15 +319,16 @@ def check_files(self) -> None:
         then have each worker run all the checks for a given file. This would
         reduce the overhead of creating threads and allow for better use of
         resources."""
-        with concurrent.futures.ThreadPoolExecutor(
+        with concurrent.futures.ProcessPoolExecutor(
             max_workers=self.max_workers
         ) as executor:
             future_to_task = {
                 executor.submit(checker.check, file_path): file_path
                 for checker in self.checkers
                 for file_path in checker.files_to_check
             }
-
+            # TODO : This next loop could be used to process the individual results as
+            # they come in, rather than waiting for all to complete.
             for future in concurrent.futures.as_completed(future_to_task):
                 result = future.result()
                 results.append(result)
@@ -403,7 +399,7 @@ def process_arguments():
         "-p", "--path", type=str, default="./", help="path to repository"
     )
     parser.add_argument(
-        "--max-workers", type=int, default=8, help="Maximum number of parallel workers"
+        "--max-workers", type=int, default=2, help="Maximum number of parallel workers"
     )
     parser.add_argument(
         "--fullcheck",
@@ -417,11 +413,11 @@ def process_arguments():
         help="Print details of passed checks as well as failed ones.\n"
         "By default, only failed checks are printed in detail.",
     )
-    group = parser.add_mutually_exclusive_group()
-    group.add_argument(
+    verbosity_grp = parser.add_mutually_exclusive_group()
+    verbosity_grp.add_argument(
         "-v", "--verbose", action="count", default=0, help="Increase output verbosity"
     )
-    group.add_argument(
+    verbosity_grp.add_argument(
         "-q", "--quiet", action="count", default=0, help="Decrease output verbosity"
     )
     # The following are not yet implemented, but may become useful