Skip to content
This repository was archived by the owner on Jul 25, 2024. It is now read-only.
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 79 additions & 32 deletions squad/plugins/linux_log_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,28 @@
from collections import defaultdict
from squad.plugins import Plugin as BasePlugin
from squad.core.models import SuiteMetadata
from django.template.defaultfilters import slugify


logger = logging.getLogger()

# Indices into each entry of the regex tables (MULTILINERS / ONELINERS /
# REGEXES): every entry is a 3-tuple used by postprocess_testrun().
REGEX_NAME = 0          # test name used for the generated SQUAD test
REGEX_BODY = 1          # regex matching the whole log snippet
REGEX_EXTRACT_NAME = 2  # regex extracting a short name from the snippet

# Multi-line kernel-log patterns. Each entry is
# (test name, regex matching the whole snippet, regex extracting a short
# name from the snippet); see REGEX_NAME / REGEX_BODY / REGEX_EXTRACT_NAME.
# NOTE: the stale two-element entries left over from the pre-refactor table
# were removed — postprocess_testrun() indexes REGEX_EXTRACT_NAME (= 2) and
# would raise IndexError on them.
MULTILINERS = [
    ('check-kernel', r'-+\[? cut here \]?-+.*?-+\[? end trace \w* \]?-+', r"\d][^\+\n]*"),
    ('check-kernel', r'=+\n\[[\s\.\d]+\]\s+BUG: KASAN:.*?=+', r"BUG: KASAN:[^\+\n]*"),
    ('check-kernel', r'=+\n\[[\s\.\d]+\]\s+BUG: KFENCE:.*?=+', r"BUG: KFENCE:[^\+\n]*"),
]

# Single-line kernel-log patterns. Each entry is
# (test name, regex matching the offending line, regex extracting a short
# name from it); see REGEX_NAME / REGEX_BODY / REGEX_EXTRACT_NAME.
# NOTE: the stale two-element entries left over from the pre-refactor table
# were removed — postprocess_testrun() indexes REGEX_EXTRACT_NAME (= 2) and
# would raise IndexError on them.
ONELINERS = [
    ('check-kernel', r'^[^\n]+Oops(?: -|:).*?$', r"Oops[^\+\n]*"),
    ('check-kernel', r'^[^\n]+Unhandled fault.*?$', r"Unhandled [^\+\n]*"),
    ('check-kernel', r'^[^\n]+WARNING:.*?$', r"WARNING: [^\+\n]*"),
    ('check-kernel', r'^[^\n]+(?: kernel BUG at|BUG:).*?$', r"BUG: [^\+\n]*"),
    ('check-kernel', r'^[^\n]+invalid opcode:.*?$', r"invalid opcode: [^\+\n]*"),
    ('check-kernel', r'Kernel panic - not syncing.*?$', r"Kernel [^\+\n]*"),
]

# Tip: broader regexes should come first
Expand Down Expand Up @@ -70,33 +72,55 @@ def __join_matches(self, matches, regexes):
snippets[regex_id].append(match[regex_id])
return snippets

def __create_tests(self, testrun, suite, test_name, lines):
def __create_tests(self, testrun, suite, test_name, lines, test_regex=None):
"""
There will be at least one test per regex. If there were any match for a given
regex, then a new test will be generated using test_name + shasum. This helps
comparing kernel logs across different builds
"""
metadata, _ = SuiteMetadata.objects.get_or_create(suite=suite.slug, name=test_name, kind='test')
testrun.tests.create(
suite=suite,
result=(len(lines) == 0),
log='\n'.join(lines),
metadata=metadata,
build=testrun.build,
environment=testrun.environment,
)

# Some lines of the matched regex might be the same, and we don't want to create
# multiple tests like test1-sha1, test1-sha1, etc, so we'll create a set of sha1sums
# then create only new tests for unique sha's
# Run the REGEX_EXTRACT_NAME regex over the log lines to sort them by
# extracted name. If no name is extracted or the log parser did not
# have any output for a particular regex, just use the default name
# (for example "check-kernel-oops").
tests_to_create = defaultdict(set)
shas = defaultdict(set)
for line in lines:
sha = self.__create_shasum(line)
shas[sha].add(line)

for sha, lines in shas.items():
name = f'{test_name}-{sha}'
# If there are no lines, use the default name and create a passing
# test. For example "check-kernel-oops"
if not lines:
tests_to_create[test_name] = []

# If there are lines, then create the tests for these.
for line in lines:
extracted_name = self.__create_name(line, test_regex)
if extracted_name:
extended_test_name = f"{test_name}-{extracted_name}"
else:
extended_test_name = test_name
tests_to_create[extended_test_name].add(line)

for name, lines in tests_to_create.items():
metadata, _ = SuiteMetadata.objects.get_or_create(suite=suite.slug, name=name, kind='test')
testrun.tests.create(
suite=suite,
result=(len(lines) == 0),
log='\n'.join(lines),
metadata=metadata,
build=testrun.build,
environment=testrun.environment,
)

# Some lines of the matched regex might be the same, and we don't want to create
# multiple tests like test1-sha1, test1-sha1, etc, so we'll create a set of sha1sums
# then create only new tests for unique sha's

for line in lines:
sha = self.__create_shasum(line)
name_with_sha = f"{name}-{sha}"
shas[name_with_sha].add(line)

for name_with_sha, lines in shas.items():
metadata, _ = SuiteMetadata.objects.get_or_create(suite=suite.slug, name=name_with_sha, kind='test')
testrun.tests.create(
suite=suite,
result=False,
Expand All @@ -106,11 +130,30 @@ def __create_tests(self, testrun, suite, test_name, lines):
environment=testrun.environment,
)

def __remove_numbers_and_time(self, snippet):
    """
    Normalize a log snippet so equivalent lines compare equal across runs.

    Strips hex constants (0x...), bracketed hex words (<...> / [...]) and
    plain decimal numbers, then drops any leading "[ ... ]" prefix that
    remains (the kernel timestamp).
    """
    stripped = re.sub(r"(0x[a-f0-9]+|[<\[][0-9a-f]+?[>\]]|\d+)", "", snippet)
    return re.sub(r"^\[[^\]]+\]", "", stripped)

def __create_name(self, snippet, regex=None):
    """
    Build a slug usable as a test-name suffix from a log snippet.

    When a compiled regex is given, the first match inside the snippet is
    used as the name source; if the regex matches nothing (or no regex was
    supplied), None is returned so the caller falls back to the plain test
    name. Numbers and timestamps are stripped first so identical issues
    slugify to the same name across builds.
    """
    if not regex:
        return None
    found = regex.findall(snippet)
    if not found:
        return None
    normalized = self.__remove_numbers_and_time(found[0])
    # Cap the slug at 191 characters: SuiteMetadata names are limited to
    # 256 in SQUAD, and the appended "-" plus SHA take 65 (256 - 65 = 191).
    return slugify(normalized)[:191]

def __create_shasum(self, snippet):
    """
    Return the SHA256 hex digest of the snippet after numbers, addresses
    and leading timestamps have been stripped, so equivalent kernel log
    lines hash to the same value across different builds.
    """
    # The inline re.sub normalization duplicated here by the refactor was
    # removed; __remove_numbers_and_time() is the single source of truth,
    # and the digest must cover exactly one normalized string (the stale
    # extra sha.update() fed two different strings into the hash).
    sha = hashlib.sha256()
    without_numbers_and_time = self.__remove_numbers_and_time(snippet)
    sha.update(without_numbers_and_time.encode())
    return sha.hexdigest()

def postprocess_testrun(self, testrun):
Expand All @@ -133,4 +176,8 @@ def postprocess_testrun(self, testrun):

for regex_id in range(len(REGEXES)):
test_name = REGEXES[regex_id][REGEX_NAME]
self.__create_tests(testrun, suite, test_name, snippets[regex_id])
regex_pattern = REGEXES[regex_id][REGEX_EXTRACT_NAME]
test_name_regex = None
if regex_pattern:
test_name_regex = re.compile(regex_pattern, re.S | re.M)
self.__create_tests(testrun, suite, test_name, snippets[regex_id], test_name_regex)
Loading