github-community-projects · jmeridth · Apr 7, 2026 · Apr 7, 2026
@@ -133,12 +133,14 @@ def main():  # pragma: no cover
 
             if codeowners_file_contents.content is None:
                 # This is a large file so we need to get the sha and download based off the sha
-                codeowners_file_contents = repo.blob(
+                codeowners_decoded = repo.blob(
                     repo.file_contents(codeowners_filepath).sha
                 ).decode_content()
+            else:
+                codeowners_decoded = codeowners_file_contents.decoded
 
             # Extract the usernames from the CODEOWNERS file
-            usernames = get_usernames_from_codeowners(codeowners_file_contents)
+            usernames = get_usernames_from_codeowners(codeowners_decoded)
 
             usernames_to_remove = []
             codeowners_file_contents_new = None
@@ -160,10 +162,8 @@ def main():  # pragma: no cover
                         # Remove that username from the codeowners_file_contents
                         file_changed = True
                         bytes_username = f"@{username}".encode("ASCII")
-                        codeowners_file_contents_new = (
-                            codeowners_file_contents.decoded.replace(
-                                bytes_username, b""
-                            )
+                        codeowners_file_contents_new = codeowners_decoded.replace(
+                            bytes_username, b""
                         )
 
             # Store the repo and users to remove for reporting later
@@ -291,9 +291,9 @@ def get_repos_iterator(organization, repository_list, github_connection):
 def get_usernames_from_codeowners(codeowners_file_contents, ignore_teams=True):
     """Extract the usernames from the CODEOWNERS file"""
     usernames = []
-    for line in codeowners_file_contents.decoded.splitlines():
+    for line in codeowners_file_contents.splitlines():
         if line:
-            line = line.decode()
+            line = line.decode() if isinstance(line, bytes) else line
             # skip comments
             if line.lstrip().startswith("#"):
                 continue

@@ -103,8 +103,7 @@ class TestGetUsernamesFromCodeowners(unittest.TestCase):
 
     def test_get_usernames_from_codeowners_ignore_teams(self):
         """Test the get_usernames_from_codeowners function."""
-        codeowners_file_contents = MagicMock()
-        codeowners_file_contents.decoded = """
+        codeowners_file_contents = """
         # Comment
         @user1
         @user2
@@ -120,8 +119,7 @@ def test_get_usernames_from_codeowners_ignore_teams(self):
 
     def test_get_usernames_from_codeowners_with_teams(self):
         """Test the get_usernames_from_codeowners function."""
-        codeowners_file_contents = MagicMock()
-        codeowners_file_contents.decoded = """
+        codeowners_file_contents = """
         # Comment
         @user1
         @user2
@@ -135,6 +133,20 @@ def test_get_usernames_from_codeowners_with_teams(self):
 
         self.assertEqual(result, expected_usernames)
 
+    def test_get_usernames_from_codeowners_with_raw_bytes(self):
+        """Check that usernames are extracted when the input is a bytes object.
+
+        Regression test for https://github.com/github-community-projects/cleanowners/issues/378
+        Large CODEOWNERS files are fetched via blob().decode_content() and that result
+        is handed to the parser directly; it is assumed to be bytes here (TODO confirm).
+        """
+        codeowners_file_contents = b"* @user1 @user2\ndocs/* @user3\n"
+        expected_usernames = ["user1", "user2", "user3"]
+
+        result = get_usernames_from_codeowners(codeowners_file_contents)
+
+        self.assertEqual(result, expected_usernames)
+
 
 class TestGetOrganization(unittest.TestCase):
     """Test the get_org function in cleanowners.py"""