diff --git a/cleanowners.py b/cleanowners.py index 914ff3e..30cdde5 100644 --- a/cleanowners.py +++ b/cleanowners.py @@ -133,12 +133,14 @@ def main(): # pragma: no cover if codeowners_file_contents.content is None: # This is a large file so we need to get the sha and download based off the sha - codeowners_file_contents = repo.blob( + codeowners_decoded = repo.blob( repo.file_contents(codeowners_filepath).sha ).decode_content() + else: + codeowners_decoded = codeowners_file_contents.decoded # Extract the usernames from the CODEOWNERS file - usernames = get_usernames_from_codeowners(codeowners_file_contents) + usernames = get_usernames_from_codeowners(codeowners_decoded) usernames_to_remove = [] codeowners_file_contents_new = None @@ -160,10 +162,8 @@ def main(): # pragma: no cover # Remove that username from the codeowners_file_contents file_changed = True bytes_username = f"@{username}".encode("ASCII") - codeowners_file_contents_new = ( - codeowners_file_contents.decoded.replace( - bytes_username, b"" - ) + codeowners_file_contents_new = codeowners_decoded.replace( + bytes_username, b"" ) # Store the repo and users to remove for reporting later @@ -291,9 +291,9 @@ def get_repos_iterator(organization, repository_list, github_connection): def get_usernames_from_codeowners(codeowners_file_contents, ignore_teams=True): """Extract the usernames from the CODEOWNERS file""" usernames = [] - for line in codeowners_file_contents.decoded.splitlines(): + for line in codeowners_file_contents.splitlines(): if line: - line = line.decode() + line = line.decode() if isinstance(line, bytes) else line # skip comments if line.lstrip().startswith("#"): continue diff --git a/test_cleanowners.py b/test_cleanowners.py index a1281bb..eb1cfc4 100644 --- a/test_cleanowners.py +++ b/test_cleanowners.py @@ -103,8 +103,7 @@ class TestGetUsernamesFromCodeowners(unittest.TestCase): def test_get_usernames_from_codeowners_ignore_teams(self): """Test the get_usernames_from_codeowners function.""" - codeowners_file_contents = MagicMock() - codeowners_file_contents.decoded = """ + codeowners_file_contents = """ # Comment @user1 @user2 @@ -120,8 +119,7 @@ def test_get_usernames_from_codeowners_ignore_teams(self): def test_get_usernames_from_codeowners_with_teams(self): """Test the get_usernames_from_codeowners function.""" - codeowners_file_contents = MagicMock() - codeowners_file_contents.decoded = """ + codeowners_file_contents = """ # Comment @user1 @user2 @@ -135,6 +133,20 @@ def test_get_usernames_from_codeowners_with_teams(self): self.assertEqual(result, expected_usernames) + def test_get_usernames_from_codeowners_with_raw_bytes(self): + """Test that get_usernames_from_codeowners works with raw bytes (large file path). + + Regression test for https://github.com/github-community-projects/cleanowners/issues/378 + When a CODEOWNERS file is large, blob().decode_content() returns raw bytes + instead of a Contents object with a .decoded attribute. + """ + codeowners_file_contents = b"* @user1 @user2\ndocs/* @user3\n" + expected_usernames = ["user1", "user2", "user3"] + + result = get_usernames_from_codeowners(codeowners_file_contents) + + self.assertEqual(result, expected_usernames) + class TestGetOrganization(unittest.TestCase): """Test the get_org function in cleanowners.py"""