diff --git a/cleanowners.py b/cleanowners.py index 30cdde5..0afe7d8 100644 --- a/cleanowners.py +++ b/cleanowners.py @@ -1,5 +1,6 @@ """A GitHub Action to suggest removal of non-organization members from CODEOWNERS files.""" +import re import uuid import auth @@ -17,6 +18,44 @@ def get_org(github_connection, organization): return None +def remove_username_from_content(content, username, changed_lines): + """Remove a @username from CODEOWNERS content using line-scoped regex. + + Args: + content: The current CODEOWNERS file content as bytes. + username: The GitHub username to remove (without @). + changed_lines: A set[int] tracking which line indices were modified. + + Returns: + The updated content with the username removed. + """ + pattern = re.escape(f"@{username}".encode("ASCII")) + lines = content.split(b"\n") + for i, line in enumerate(lines): + new_line = re.sub(pattern + rb"(?=\s|$)", b"", line) + if new_line != line: + lines[i] = new_line + changed_lines.add(i) + return b"\n".join(lines) + + +def cleanup_whitespace(content, changed_lines): + """Normalize whitespace only on lines where usernames were removed. + + Args: + content: The CODEOWNERS file content as bytes. + changed_lines: A set[int] of line indices to clean up. + + Returns: + The content with extra whitespace removed on affected lines. + """ + lines = content.split(b"\n") + for i in changed_lines: + lines[i] = re.sub(rb"[ \t]{2,}", b" ", lines[i]) + lines[i] = re.sub(rb"[ \t]+(?=\r?$)", b"", lines[i]) + return b"\n".join(lines) + + def main(): # pragma: no cover """Run the main program""" @@ -143,7 +182,8 @@ def main(): # pragma: no cover usernames = get_usernames_from_codeowners(codeowners_decoded) usernames_to_remove = [] - codeowners_file_contents_new = None + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() for username in usernames: org = organization if organization else repo.owner.login gh_org = get_org(github_connection, org) @@ -161,15 +201,20 @@ def main(): # pragma: no cover if not dry_run: # Remove that username from the codeowners_file_contents file_changed = True - bytes_username = f"@{username}".encode("ASCII") - codeowners_file_contents_new = codeowners_decoded.replace( - bytes_username, b"" + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines ) # Store the repo and users to remove for reporting later if usernames_to_remove: repo_and_users_to_remove[repo] = usernames_to_remove + # Clean up extra whitespace only on lines where usernames were removed + if file_changed: + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + # Update the CODEOWNERS file if usernames were removed if file_changed: eligble_for_pr_count += 1 diff --git a/test_cleanowners.py b/test_cleanowners.py index eb1cfc4..87dd40b 100644 --- a/test_cleanowners.py +++ b/test_cleanowners.py @@ -8,12 +8,14 @@ import github3 from cleanowners import ( build_default_codeowners, + cleanup_whitespace, commit_changes, get_codeowners_file, get_org, get_repos_iterator, get_usernames_from_codeowners, print_stats, + remove_username_from_content, ) @@ -147,6 +149,122 @@ def test_get_usernames_from_codeowners_with_raw_bytes(self): self.assertEqual(result, expected_usernames) + def test_multiple_username_removals_are_cumulative(self): + """Test that removing multiple usernames preserves all removals. + + Regression test for https://github.com/github-community-projects/cleanowners/issues/380 + The removal loop must accumulate changes rather than replacing from the + original content each time, otherwise only the last removal survives. + """ + codeowners_decoded = b"* @alice @bob @charlie\ndocs/* @alice\n" + usernames_to_remove = ["alice", "bob"] + + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() + for username in usernames_to_remove: + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines + ) + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + + remaining = get_usernames_from_codeowners(codeowners_file_contents_new) + self.assertEqual(remaining, ["charlie"]) + self.assertNotIn(b"@alice", codeowners_file_contents_new) + self.assertNotIn(b"@bob", codeowners_file_contents_new) + + def test_username_removal_does_not_corrupt_similar_names(self): + """Test that removing @bob does not corrupt @bobsmith. + + The removal must use word-boundary matching so that @bob only matches + the exact handle, not as a prefix of @bobsmith. + """ + codeowners_decoded = b"* @bobsmith @bob @charlie\n" + usernames_to_remove = ["bob"] + + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() + for username in usernames_to_remove: + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines + ) + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + + remaining = get_usernames_from_codeowners(codeowners_file_contents_new) + self.assertEqual(remaining, ["bobsmith", "charlie"]) + self.assertIn(b"@bobsmith", codeowners_file_contents_new) + self.assertNotIn(b"@bob ", codeowners_file_contents_new) + + def test_username_removal_cleans_up_whitespace(self): + """Test that removing usernames does not leave extra whitespace. + + After removing a username from between two others, the resulting + double space should be collapsed to a single space, and trailing + whitespace should be stripped. + """ + codeowners_decoded = b"* @alice @bob @charlie\n" + usernames_to_remove = ["bob"] + + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() + for username in usernames_to_remove: + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines + ) + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + + self.assertEqual(codeowners_file_contents_new, b"* @alice @charlie\n") + + def test_username_removal_handles_crlf_line_endings(self): + """Test that whitespace cleanup works with CRLF line endings. + + Windows-style line endings use \\r\\n. The trailing whitespace + cleanup must strip spaces before \\r without consuming the \\r itself. + """ + codeowners_decoded = b"* @alice @bob @charlie\r\n" + usernames_to_remove = ["bob"] + + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() + for username in usernames_to_remove: + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines + ) + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + + self.assertEqual(codeowners_file_contents_new, b"* @alice @charlie\r\n") + + def test_whitespace_cleanup_scoped_to_changed_lines(self): + """Test that whitespace cleanup only affects lines where usernames were removed. + + Lines with intentional alignment spacing should not be modified + if no username was removed from them. + """ + codeowners_decoded = b"src/** @alice @bob @charlie\ndocs/** @dave\n" + usernames_to_remove = ["bob"] + + codeowners_file_contents_new = codeowners_decoded + changed_lines: set[int] = set() + for username in usernames_to_remove: + codeowners_file_contents_new = remove_username_from_content( + codeowners_file_contents_new, username, changed_lines + ) + codeowners_file_contents_new = cleanup_whitespace( + codeowners_file_contents_new, changed_lines + ) + + # src line should be normalized (removal happened there) + self.assertIn(b"src/** @alice @charlie", codeowners_file_contents_new) + # docs line should be untouched (no removal happened there) + self.assertIn(b"docs/** @dave", codeowners_file_contents_new) + class TestGetOrganization(unittest.TestCase): """Test the get_org function in cleanowners.py"""