Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 49 additions & 4 deletions cleanowners.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""A GitHub Action to suggest removal of non-organization members from CODEOWNERS files."""

import re
import uuid

import auth
Expand All @@ -17,6 +18,44 @@ def get_org(github_connection, organization):
return None


def remove_username_from_content(content, username, changed_lines):
"""Remove a @username from CODEOWNERS content using line-scoped regex.

Args:
content: The current CODEOWNERS file content as bytes.
username: The GitHub username to remove (without @).
changed_lines: A set[int] tracking which line indices were modified.

Returns:
The updated content with the username removed.
"""
pattern = re.escape(f"@{username}".encode("ASCII"))
lines = content.split(b"\n")
for i, line in enumerate(lines):
new_line = re.sub(pattern + rb"(?=\s|$)", b"", line)
if new_line != line:
lines[i] = new_line
changed_lines.add(i)
return b"\n".join(lines)


def cleanup_whitespace(content, changed_lines):
"""Normalize whitespace only on lines where usernames were removed.

Args:
content: The CODEOWNERS file content as bytes.
changed_lines: A set[int] of line indices to clean up.

Returns:
The content with extra whitespace removed on affected lines.
"""
lines = content.split(b"\n")
for i in changed_lines:
lines[i] = re.sub(rb"[ \t]{2,}", b" ", lines[i])
lines[i] = re.sub(rb"[ \t]+(?=\r?$)", b"", lines[i])
return b"\n".join(lines)


def main(): # pragma: no cover
"""Run the main program"""

Expand Down Expand Up @@ -143,7 +182,8 @@ def main(): # pragma: no cover
usernames = get_usernames_from_codeowners(codeowners_decoded)

usernames_to_remove = []
codeowners_file_contents_new = None
codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames:
org = organization if organization else repo.owner.login
gh_org = get_org(github_connection, org)
Expand All @@ -161,15 +201,20 @@ def main(): # pragma: no cover
if not dry_run:
# Remove that username from the codeowners_file_contents
file_changed = True
bytes_username = f"@{username}".encode("ASCII")
codeowners_file_contents_new = codeowners_decoded.replace(
bytes_username, b""
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)

# Store the repo and users to remove for reporting later
if usernames_to_remove:
repo_and_users_to_remove[repo] = usernames_to_remove

# Clean up extra whitespace only on lines where usernames were removed
if file_changed:
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

# Update the CODEOWNERS file if usernames were removed
if file_changed:
eligble_for_pr_count += 1
Expand Down
118 changes: 118 additions & 0 deletions test_cleanowners.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@
import github3
from cleanowners import (
build_default_codeowners,
cleanup_whitespace,
commit_changes,
get_codeowners_file,
get_org,
get_repos_iterator,
get_usernames_from_codeowners,
print_stats,
remove_username_from_content,
)


Expand Down Expand Up @@ -147,6 +149,122 @@ def test_get_usernames_from_codeowners_with_raw_bytes(self):

self.assertEqual(result, expected_usernames)

def test_multiple_username_removals_are_cumulative(self):
"""Test that removing multiple usernames preserves all removals.

Regression test for https://github.com/github-community-projects/cleanowners/issues/380
The removal loop must accumulate changes rather than replacing from the
original content each time, otherwise only the last removal survives.
"""
codeowners_decoded = b"* @alice @bob @charlie\ndocs/* @alice\n"
usernames_to_remove = ["alice", "bob"]

codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames_to_remove:
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

remaining = get_usernames_from_codeowners(codeowners_file_contents_new)
self.assertEqual(remaining, ["charlie"])
self.assertNotIn(b"@alice", codeowners_file_contents_new)
self.assertNotIn(b"@bob", codeowners_file_contents_new)

def test_username_removal_does_not_corrupt_similar_names(self):
"""Test that removing @bob does not corrupt @bobsmith.

The removal must use word-boundary matching so that @bob only matches
the exact handle, not as a prefix of @bobsmith.
"""
codeowners_decoded = b"* @bobsmith @bob @charlie\n"
usernames_to_remove = ["bob"]

codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames_to_remove:
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

remaining = get_usernames_from_codeowners(codeowners_file_contents_new)
self.assertEqual(remaining, ["bobsmith", "charlie"])
self.assertIn(b"@bobsmith", codeowners_file_contents_new)
self.assertNotIn(b"@bob ", codeowners_file_contents_new)

def test_username_removal_cleans_up_whitespace(self):
"""Test that removing usernames does not leave extra whitespace.

After removing a username from between two others, the resulting
double space should be collapsed to a single space, and trailing
whitespace should be stripped.
"""
codeowners_decoded = b"* @alice @bob @charlie\n"
usernames_to_remove = ["bob"]

codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames_to_remove:
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

self.assertEqual(codeowners_file_contents_new, b"* @alice @charlie\n")

def test_username_removal_handles_crlf_line_endings(self):
"""Test that whitespace cleanup works with CRLF line endings.

Windows-style line endings use \\r\\n. The trailing whitespace
cleanup must strip spaces before \\r without consuming the \\r itself.
"""
codeowners_decoded = b"* @alice @bob @charlie\r\n"
usernames_to_remove = ["bob"]

codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames_to_remove:
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

self.assertEqual(codeowners_file_contents_new, b"* @alice @charlie\r\n")

def test_whitespace_cleanup_scoped_to_changed_lines(self):
"""Test that whitespace cleanup only affects lines where usernames were removed.

Lines with intentional alignment spacing should not be modified
if no username was removed from them.
"""
codeowners_decoded = b"src/** @alice @bob @charlie\ndocs/** @dave\n"
usernames_to_remove = ["bob"]

codeowners_file_contents_new = codeowners_decoded
changed_lines: set[int] = set()
for username in usernames_to_remove:
codeowners_file_contents_new = remove_username_from_content(
codeowners_file_contents_new, username, changed_lines
)
codeowners_file_contents_new = cleanup_whitespace(
codeowners_file_contents_new, changed_lines
)

# src line should be normalized (removal happened there)
self.assertIn(b"src/** @alice @charlie", codeowners_file_contents_new)
# docs line should be untouched (no removal happened there)
self.assertIn(b"docs/** @dave", codeowners_file_contents_new)


class TestGetOrganization(unittest.TestCase):
"""Test the get_org function in cleanowners.py"""
Expand Down
Loading