Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 91 additions & 70 deletions check_rule_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,109 +2,130 @@
import xml.etree.ElementTree as ET
from pathlib import Path
import sys
from collections import defaultdict, Counter

def run_git_command(args):
    """Run a command and return everything it wrote to stdout.

    Args:
        args: The command and its arguments as a list, e.g.
            ``["git", "fetch", "origin", "main"]``. It is handed to
            ``subprocess.run`` as-is (no shell is involved).

    Returns:
        str: The captured standard output, decoded as text.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (``check=True``).
    """
    result = subprocess.run(args, capture_output=True, text=True, check=True)
    return result.stdout

def get_changed_rule_files():
    """Get a list of changed or added rule files in the PR.

    Runs ``git diff --name-status origin/main...HEAD`` and keeps only the
    entries that live under ``rules/`` and end in ``.xml``.

    Returns:
        list[tuple[str, Path]]: ``(status, path)`` pairs, where ``status`` is
        the raw status field printed by git (``"A"``, ``"M"``, ``"D"``,
        ``"R100"``, ...) and ``path`` is the file's path in the PR branch.

    Exits the process with status 1 if the git command fails.
    """
    try:
        output = run_git_command(["git", "diff", "--name-status", "origin/main...HEAD"])
    except subprocess.CalledProcessError as e:
        print("❌ Failed to get changed files:", e)
        sys.exit(1)

    changed_files = []
    for line in output.strip().splitlines():
        parts = line.strip().split(maxsplit=1)
        if len(parts) != 2:
            continue
        status, file_path = parts
        # Rename/copy entries look like "R100<TAB>old<TAB>new"; the last
        # tab-separated field is the path that exists in the PR branch.
        file_path = file_path.split("\t")[-1]
        if file_path.startswith("rules/") and file_path.endswith(".xml"):
            changed_files.append((status, Path(file_path)))
    return changed_files

def extract_rule_ids_from_xml(content):
    """Collect the numeric ``id`` attribute of every ``<rule>`` element.

    Args:
        content: XML text. It may contain several top-level elements, and may
            start with a byte-order mark and/or an ``<?xml ...?>`` declaration.

    Returns:
        list[int]: The rule IDs in document order. Duplicates are kept so the
        caller can detect them. Rules whose ``id`` is missing or is not made
        of ASCII digits are skipped. If the text cannot be parsed, a warning
        is printed and an empty list is returned.
    """
    ids = []

    # An XML declaration is only legal at the very start of a document, so it
    # must be removed before the content is embedded inside the wrapper below.
    body = content.lstrip("\ufeff \t\r\n")
    if body.startswith("<?xml"):
        decl_end = body.find("?>")
        if decl_end != -1:
            body = body[decl_end + 2:]

    try:
        # Wrap multiple root elements in a fake <root> tag to avoid parse errors
        root = ET.fromstring(f"<root>{body}</root>")
    except ET.ParseError as e:
        print(f"⚠️ XML Parse Error: {e}")
        return ids

    for rule in root.findall(".//rule"):
        rule_id = rule.get("id")
        # str.isdigit() alone also accepts characters such as "²" that int()
        # rejects, so restrict the check to ASCII digits.
        if rule_id and rule_id.isascii() and rule_id.isdigit():
            ids.append(int(rule_id))
    return ids

def get_rule_ids_in_files(files):
    """Return the set of rule IDs found across the given files.

    Args:
        files: Iterable of ``pathlib.Path`` objects pointing at rule XML files.

    Returns:
        set[int]: Every rule ID extracted from the readable files. A file that
        cannot be read is reported with a warning and skipped.
    """
    ids = set()
    for path in files:
        try:
            content = path.read_text()
        except (OSError, UnicodeError) as e:
            print(f"⚠️ Could not read {path}: {e}")
            continue
        ids.update(extract_rule_ids_from_xml(content))
    return ids

def get_all_main_rule_ids():
    """Get all rule IDs from the rules/*.xml files in origin/main.

    Fetches ``origin/main``, lists its tracked files, and parses every
    ``rules/*.xml`` file as it exists on that ref.

    Returns:
        set[int]: The rule IDs found on ``origin/main``. A file whose content
        cannot be retrieved with ``git show`` is skipped.

    Raises:
        subprocess.CalledProcessError: If the fetch or the file listing fails.
    """
    run_git_command(["git", "fetch", "origin", "main"])
    listing = run_git_command(["git", "ls-tree", "-r", "origin/main", "--name-only"])
    xml_files = [
        f for f in listing.splitlines()
        if f.startswith("rules/") and f.endswith(".xml")
    ]

    all_ids = set()
    for file in xml_files:
        try:
            content = run_git_command(["git", "show", f"origin/main:{file}"])
        except subprocess.CalledProcessError:
            # Best effort: an unreadable file contributes no IDs.
            continue
        all_ids.update(extract_rule_ids_from_xml(content))
    return all_ids
def get_rule_ids_per_file_in_main():
    """Map every rule ID on origin/main to the files that define it.

    Fetches ``origin/main``, lists its tracked files, and parses every
    ``rules/*.xml`` file as it exists on that ref.

    Returns:
        defaultdict[int, set[str]]: Rule ID -> set of repository-relative file
        paths (as printed by ``git ls-tree``) containing that ID. A file whose
        content cannot be retrieved with ``git show`` is skipped.

    Raises:
        subprocess.CalledProcessError: If the fetch or the file listing fails.
    """
    run_git_command(["git", "fetch", "origin", "main"])
    files_output = run_git_command(["git", "ls-tree", "-r", "origin/main", "--name-only"])
    xml_files = [
        f for f in files_output.splitlines()
        if f.startswith("rules/") and f.endswith(".xml")
    ]

    rule_id_to_files = defaultdict(set)
    for file in xml_files:
        try:
            content = run_git_command(["git", "show", f"origin/main:{file}"])
        except subprocess.CalledProcessError:
            continue
        for rule_id in extract_rule_ids_from_xml(content):
            rule_id_to_files[rule_id].add(file)
    return rule_id_to_files

def get_rule_ids_from_main_version(file_path: Path) -> list:
    """Return the rule IDs of ``file_path`` as it exists on origin/main.

    Args:
        file_path: Repository-relative path of the rule file.

    Returns:
        list[int]: Rule IDs extracted from the ``origin/main`` version of the
        file, or an empty list when ``git show`` fails for that path.
    """
    try:
        content = run_git_command(["git", "show", f"origin/main:{file_path.as_posix()}"])
    except subprocess.CalledProcessError:
        return []
    return extract_rule_ids_from_xml(content)

def detect_duplicates(rule_ids):
    """Return the IDs that occur more than once in ``rule_ids``.

    Args:
        rule_ids: Iterable of hashable rule IDs.

    Returns:
        list: Each duplicated ID once, in order of first appearance.
    """
    counter = Counter(rule_ids)
    return [rule_id for rule_id, count in counter.items() if count > 1]

def print_conflicts(conflicting_ids, rule_id_to_files):
    """Print each conflicting rule ID with the files that already define it.

    Args:
        conflicting_ids: Iterable of rule IDs to report; printed in sorted order.
        rule_id_to_files: Mapping of rule ID -> collection of file paths. IDs
            missing from the mapping are printed with no files listed.
    """
    print("❌ Conflicts detected:")
    for rule_id in sorted(conflicting_ids):
        files = rule_id_to_files.get(rule_id, [])
        print(f" - Rule ID {rule_id} found in:")
        # The file collections are sets, whose iteration order is not stable
        # between runs; sort them so the report is reproducible.
        for f in sorted(files):
            print(f" • {f}")

def main():
    """Check the rule files changed in this PR for rule ID conflicts.

    For every changed ``rules/*.xml`` file the working-tree content is parsed
    and checked for:

    * IDs that appear more than once inside the file itself;
    * for added files (status ``"A"``): IDs that already exist in any rule
      file on ``origin/main``;
    * for modified files (status ``"M"``): IDs that are new relative to the
      file's ``origin/main`` version and already exist on ``origin/main``.

    Files with any other status only get the internal-duplicate check.
    The process exits with status 1 on the first problem found.
    """
    changed_files = get_changed_rule_files()
    if not changed_files:
        print("✅ No rule files were changed in this PR.")
        return

    rule_id_to_files_main = get_rule_ids_per_file_in_main()

    print(f"🔍 Checking rule ID conflicts for files: {[f.name for _, f in changed_files]}")

    for status, path in changed_files:
        print(f"\n🔎 Checking file: {path.name}")

        try:
            dev_content = path.read_text()
        except (OSError, UnicodeError) as e:
            print(f"⚠️ Could not read {path.name}: {e}")
            continue
        dev_ids = extract_rule_ids_from_xml(dev_content)

        # Check for internal duplicates
        duplicates = detect_duplicates(dev_ids)
        if duplicates:
            print(f"❌ Duplicate rule IDs detected in {path.name}: {sorted(duplicates)}")
            sys.exit(1)

        if status == "A":
            # New file: none of its IDs may already exist on origin/main.
            conflicting_ids = set(dev_ids) & set(rule_id_to_files_main.keys())
            if conflicting_ids:
                print_conflicts(conflicting_ids, rule_id_to_files_main)
                sys.exit(1)
            else:
                print(f"✅ No conflict in new file {path.name}")

        elif status == "M":
            # Modified file: only IDs introduced by this change can conflict;
            # IDs the file already had on origin/main are its own.
            main_ids = get_rule_ids_from_main_version(path)
            if set(dev_ids) == set(main_ids):
                print(f"ℹ️ {path.name} modified but rule IDs unchanged.")
                continue

            new_or_changed_ids = set(dev_ids) - set(main_ids)
            conflicting_ids = new_or_changed_ids & set(rule_id_to_files_main.keys())

            if conflicting_ids:
                print_conflicts(conflicting_ids, rule_id_to_files_main)
                sys.exit(1)
            else:
                print(f"✅ Modified file {path.name} has no conflicting rule IDs.")

    print("\n✅ All rule file changes passed conflict checks.")

# Run the conflict check only when executed as a script, not on import.
if __name__ == "__main__":
    main()