From 2fe11e4d672067df38fcaa1504a1fd97a26b9c86 Mon Sep 17 00:00:00 2001 From: Samson <66468924+SamsonIdowu@users.noreply.github.com> Date: Mon, 7 Jul 2025 11:31:29 +0100 Subject: [PATCH] Update check_rule_ids.py --- check_rule_ids.py | 161 ++++++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 70 deletions(-) diff --git a/check_rule_ids.py b/check_rule_ids.py index d0bfaef..3ca71db 100644 --- a/check_rule_ids.py +++ b/check_rule_ids.py @@ -2,70 +2,77 @@ import xml.etree.ElementTree as ET from pathlib import Path import sys +from collections import defaultdict, Counter + +def run_git_command(args): + result = subprocess.run(args, capture_output=True, text=True, check=True) + return result.stdout def get_changed_rule_files(): - """Get a list of changed or added rule files in the PR.""" try: - result = subprocess.run( - ["git", "diff", "--name-only", "origin/main...HEAD"], - capture_output=True, - text=True, - check=True, - ) - changed_files = [ - Path(f.strip()) for f in result.stdout.splitlines() - if f.startswith("rules/") and f.endswith(".xml") - ] + output = run_git_command(["git", "diff", "--name-status", "origin/main...HEAD"]) + changed_files = [] + for line in output.strip().splitlines(): + parts = line.strip().split(maxsplit=1) + if len(parts) != 2: + continue + status, file_path = parts + if file_path.startswith("rules/") and file_path.endswith(".xml"): + changed_files.append((status, Path(file_path))) return changed_files except subprocess.CalledProcessError as e: print("❌ Failed to get changed files:", e) sys.exit(1) def extract_rule_ids_from_xml(content): - ids = set() + ids = [] try: - root = ET.fromstring(content) + # Wrap multiple root elements in a fake tag to avoid parse errors + wrapped = f"{content}" + root = ET.fromstring(wrapped) for rule in root.findall(".//rule"): rule_id = rule.get("id") if rule_id and rule_id.isdigit(): - ids.add(int(rule_id)) - except ET.ParseError: - pass + ids.append(int(rule_id)) + except ET.ParseError as e: + print(f"⚠️ XML Parse Error: {e}") return ids -def get_rule_ids_in_files(files): - ids = set() - for path in files: - try: - content = path.read_text() - ids.update(extract_rule_ids_from_xml(content)) - except Exception as e: - print(f"⚠️ Could not read {path}: {e}") - return ids -def get_all_main_rule_ids(): - """Get all rule IDs from the rules/*.xml files in origin/main.""" - subprocess.run(["git", "fetch", "origin", "main"], check=True) - result = subprocess.run( - ["git", "ls-tree", "-r", "origin/main", "--name-only"], - capture_output=True, - text=True, - check=True, - ) - xml_files = [ - f for f in result.stdout.splitlines() - if f.startswith("rules/") and f.endswith(".xml") - ] - - all_ids = set() +def get_rule_ids_per_file_in_main(): + run_git_command(["git", "fetch", "origin", "main"]) + files_output = run_git_command(["git", "ls-tree", "-r", "origin/main", "--name-only"]) + xml_files = [f for f in files_output.splitlines() if f.startswith("rules/") and f.endswith(".xml")] + + rule_id_to_files = defaultdict(set) for file in xml_files: - show = subprocess.run( - ["git", "show", f"origin/main:{file}"], - capture_output=True, - text=True, - ) - all_ids.update(extract_rule_ids_from_xml(show.stdout)) - return all_ids + try: + content = run_git_command(["git", "show", f"origin/main:{file}"]) + rule_ids = extract_rule_ids_from_xml(content) + for rule_id in rule_ids: + rule_id_to_files[rule_id].add(file) + except subprocess.CalledProcessError: + continue + return rule_id_to_files + +def get_rule_ids_from_main_version(file_path: Path): + try: + content = run_git_command(["git", "show", f"origin/main:{file_path.as_posix()}"]) + return extract_rule_ids_from_xml(content) + except subprocess.CalledProcessError: + return [] + +def detect_duplicates(rule_ids): + counter = Counter(rule_ids) + return [rule_id for rule_id, count in counter.items() if count > 1] + +def print_conflicts(conflicting_ids, rule_id_to_files): + print("❌ Conflicts detected:") + for rule_id in sorted(conflicting_ids): + files = rule_id_to_files.get(rule_id, []) + print(f" - Rule ID {rule_id} found in:") + for f in files: + print(f" • {f}") def main(): changed_files = get_changed_rule_files() @@ -73,38 +80,52 @@ def main(): print("✅ No rule files were changed in this PR.") return - # print(f"🔍 Checking these files for conflicts: {[f.name for f in changed_files]}") - - # changed_ids = get_rule_ids_in_files(changed_files) - # main_ids = get_all_main_rule_ids() - # conflicts = changed_ids & main_ids - - # if conflicts: - # print(f"❌ Conflicting rule IDs: {sorted(conflicts)}") - # sys.exit(1) - # else: - # print("✅ No rule ID conflicts.") + rule_id_to_files_main = get_rule_ids_per_file_in_main() - print(f"🔍 Checking these files for conflicts: {[f.name for f in changed_files]}") - main_ids = get_all_main_rule_ids() + print(f"🔍 Checking rule ID conflicts for files: {[f.name for _, f in changed_files]}") - # Loop through each changed file and check for ID conflicts - for path in changed_files: + for status, path in changed_files: print(f"\n🔎 Checking file: {path.name}") + try: - content = path.read_text() - file_ids = extract_rule_ids_from_xml(content) + dev_content = path.read_text() + dev_ids = extract_rule_ids_from_xml(dev_content) except Exception as e: print(f"⚠️ Could not read {path.name}: {e}") continue - conflicts = file_ids & main_ids - if conflicts: - print(f"❌ Conflicting rule IDs in {path.name} file. Rule IDs: {sorted(conflicts)}") + + # Check for internal duplicates + duplicates = detect_duplicates(dev_ids) + if duplicates: + print(f"❌ Duplicate rule IDs detected in {path.name}: {sorted(duplicates)}") sys.exit(1) - else: - print(f"✅ No rule ID conflicts in {path.name}.") - print("\n✅ All checked files are conflict-free.") + if status == "A": + # New file + conflicting_ids = set(dev_ids) & set(rule_id_to_files_main.keys()) + if conflicting_ids: + print_conflicts(conflicting_ids, rule_id_to_files_main) + sys.exit(1) + else: + print(f"✅ No conflict in new file {path.name}") + + elif status == "M": + # Modified file + main_ids = get_rule_ids_from_main_version(path) + if set(dev_ids) == set(main_ids): + print(f"ℹ️ {path.name} modified but rule IDs unchanged.") + continue + + new_or_changed_ids = set(dev_ids) - set(main_ids) + conflicting_ids = new_or_changed_ids & set(rule_id_to_files_main.keys()) + + if conflicting_ids: + print_conflicts(conflicting_ids, rule_id_to_files_main) + sys.exit(1) + else: + print(f"✅ Modified file {path.name} has no conflicting rule IDs.") + + print("\n✅ All rule file changes passed conflict checks.") if __name__ == "__main__": main()