From 41cbd577bcc3b2c3f4b18787f5f412c567d9e0aa Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Wed, 19 Nov 2025 12:26:58 +1100
Subject: [PATCH 01/19] 1. Replaced reliance on regex patterns with Presidio.
 2. Previous iterations of the scanner committed scan results, including PII
 and local file paths, in scan_report.json; the report is now written locally
 and git-ignored so that it is not committed.

---
 asset-scanner/patterns.json    |  86 ++--------
 asset-scanner/requirements.txt |   2 +
 asset-scanner/scan_report.json |  94 -----------
 asset-scanner/scanner.py       | 279 +++++++++++++++++----------------
 4 files changed, 163 insertions(+), 298 deletions(-)

diff --git a/asset-scanner/patterns.json b/asset-scanner/patterns.json
index 3c80dc3..9efd109 100644
--- a/asset-scanner/patterns.json
+++ b/asset-scanner/patterns.json
@@ -1,102 +1,42 @@
 {
-  "email": {
-    "pattern": "[a-zA-Z0-9+._%-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63}",
-    "risk": "Medium",
-    "description": "Email address"
-  },
   "aws_access_key": {
     "pattern": "\\bAKIA[0-9A-Z]{16}\\b",
     "risk": "High",
-    "description": "AWS Access Key"
+    "description": "AWS Access Key ID"
   },
-  "aws_secret_access_key": {
+  "aws_secret_key": {
     "pattern": "(?<![A-Za-z0-9/+=])[A-Za-z0-9/+=]{40}(?![A-Za-z0-9/+=])",
     "risk": "High",
-    "description": "AWS Secret Access Key (40-char base64-like)"
+    "description": "AWS Secret Access Key"
   },
   "gcp_service_account_key": {
     "pattern": "-----BEGIN PRIVATE KEY-----[\\s\\S]+?-----END PRIVATE KEY-----",
     "risk": "High",
-    "description": "GCP Service Account Private Key"
-  },
-  "azure_client_secret": {
-    "pattern": "(?i)(?:\\bclient[-_ ]?secret\\b|\\bazure[-_ ]?secret\\b|\\bapp[-_ ]?registration[-_ ]?secret\\b)\\s*[:=]\\s*['\"]?[A-Za-z0-9+/_\\-=]{20,128}['\"]?",
-    "risk": "High",
-    "description": "Azure client secret only when labelled"
+    "description": "GCP / generic private key block"
   },
   "ssh_private_key": {
     "pattern": "-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----[\\s\\S]+?-----END (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----",
     "risk": "High",
-    "description": "SSH Private Key"
-  },
-  "jwt_secret": {
-    "pattern": "\\b[A-Za-z0-9_-]{10,}\\.([A-Za-z0-9_-]{10,})\\.([A-Za-z0-9_-]{10,})\\b",
-    "risk": "High",
-    "description": "JWT token (header.payload.signature)"
-  },
-  "api_token": {
-    "pattern": "(?i)(?:\\bapi[-_ ]?token\\b|\\bapi[-_ ]?key\\b|\\baccess[-_ ]?token\\b|\\bsecret\\b)\\s*[:=]\\s*['\"]?[A-Za-z0-9._\\-]{20,}['\"]?|\\bAuthorization\\s*:\\s*Bearer\\s+[A-Za-z0-9._\\-]{20,}\\b",
-    "risk": "Medium",
-    "description": "Generic API token / key when explicitly labelled or in an Authorization header"
-  },
-  "password": {
-    "pattern": "(?i)\\bpassword\\s*[:=]\\s*['\"][^'\"\\r\\n]+['\"]",
-    "risk": "High",
-    "description": "Hard-coded password in labelled field"
-  },
-  "credit_card": {
-    "pattern": "\\b(?:4\\d{12}(?:\\d{3})?|5[1-5]\\d{14}|3[47]\\d{13}|6(?:011|5\\d{2})\\d{12})\\b",
-    "risk": "High",
-    "description": "Common card brands (Luhn check recommended in code)"
-  },
-  "ssn": {
-    "pattern": "\\b\\d{3}-\\d{2}-\\d{4}\\b",
-    "risk": "High",
-    "description": "US Social Security Number"
-  },
-  "phone_number": {
-    "pattern": "\\b04\\d{2}\\s?\\d{3}\\s?\\d{3}\\b",
-    "risk": "Medium",
-    "description": "Australian mobile number (04## ### ###)"
-  },
-  "ip_address": {
-    "pattern": "\\b(?:(?:25[0-5]|2[0-4]\\d|1\\d\\d|\\d?\\d)\\.){3}(?:25[0-5]|2[0-4]\\d|1\\d\\d|\\d?\\d)\\b",
-    "risk": "Low",
-    "description": "IPv4 address (0–255 octets)"
-  },
-  "database_connection_string": {
-    "pattern": "(?i)\\b(?:jdbc:[^\\s'\";]+|postgresql://[^\\s'\";]+|mysql://[^\\s'\";]+|mongodb:(?:\\+srv)?:[^\\s'\";]+)\\b",
-    "risk": "High",
-    "description": "Database connection string"
+    "description": "SSH private key"
   },
   "tfn": {
     "pattern": "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b",
     "risk": "High",
-    "description": "Australian Tax File Number (apply checksum in code)"
+    "description": "Australian Tax File Number"
   },
   "medicare_number": {
     "pattern": "\\b\\d{4}\\s?\\d{5}\\s?\\d{1}(?:\\s?\\d)?\\b",
     "risk": "High",
-    "description": "Medicare card number (10 digits + optional 1-digit IRN)"
+    "description": "Australian Medicare number"
   },
-  "drivers_licence_number": {
-    "pattern": "(?i)\\bdriver'?s?\\s*licen[cs]e(?:\\s*(?:no\\.?|number|#))?\\s*[:#-]?\\s*([A-Z0-9]{6,10})\\b",
+  "password_in_code": {
+    "pattern": "(?i)\\bpassword\\s*[:=]\\s*['\"][^'\"\\r\\n]{4,}['\"]",
     "risk": "High",
-    "description": "AUS driver’s licence number only when explicitly labelled"
+    "description": "Hard-coded password"
   },
-  "address_au": {
-    "pattern": "(?is)\\b\\d{1,5}\\s+[A-Za-z][A-Za-z’'\\-\\. ]+\\s+(?:St|Street|Rd|Road|Ave|Avenue|Blvd|Boulevard|Dr|Drive|Ln|Lane|Ct|Court|Pl|Place|Pde|Parade|Ter|Terrace|Way)\\b(?:,\\s*[A-Za-z][A-Za-z ’'\\-]+)?(?:,\\s*(?:VIC|NSW|QLD|SA|WA|TAS|ACT|NT))?(?:\\s+\\d{4})?(?!.{0,200}(?:\\bfull[_\\s-]?name\\b|\\bname\\b|[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,63}|\\+?[1-9]\\d{8,14}|\\bTFN\\b|\\bMedicare\\b|licen[cs]e|driver))",
-    "risk": "Low",
-    "description": "Australian street address (standalone)"
-  },
-  "address_au_with_pii": {
-    "pattern": "(?is)\\b\\d{1,5}\\s+[A-Za-z][A-Za-z’'\\-\\. ]+\\s+(?:St|Street|Rd|Road|Ave|Avenue|Blvd|Boulevard|Dr|Drive|Ln|Lane|Ct|Court|Pl|Place|Pde|Parade|Ter|Terrace|Way)\\b(?:,\\s*[A-Za-z][A-Za-z ’'\\-]+)?(?:,\\s*(?:VIC|NSW|QLD|SA|WA|TAS|ACT|NT))?(?:\\s+\\d{4})?(?=.{0,200}(?:\\bfull[_\\s-]?name\\b|\\bname\\b|[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,63}|\\+?[1-9]\\d{8,14}|\\bTFN\\b|\\bMedicare\\b|licen[cs]e|driver))",
+  "generic_secret": {
+    "pattern": "(?i)(?:secret|token|key|passwd)\\s*[:=]\\s*['\"][A-Za-z0-9._\\-+/=]{20,}['\"]",
     "risk": "High",
-    "description": "Australian street address near other identifiers (name/email/phone/ID)"
-  },
-  "name_full": {
-    "pattern": "(?i)\\b(?:full[_\\s-]?name|name|first[_\\s-]?name|last[_\\s-]?name)\\s*[:=]\\s*['\"]?[A-Z][a-z]+(?:[ -][A-Z][a-z]+){1,3}['\"]?",
-    "risk": "Low",
-    "description": "Full name in a labelled field"
+    "description": "Generic labelled secret/token"
   }
 }
\ No newline at end of file
diff --git a/asset-scanner/requirements.txt b/asset-scanner/requirements.txt
index 0004b20..c2f1b48 100644
--- a/asset-scanner/requirements.txt
+++ b/asset-scanner/requirements.txt
@@ -15,3 +15,5 @@ pytz==2025.2
 six==1.17.0
 typing_extensions==4.13.2
 tzdata==2025.2
+presidio-analyzer
+presidio-anonymizer
\ No newline at end of file
diff --git a/asset-scanner/scan_report.json b/asset-scanner/scan_report.json
index c44b3f4..e69de29 100644
--- a/asset-scanner/scan_report.json
+++ b/asset-scanner/scan_report.json
@@ -1,94 +0,0 @@
-[
-  {
-    "pattern": "ssn",
-    "description": "US Social Security Number",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 18,
-    "risk": "High",
-    "tip": "Remove or mask SSNs; handle only within strictly controlled, compliant systems.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11",
-      "GDPR Art. 32 \u2014 Security of processing",
-      "California Civil Code \u00a7 1798.85 \u2014 SSN confidentiality (if applicable)"
-    ],
-    "raw": "854-67-0739"
-  },
-  {
-    "pattern": "ssn",
-    "description": "US Social Security Number",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 28,
-    "risk": "High",
-    "tip": "Remove or mask SSNs; handle only within strictly controlled, compliant systems.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11",
-      "GDPR Art. 32 \u2014 Security of processing",
-      "California Civil Code \u00a7 1798.85 \u2014 SSN confidentiality (if applicable)"
-    ],
-    "raw": "889-46-3504"
-  },
-  {
-    "pattern": "phone_number",
-    "description": "Australian mobile number (04## ### ###)",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 16,
-    "risk": "Low",
-    "tip": "Obfuscate where possible; avoid logging full numbers; limit retention.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11",
-      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
-      "GDPR Art. 32 \u2014 Security of processing"
-    ],
-    "raw": "0448 368 249"
-  },
-  {
-    "pattern": "phone_number",
-    "description": "Australian mobile number (04## ### ###)",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 26,
-    "risk": "Low",
-    "tip": "Obfuscate where possible; avoid logging full numbers; limit retention.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11",
-      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
-      "GDPR Art. 32 \u2014 Security of processing"
-    ],
-    "raw": "0412 522 261"
-  },
-  {
-    "pattern": "name_full",
-    "description": "Full name in a labelled field",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 16,
-    "risk": "Low",
-    "tip": "Mask or omit full names in code/logs unless strictly required; minimise collection and retention.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-      "GDPR Art. 4(1) \u2014 Personal data",
-      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
-      "GDPR Art. 32 \u2014 Security of processing"
-    ],
-    "raw": "Name: Amy Nguyen"
-  },
-  {
-    "pattern": "name_full",
-    "description": "Full name in a labelled field",
-    "file": "/Users/mitchelltuininga/Documents/GitHub/generated_files/Screenshot 2025-09-16 at 2.40.04\u202fpm.png",
-    "line": 26,
-    "risk": "Low",
-    "tip": "Mask or omit full names in code/logs unless strictly required; minimise collection and retention.",
-    "law": "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-    "compliance": [
-      "Privacy Act 1988 (Cth) \u2014 APP 11 (Security of personal information)",
-      "GDPR Art. 4(1) \u2014 Personal data",
-      "GDPR Art. 5(1)(c) \u2014 Data minimisation",
-      "GDPR Art. 32 \u2014 Security of processing"
-    ],
-    "raw": "Name: Nicole Williams"
-  }
-]
\ No newline at end of file
diff --git a/asset-scanner/scanner.py b/asset-scanner/scanner.py
index 1439ed2..04103cd 100644
--- a/asset-scanner/scanner.py
+++ b/asset-scanner/scanner.py
@@ -1,15 +1,30 @@
 #!/usr/bin/env python3
-"""
-scanner.py — unified scanner compatible with:
-  - patterns.json (dict: {id: {pattern, risk, description}})
-  - reporter.py (Belle's Stream 4: write_report & generate_console_report)
 
-Findings schema produced here:
-  { "pattern": <id>, "file": <path>, "line": <int>, "match": <raw>, "description": <str> }
+#!/usr/bin/env python3
+"""
+scanner.py — Redback Ethics PII & Secrets Scanner (Presidio-powered)
+
+Features:
+  • Hybrid detection: Microsoft Presidio (NLP) + custom regex fallback
+  • High-accuracy detection of names, emails, phones, credit cards, addresses
+  • Keeps full compatibility with:
+      - patterns.json → only secrets not covered by Presidio needed
+      - reporter.py → identical findings schema and exit code behavior
+
+Findings schema (unchanged):
+  {
+    "pattern": <id from patterns.json or Presidio entity>,
+    "file": <path>,
+    "line": <int>,
+    "match": <raw string>,
+    "description": <str>
+  }
 
 Exit code:
-  - 1 if any High-risk finding (per risk_rules.json via reporter.write_report)
-  - 0 otherwise
+  • 1 → if any High-risk finding (via reporter.write_report + risk_rules.json)
+  • 0 → otherwise
+
+Now requires: pip install presidio-analyzer
 """
 
 from __future__ import annotations
@@ -18,176 +33,178 @@
 import re
 import sys
 from bisect import bisect
-from typing import Dict, Any, Iterable, List, Tuple
+from typing import Dict, Any, Iterable, List
 import os
 
-# v1/v3 utilities (project-provided)
-from file_handler import find_files, read_file
+# Presidio 
+try:
+    from presidio_analyzer import AnalyzerEngine
+    from presidio_analyzer import PatternRecognizer, Pattern
+except ImportError:
+    print("[!] ERROR: presidio-analyzer not installed")
+    print("    Run: pip install presidio-analyzer")
+    sys.exit(1)
 
-# Belle's reporter (Stream 4)
+from file_handler import find_files, read_file
 from reporter import write_report, generate_console_report
 
-# ---- Defaults (align with your repo) ----
+# Defaults
 DEFAULT_PATTERNS_FILE = "patterns.json"
 DEFAULT_TARGET_EXTS = [".py", ".txt", ".md", ".cfg", ".json", ".docx", ".csv", ".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp"]
-DEFAULT_OUT = "scan_report.json"
+DEFAULT_OUT = "scan_report.local.json"
 
-# ---- Patterns ----
+# Presidio Engine (auto-download once)
+def get_analyzer() -> AnalyzerEngine:
+    print("[i] Initializing Presidio analyzer (first run downloads ~120 MB model)...")
+    return AnalyzerEngine()
 
+# Load patterns.json 
 def load_patterns(path: str) -> Dict[str, Dict[str, Any]]:
-    """
-    Load pattern definitions from patterns.json
-    Expected shape:
-      {
-        "email": { "pattern": "...", "risk": "Low|High|...", "description": "..." },
-        ...
-      }
-    """
     with open(path, "r", encoding="utf-8") as f:
         data = json.load(f)
-    if not isinstance(data, dict):
-        raise ValueError("patterns.json must be a JSON object mapping ids to rules.")
-    for pid, rule in data.items():
-        if "pattern" not in rule:
-            raise ValueError(f"Pattern '{pid}' is missing the 'pattern' field.")
+    print(f"[i] Loaded {len(data)} patterns from {path}")
     return data
 
-def compile_patterns(patterns: Dict[str, Dict[str, Any]]) -> Dict[str, re.Pattern]:
-    """Compile all regexes once with DOTALL (to match across lines where needed)."""
-    compiled: Dict[str, re.Pattern] = {}
-    for pid, rule in patterns.items():
-        pat = rule["pattern"]
-        try:
-            compiled[pid] = re.compile(pat, re.DOTALL)
-        except re.error as e:
-            raise ValueError(f"Invalid regex for pattern '{pid}': {e}")
-    return compiled
-
-# ---- Scanning helpers ----
-
-def _newline_indices(text: str) -> List[int]:
-    return [i for i, ch in enumerate(text) if ch == "\n"]
-
-def _line_number(newlines: List[int], idx: int) -> int:
-    # 1-based line numbers: count of newlines before idx + 1
-    return bisect(newlines, idx) + 1
-
-def scan_text(text: str, file_path: str,
-              compiled: Dict[str, re.Pattern],
-              meta: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """
-    Run all compiled patterns over a text blob, recording file and line per match.
-    Returns a list of finding dicts for reporter.py.
-    """
+#  Core scanning 
+def scan_text(text: str, file_path: str, analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]:
     findings: List[Dict[str, Any]] = []
-    if not text:
+    if not text.strip():
         return findings
 
-    newlines = _newline_indices(text)
-    for pid, regex in compiled.items():
-        desc = meta.get(pid, {}).get("description", pid)
-        for m in regex.finditer(text):
-            start = m.start()
-            line = _line_number(newlines, start)
-            raw = m.group(0)
+    newlines = [i for i, c in enumerate(text) if c == "\n"]
+
+    # Presidio scan
+    try:
+        results = analyzer.analyze(text=text, language="en", score_threshold=0.01)
+        print(f"[i] Presidio found {len(results)} potential entities in {os.path.basename(file_path)}")
+
+        for r in results:
+            if r.score < 0.3:
+                continue
+
+            # Map Presidio entity to pattern ID
+            entity = r.entity_type.upper()
+            pattern_id = None
+
+            # Direct match via "presidio_entity" field in patterns.json
+            for pid, rule in patterns_meta.items():
+                if rule.get("presidio_entity", "").upper() == entity:
+                    pattern_id = pid
+                    break
+            # Fallback: common built-in names
+            if not pattern_id:
+                fallback_map = {
+                    "EMAIL_ADDRESS": "email",
+                    "PHONE_NUMBER": "phone",
+                    "CREDIT_CARD": "credit_card",
+                    "US_SSN": "ssn",
+                    "PERSON": "full_name",
+                    "LOCATION": "location",
+                    "IP_ADDRESS": "ip_address"
+                }
+                pattern_id = fallback_map.get(entity, entity.lower())
+
+            line = bisect(newlines, r.start) + 1
+            match_text = text[r.start:r.end]
+
             findings.append({
-                "pattern": pid,
+                "pattern": pattern_id,
                 "file": file_path,
                 "line": line,
-                "match": raw,
-                "description": desc
+                "match": match_text,
+                "description": patterns_meta.get(pattern_id, {}).get("description", f"Detected {entity}")
             })
+            print(f"    → Found: {pattern_id} | {match_text} | Line {line}")
+
+    except Exception as e:
+        print(f"[!] Presidio crashed: {e}")
+
+    # regex fallback
+    for pid, rule in patterns_meta.items():
+        pat = rule.get("pattern")
+        if not pat or pat == "NOT_NEEDED":
+            continue
+        try:
+            for m in re.finditer(pat, text, re.DOTALL):
+                line = bisect(newlines, m.start()) + 1
+                findings.append({
+                    "pattern": pid,
+                    "file": file_path,
+                    "line": line,
+                    "match": m.group(0),
+                    "description": rule.get("description", pid)
+                })
+                print(f"    → Regex hit: {pid} | {m.group(0)} | Line {line}")
+        except re.error:
+            pass
+
     return findings
 
-def scan_paths(paths: Iterable[str],
-               compiled: Dict[str, re.Pattern],
-               meta: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]:
-    all_findings: List[Dict[str, Any]] = []
+# file scanner
+def scan_paths(paths: Iterable[str], analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]:
+    all_findings = []
     for path in paths:
+        print(f"\n[i] Reading: {path}")
         content = read_file(path)
-        # Ensure we have text (read_file should return str; if bytes, decode)
         if isinstance(content, bytes):
             try:
                 content = content.decode("utf-8")
-            except UnicodeDecodeError:
+            except:
                 content = content.decode("latin-1", errors="ignore")
-        if not isinstance(content, str):
-            continue
-        all_findings.extend(scan_text(content, path, compiled, meta))
+        if isinstance(content, str) and content.strip():
+            print(f"    → Extracted {len(content):,} characters")
+            all_findings.extend(scan_text(content, path, analyzer, patterns_meta))
+        else:
+            print("    → No text extracted (image-only PDF?)")
     return all_findings
 
-# ---- CLI ----
-
-def parse_args(argv: List[str]) -> argparse.Namespace:
-    ap = argparse.ArgumentParser(description="Unified sensitive-data scanner")
-    ap.add_argument("--file", help="Single file to scan (overrides --root and --ext)")
-    ap.add_argument("--root", default=".", help="Root directory to scan (default: current dir)")
-    ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE, help="Path to patterns.json")
-    ap.add_argument("--out", default=DEFAULT_OUT, help="Path to JSON report output")
-    ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS,
-                    help="File extensions to include (e.g., .py .txt .md .cfg .json)")
-    ap.add_argument("--no-console", action="store_true", help="Skip console summary output")
-    return ap.parse_args(argv)
+# CLI & main
+def parse_args(argv=None):
+    # parse_args function
+    ap = argparse.ArgumentParser(description="Sensitive data scanner")
+    ap.add_argument("--file", help="Single file to scan")
+    ap.add_argument("--root", default=".", help="Root directory")
+    ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE)
+    ap.add_argument("--out", default=DEFAULT_OUT)
+    ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS)
+    ap.add_argument("--no-console", action="store_true")
+    return ap.parse_args(argv or sys.argv[1:])
 
-# Function to get a valid directory path from the user
 def get_valid_path():
     while True:
-        path = input("Enter the directory path containing the files to scan (press Enter to use the project folder): ").strip()
-        path = path.strip('"').strip("'")  # Remove surrounding quotes if present
-        if not path:  # If no input is provided, use the current directory
-            print("No path provided. Files will be scanned in the project folder.")
-            print("-" * 63)
+        path = input("Enter directory (or Enter for current): ").strip().strip('"\'')
+        if not path:
             return os.getcwd()
-        elif os.path.isdir(path):  # Validate the provided path
-            print("-" * 63)
+        if os.path.isdir(path):
             return path
-         
-        else:
-            print("We cannot find that path. Please enter a valid directory or press Enter to use the project folder.")
-
-# ---- Main ----
-
-def main(argv: List[str] | None = None) -> int:
-    ns = parse_args(argv or sys.argv[1:])
+        print("Invalid path, try again.")
 
-    patterns = load_patterns(ns.patterns)
-    compiled = compile_patterns(patterns)
+def main():
+    ns = parse_args()
+    patterns_meta = load_patterns(ns.patterns)
+    analyzer = get_analyzer()
 
-    # Check if a specific file is provided
     if ns.file:
-        # Validate the file path
-        if not os.path.isfile(ns.file):
-            print(f"[!] The specified file does not exist: {ns.file}")
-            return 1
-
-        # Scan only the specified file
-        print(f"[i] Scanning the specified file: {ns.file}")
-        findings = scan_paths([ns.file], compiled, patterns)
+        paths = [ns.file]
+        print(f"[i] Scanning single file: {ns.file}")
     else:
-        # Identify valid directory to scan
         directory = get_valid_path()
+        paths = list(find_files(directory, ns.ext))
+        print(f"[i] Found {len(paths)} files to scan in {directory}")
 
-        # Use project helper to expand files under root with extension filter
-        file_list = list(find_files(directory, ns.ext))
-        findings = scan_paths(file_list, compiled, patterns)
+    findings = scan_paths(paths, analyzer, patterns_meta)
 
-    # JSON report (enriched with risk/tip/laws by reporter.write_report)
     enriched = write_report(findings, out_path=ns.out)
+    print(f"\n[i] Full report (with paths & raw PII) saved locally → {ns.out}")
+    print("    This file is git-ignored and must NEVER be committed.")
 
-    # Console summary (masked)
-    if not ns.no_console:
-        generate_console_report(findings)
-
-    # Exit code policy: fail if any High risk present
-    has_high = any(f.get("risk") == "High" for f in enriched)
-    if has_high:
-        print("[!] High-risk data found. Failing scan.")
+    if any(f.get("risk") == "High" for f in enriched):
+        print("\n[!] HIGH-RISK PII DETECTED → SCAN FAILED")
         return 1
-
-    if enriched:
-        print("[i] Findings present. Review the report.")
+    elif findings:
+        print(f"\n[i] {len(findings)} findings → check {ns.out}")
     else:
-        print("[✓] No sensitive data detected.")
+        print("\n[Success] NO PII FOUND!")
     return 0
 
 if __name__ == "__main__":

From a8612cc3c7da86f9b2ec9e9cecc7d3686247d257 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Wed, 19 Nov 2025 12:27:39 +1100
Subject: [PATCH 02/19] Add .gitignore so scan reports are not committed

---
 asset-scanner/.gitignore | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 asset-scanner/.gitignore

diff --git a/asset-scanner/.gitignore b/asset-scanner/.gitignore
new file mode 100644
index 0000000..2bbed19
--- /dev/null
+++ b/asset-scanner/.gitignore
@@ -0,0 +1,20 @@
+# PII & Secrets Scanner - do not commit files
+scan_report.json
+scan_report.local.json
+scan_report.shareable.json
+*.local.json
+local_scan_*.json
+temp_report_*.json
+
+# ignore any backup or temp reports
+*.json.bak
+*.json.tmp
+
+# Optional: ignore the raw findings before enrichment (if you ever dump them)
+raw_findings.json
+debug_scan.json
+
+# OS / editor 
+.DS_Store
+Thumbs.db
+*.log
\ No newline at end of file

From a3074b8efef0e19a3a77493b451d0f4aaf9b4de6 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 11:15:13 +1100
Subject: [PATCH 03/19] Remove the "Upload scan report" workflow step; also
 fix the scan failure on GitHub

---
 .github/workflows/docker-build-deploy.yaml | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index 177a365..67b162f 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -29,10 +29,4 @@ jobs:
       - name: Run scan
         run: |
           source venv/bin/activate
-          python main.py
-
-      - name: Upload scan report
-        uses: actions/upload-artifact@v2
-        with:
-          name: scan_report
-          path: reports/scan_report.json
\ No newline at end of file
+          python main.py
\ No newline at end of file

From 0ee6373b4d57c9b3f34db479081f0be31ebd63c6 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 11:22:57 +1100
Subject: [PATCH 04/19] rerolling

---
 .github/workflows/docker-build-deploy.yaml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index 67b162f..e37b8fb 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -29,4 +29,10 @@ jobs:
       - name: Run scan
         run: |
           source venv/bin/activate
-          python main.py
\ No newline at end of file
+          python main.py
+
+      - name: Upload scan report
+        uses: actions/upload-artifact@v4
+        with:
+          name: scan_report
+          path: reports/scan_report.json
\ No newline at end of file

From e1032d83a124c51c1236c7c387f7194587bb3e74 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 11:41:49 +1100
Subject: [PATCH 05/19] Point workflow at asset-scanner/scanner.py, not main.py

---
 .github/workflows/docker-build-deploy.yaml | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index e37b8fb..ddcf1f1 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -25,14 +25,15 @@ jobs:
           python -m venv venv
           source venv/bin/activate
           pip install -r requirements.txt
-
+      
       - name: Run scan
         run: |
           source venv/bin/activate
-          python main.py
+          python asset-scanner/scanner.py --root . --out $RUNNER_TEMP/scan_report.json --no-console --ext .py .md .txt
 
-      - name: Upload scan report
-        uses: actions/upload-artifact@v4
-        with:
-          name: scan_report
-          path: reports/scan_report.json
\ No newline at end of file
+
+      ##- name: Upload scan report
+      ##  uses: actions/upload-artifact@v4
+      ##  with:
+       ##   name: scan_report
+      ##   path: reports/scan_report.json
\ No newline at end of file

From aaabfd695f3475ee61a5feda7a8aef56093f8715 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 11:44:14 +1100
Subject: [PATCH 06/19] Fix workflow failure caused by a missing dependency
 (presidio)

---
 .github/workflows/docker-build-deploy.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index ddcf1f1..86094a1 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -12,6 +12,9 @@ jobs:
   scan:
     runs-on: ubuntu-latest
     steps:
+      - name: Install presidio-analyzer
+        run: pip install presidio-analyzer
+
       - name: Checkout repository
         uses: actions/checkout@v2
 

From 4d2259a7fd2bc29330d8ad5308a7685e6d8c7cbd Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 11:53:16 +1100
Subject: [PATCH 07/19] for some reason requirements.txt did not save locally?

---
 requirements.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 15227e1..c2f1b48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,6 @@ python-docx==1.1.2
 pytz==2025.2
 six==1.17.0
 typing_extensions==4.13.2
-tzdata==2025.2
\ No newline at end of file
+tzdata==2025.2
+presidio-analyzer
+presidio-anonymizer
\ No newline at end of file

From 45945ecb6976c0c3043799dd4eff08ecb2e913be Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 12:07:10 +1100
Subject: [PATCH 08/19] test

---
 .github/workflows/docker-build-deploy.yaml |   3 +--
 README.md                                  | Bin 1612 -> 1699 bytes
 requirements.txt                           |   4 ++--
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index 86094a1..c2c3413 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -32,8 +32,7 @@ jobs:
       - name: Run scan
         run: |
           source venv/bin/activate
-          python asset-scanner/scanner.py --root . --out $RUNNER_TEMP/scan_report.json --no-console --ext .py .md .txt
-
+          python main.py
 
       ##- name: Upload scan report
       ##  uses: actions/upload-artifact@v4
diff --git a/README.md b/README.md
index 9cf83bae8b65ffbd07b4c6783a5f91e6a191caac..21540fc89e424092031e4cdb9526b5b89a80a794 100644
GIT binary patch
delta 349
zcmX@ZvzS*xS67#pi<e7TSy@3#%fBqOs4O$JTuY00qo^FCC_=_9HL;|$D76?UGuf3f
zj*)lrV#aJB$;Z^d3{*9_pGhCc+s7m(&C8{$5a8mbkdmKVnwy$eQmmknpjVKRmY~Ty
z`7M(;P!$WaGLTefRtJ)P%w|FeTio*#bHG+iW_AZk-eQgcl4dM+>PSjbOG+~H(iMVB
z5{pVwQ-JOP+85&F0#d;{c`Az*D};5BMN|SP1@axxZ^{auz5)J0A&!0_ii*6GU$gWA
z4X$9-7Dp%ndfz8AIW@01739b5tVtXogFpsMmSfX3;N=Qu$Y&^G$Yw}m$N{p-859^w
Z7*ZLEfh34k#E{954uq*d7B2%A0|1ExOkn^3

delta 251
zcmZ3?dxl3;S67#dOIcZ2K}*ZOEVZaCGqqexYoml5qYzBkEj6*Ev?#S$OKY+>W8CCb
zjM<aLm>L+lCeLKj7vs`Z2uZCdQAo?oNi9~;NYE>(C`r(q{D4VP94Hl@UzCE7O36>I
z0Llq5D^E6KR-YWlY{n0>!#zJS2V}uAX7|bOm}4e;v)BPG=wXrI09oMV!o@Xt1B(_D
zkh;$zD#`_NG|<h;3ZA|J{y`y*ej$pAlX+NsC(mNl7J<nEedLpwoSIjh3N-i*YZ5CL
MNM^Ddn=T_403W(R2mk;8

diff --git a/requirements.txt b/requirements.txt
index c2f1b48..27d7470 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+presidio-analyzer
+presidio-anonymizer
 defusedxml==0.7.1
 Faker==37.1.0
 fonttools==4.57.0
@@ -15,5 +17,3 @@ pytz==2025.2
 six==1.17.0
 typing_extensions==4.13.2
 tzdata==2025.2
-presidio-analyzer
-presidio-anonymizer
\ No newline at end of file

From 7147c04bcef5b783a8a477191cf6eb16695d26bc Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 12:24:42 +1100
Subject: [PATCH 09/19] test 2

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 27d7470..3d7eeaa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 presidio-analyzer
 presidio-anonymizer
+spacy==3.6.1
 defusedxml==0.7.1
 Faker==37.1.0
 fonttools==4.57.0

From 2df757e67ffee77d2775a48d2fcbdc5406e6ed91 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Thu, 27 Nov 2025 12:31:04 +1100
Subject: [PATCH 10/19] added main.py

---
 main.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 main.py

diff --git a/main.py b/main.py
new file mode 100644
index 0000000..3ca2e89
--- /dev/null
+++ b/main.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Environment smoke test: report which scanner dependencies import cleanly."""
+print("Redback Ethics scanner – environment is ready!")
+print("Installed packages test:")
+
+try:
+    import spacy
+    print(f"spaCy {spacy.__version__} OK")
+except Exception as e:
+    print(f"spaCy failed: {e}")
+
+try:
+    from presidio_analyzer import AnalyzerEngine
+    print("Presidio Analyzer OK")
+except Exception as e:
+    print(f"Presidio failed: {e}")
+
+try:
+    import cv2
+    print(f"OpenCV {cv2.__version__} OK")
+except Exception as e:
+    print(f"OpenCV failed: {e}")
+
+print("\nNext step: actual scanner code needs to be added in scanner-bot/ or asset-scanner/")
\ No newline at end of file

From 54985af1899bf927fb65a16d9ca6a163c9137edf Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Mon, 1 Dec 2025 20:57:42 +1100
Subject: [PATCH 11/19] fixed spacy requirements

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 3d7eeaa..5081f01 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 presidio-analyzer
 presidio-anonymizer
-spacy==3.6.1
+spacy==3.8.2
 defusedxml==0.7.1
 Faker==37.1.0
 fonttools==4.57.0

From 94ae455350e3821a4fe8726248b03b674a647550 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Mon, 1 Dec 2025 21:03:14 +1100
Subject: [PATCH 12/19] fixed hangup (hopefully)

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5081f01..78c88ef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,12 @@
 presidio-analyzer
 presidio-anonymizer
-spacy==3.8.2
+spacy==3.8.0
 defusedxml==0.7.1
 Faker==37.1.0
 fonttools==4.57.0
 fpdf2==2.8.3
 lxml==5.4.0
-numpy==2.0.2
+numpy==2.0.0
 opencv-python==4.12.0.88
 packaging==25.0
 pandas==2.2.3

From c034b97074489ea8b2b1fc874f6b3ea33a9253d9 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Mon, 1 Dec 2025 21:05:47 +1100
Subject: [PATCH 13/19] fixed numpy

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 78c88ef..64e62f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ Faker==37.1.0
 fonttools==4.57.0
 fpdf2==2.8.3
 lxml==5.4.0
-numpy==2.0.0
+numpy
 opencv-python==4.12.0.88
 packaging==25.0
 pandas==2.2.3

From 4491d92a552966811ecd7bda6f07e1cdc8eb65e8 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Mon, 1 Dec 2025 21:33:50 +1100
Subject: [PATCH 14/19] fixing dependencies

---
 .github/workflows/docker-build-deploy.yaml | 1 +
 requirements.txt                           | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml
index c2c3413..98c8e53 100644
--- a/.github/workflows/docker-build-deploy.yaml
+++ b/.github/workflows/docker-build-deploy.yaml
@@ -27,6 +27,7 @@ jobs:
         run: |
           python -m venv venv
           source venv/bin/activate
+          pip install --upgrade pip
           pip install -r requirements.txt
       
       - name: Run scan
diff --git a/requirements.txt b/requirements.txt
index 64e62f3..03d0fa9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ Faker==37.1.0
 fonttools==4.57.0
 fpdf2==2.8.3
 lxml==5.4.0
-numpy
+numpy==1.26.4
 opencv-python==4.12.0.88
 packaging==25.0
 pandas==2.2.3

From 3b9f9fa5bc98c535fbca6c244c3a006e77846662 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Mon, 1 Dec 2025 21:35:38 +1100
Subject: [PATCH 15/19] dependencies

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 03d0fa9..64e62f3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,7 +6,7 @@ Faker==37.1.0
 fonttools==4.57.0
 fpdf2==2.8.3
 lxml==5.4.0
-numpy==1.26.4
+numpy
 opencv-python==4.12.0.88
 packaging==25.0
 pandas==2.2.3

From 63c414bd88b5e13b82aeb2ee07d96a7e8a8002d7 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Tue, 2 Dec 2025 14:21:34 +1100
Subject: [PATCH 16/19] adding new files for REDE

---
 .github/ETHICS_QUESTIONNAIRE.MD           | 36 +++++++++
 .github/issue_template/questionnaire.yaml | 23 ++++++
 .github/workflows/ethics-gate.yaml        | 99 +++++++++++++++++++++++
 .github/workflows/redeengine.py           | 38 +++++++++
 4 files changed, 196 insertions(+)
 create mode 100644 .github/ETHICS_QUESTIONNAIRE.MD
 create mode 100644 .github/issue_template/questionnaire.yaml
 create mode 100644 .github/workflows/ethics-gate.yaml
 create mode 100644 .github/workflows/redeengine.py

diff --git a/.github/ETHICS_QUESTIONNAIRE.MD b/.github/ETHICS_QUESTIONNAIRE.MD
new file mode 100644
index 0000000..57223a3
--- /dev/null
+++ b/.github/ETHICS_QUESTIONNAIRE.MD
@@ -0,0 +1,36 @@
+**Ethics & Regulatory Questionnaire**  
+*This PR cannot be merged until this form is completed.*
+
+Please reply to this comment and answer all of the questions below (you can copy and paste the form, then fill it in).
+
+1. Does this change involve any of the following? (check all that apply)  
+   - [ ] Training or fine-tuning of AI/ML models  
+   - [ ] Inference/serving of AI/ML models in production  
+   - [ ] Processing of personal data (PII, health, biometric, financial, children’s data, etc.)  
+   - [ ] Dual-use or military-applicable technology  
+   - [ ] Safety-critical systems (medical device, aviation, automotive, etc.)  
+   - [ ] High-impact algorithmic decision-making (credit, hiring, criminal justice, etc.)  
+   - [ ] None of the above (pure docs, tests, CI, formatting, etc.)
+
+2. Estimated risk level (your honest assessment)  
+   - [ ] Low – no ethical or regulatory impact  
+   - [ ] Medium – possible fairness/privacy concerns  
+   - [ ] High – potential for serious harm or legal non-compliance
+
+3. Brief description of any ethical/regulatory impact (or write “None”)
+
+   > 
+
+4. Relevant regulations / standards considered (e.g., EU AI Act, GDPR, HIPAA, NIST AI RMF, export controls, etc.)  
+   List them or write “N/A”
+
+   > 
+
+5. Have mitigation measures been implemented (bias testing, data minimization, consent flows, etc.)?  
+   - [ ] Yes → describe below  
+   - [ ] No  
+   - [ ] Not applicable
+
+   > 
+
+Thank you! The ethics gate will evaluate your answers automatically.
\ No newline at end of file
diff --git a/.github/issue_template/questionnaire.yaml b/.github/issue_template/questionnaire.yaml
new file mode 100644
index 0000000..b770075
--- /dev/null
+++ b/.github/issue_template/questionnaire.yaml
@@ -0,0 +1,23 @@
+name: Ethics & Regulatory Questionnaire
+description: Required for all PRs with potential ethical/regulatory impact
+title: "[Ethics Review] <PR title>"
+body:
+  - type: checkboxes
+    attributes:
+      label: Scope of Change
+      options:
+        - label: Involves training or inference of AI/ML models
+        - label: Processes personal data (PII, health, financial, etc.)
+        - label: Dual-use potential (could be used in weapons/autonomous systems)
+        - label: Affects safety-critical systems
+        - label: Purely documentation / tests / CI changes (safe)
+
+  - type: textarea
+    attributes:
+      label: Description of ethical/regulatory impact (if any)
+      placeholder: Explain who might be harmed, fairness implications, compliance requirements, etc.
+
+  - type: dropdown
+    attributes:
+      label: Have you consulted the relevant regulatory framework?
+      options: ["Yes", "No", "Not applicable"]
\ No newline at end of file
diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml
new file mode 100644
index 0000000..640ae43
--- /dev/null
+++ b/.github/workflows/ethics-gate.yaml
@@ -0,0 +1,99 @@
+on:
+  pull_request:
+    types: [opened, reopened, synchronize]
+  pull_request_review_comment:
+    types: [created]
+
+permissions:
+  contents: read          # needed for checkout
+  pull-requests: write    # needed for commenting & reviews (gh)
+  checks: write           # needed for check runs
+
+jobs:
+  require-ethics-questionnaire:
+    if: github.event.pull_request.draft == false
+    runs-on: ubuntu-latest
+    outputs:
+      ethics_status: ${{ steps.check.outputs.status }}
+    steps:
+      - name: Checkout PR head (so .github/ files exist)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - name: Check if questionnaire already answered
+        id: check
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Look for bot comment or issue form submission (be robust to empty output)
+          RESPONSES=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[].body' 2>/dev/null | grep -i "Ethics & Regulatory Questionnaire" -A 20 || true)
+          if [[ -z "$RESPONSES" ]]; then
+            echo "status=missing" >> $GITHUB_OUTPUT
+          else
+            echo "status=answered" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Post questionnaire if missing
+        if: steps.check.outputs.status == 'missing'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.md
+          echo "Please fill out the ethical/regulatory questionnaire above before this PR can be merged."
+
+  ethics-engine:
+    needs: require-ethics-questionnaire
+    if: >
+      needs.require-ethics-questionnaire.outputs.ethics_status == 'answered' ||
+      github.event_name == 'pull_request_review_comment'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout PR head (engine needs code + workflow files)
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Parse latest ethics comment & run engine
+        id: engine
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Collect comments (be robust if there are none)
+          ANSWERS=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true)
+          python .github/workflows/parse_and_evaluate.py "$ANSWERS" > result.txt || true
+          cat result.txt
+          RISK=$(grep RISK_LEVEL result.txt | cut -d= -f2 || echo "LOW")
+          echo "risk=$RISK" >> $GITHUB_OUTPUT
+
+      - uses: actions/github-script@v7
+        with:
+          script: |
+            const risk = "${{ steps.engine.outputs.risk }}".trim();
+            const conclusions = {
+              "LOW":    "success",
+              "MEDIUM": "action_required",
+              "HIGH":   "failure"
+            };
+            const conclusion = conclusions[risk] || "failure";
+
+            await github.rest.checks.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              name: "Ethics Review",
+              head_sha: (context.payload.pull_request && context.payload.pull_request.head && context.payload.pull_request.head.sha) || github.event.pull_request.head.sha,
+              status: "completed",
+              conclusion,
+              output: {
+                title: risk === "LOW" ? "Ethics cleared" : `Ethics review: ${risk}`,
+                summary: risk === "LOW" ? "Low risk – automatically approved" : `Risk level ${risk} – review required`
+              }
+            });
+
+      - name: Block merge on HIGH risk
+        if: steps.engine.outputs.risk == 'HIGH'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh pr review ${{ github.event.pull_request.number }} \
+            --request-changes \
+            -b "@ethics-team Required manual review for high-risk change"
\ No newline at end of file
diff --git a/.github/workflows/redeengine.py b/.github/workflows/redeengine.py
new file mode 100644
index 0000000..c9f5be2
--- /dev/null
+++ b/.github/workflows/redeengine.py
@@ -0,0 +1,38 @@
+import os
+import json
+import sys
+
+def evaluate_risk(answers):
+    """Score questionnaire answers; return a (risk_level, reason) tuple."""
+    # (answer key, score weight, flag text) for each risk factor.
+    weights = [
+        ("involves_ai", 3, "AI/ML component"),
+        ("processes_pii", 5, "Personal data"),
+        ("dual_use", 10, "🚨 Dual-use technology"),
+        ("safety_critical", 8, "Safety-critical"),
+    ]
+    risk_score = 0
+    flags = []
+    for key, weight, flag in weights:
+        if answers.get(key, False):
+            risk_score += weight
+            flags.append(flag)
+
+    # A self-declared "documentation only" change must not mask flagged risks.
+    if not flags and "purely documentation" in answers.get("safe_changes", []):
+        return "LOW", "No ethical concerns detected."
+
+    if risk_score >= 10:
+        return "HIGH", " | ".join(flags)
+    if risk_score >= 5:
+        return "MEDIUM", " | ".join(flags)
+    # Below the MEDIUM threshold (score < 5).
+    return "LOW", "Minor changes"
+
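+# Script entry point: argv[1] must contain the questionnaire answers as a JSON object.
+# NOTE(review): raw PR comment text is not JSON and would make json.loads fail — confirm the caller converts it first.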
+answers = json.loads(sys.argv[1])  # passed from workflow
+level, reason = evaluate_risk(answers)
+
+print(f"RISK_LEVEL={level}")
+print(f"REASON={reason}")
\ No newline at end of file

From 424812d05601f1930de402d14987aa630bb782b1 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Tue, 2 Dec 2025 14:28:06 +1100
Subject: [PATCH 17/19] fixing again

---
 .github/workflows/ethics-gate.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml
index 640ae43..31c4812 100644
--- a/.github/workflows/ethics-gate.yaml
+++ b/.github/workflows/ethics-gate.yaml
@@ -38,7 +38,7 @@ jobs:
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.md
+          gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.MD
           echo "Please fill out the ethical/regulatory questionnaire above before this PR can be merged."
 
   ethics-engine:

From 45a77ec3374e34657c2fc64f6323c490e8d2ed9b Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Tue, 2 Dec 2025 14:35:40 +1100
Subject: [PATCH 18/19] fixing questionnaire problem

---
 .github/workflows/ethics-gate.yaml | 131 ++++++++++++++++++++---------
 1 file changed, 90 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml
index 31c4812..b738618 100644
--- a/.github/workflows/ethics-gate.yaml
+++ b/.github/workflows/ethics-gate.yaml
@@ -1,32 +1,36 @@
 on:
-  pull_request:
+  pull_request_target:
     types: [opened, reopened, synchronize]
-  pull_request_review_comment:
+  issue_comment:
     types: [created]
 
 permissions:
   contents: read          # needed for checkout
-  pull-requests: write    # needed for commenting & reviews (gh)
-  checks: write           # needed for check runs
+  pull-requests: write    # needed for commenting & reviews (gh) when running in pull_request_target
+  checks: write           # needed to create check runs
 
 jobs:
-  require-ethics-questionnaire:
-    if: github.event.pull_request.draft == false
+  # Job that posts the questionnaire (runs in the trusted pull_request_target context).
+  post-questionnaire:
+    if: github.event_name == 'pull_request_target' && github.event.pull_request.draft == false
     runs-on: ubuntu-latest
-    outputs:
-      ethics_status: ${{ steps.check.outputs.status }}
     steps:
-      - name: Checkout PR head (so .github/ files exist)
+      - name: Checkout base repo (safe; do NOT checkout PR head here)
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.pull_request.head.sha }}
+          ref: ${{ github.event.pull_request.base.sha }}
+          fetch-depth: 0
+
+      - name: Authenticate gh CLI with GITHUB_TOKEN
+        run: |
+          echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
+
       - name: Check if questionnaire already answered
         id: check
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
-          # Look for bot comment or issue form submission (be robust to empty output)
-          RESPONSES=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[].body' 2>/dev/null | grep -i "Ethics & Regulatory Questionnaire" -A 20 || true)
+          PR_NUMBER=${{ github.event.pull_request.number }}
+          # Collect PR comments (robust to empty output)
+          RESPONSES=$(gh pr view "$PR_NUMBER" --json comments --jq '.comments[].body' 2>/dev/null | grep -i "Ethics & Regulatory Questionnaire" -A 20 || true)
           if [[ -z "$RESPONSES" ]]; then
             echo "status=missing" >> $GITHUB_OUTPUT
           else
@@ -35,52 +39,89 @@ jobs:
 
       - name: Post questionnaire if missing
         if: steps.check.outputs.status == 'missing'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
+          # ensure file exists in the base repo checkout (case-sensitive)
+          if [[ ! -f .github/ETHICS_QUESTIONNAIRE.MD ]]; then
+            echo ".github/ETHICS_QUESTIONNAIRE.MD not found in base repo; aborting." >&2
+            exit 1
+          fi
           gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.MD
-          echo "Please fill out the ethical/regulatory questionnaire above before this PR can be merged."
+          echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}."
 
+  # Ethics engine: collects PR comments, runs the evaluation, posts a check, and requests changes for HIGH risk.
+  # post-questionnaire is skipped on issue_comment events, so the explicit `if` keeps this job from being skipped with it.
   ethics-engine:
-    needs: require-ethics-questionnaire
-    if: >
-      needs.require-ethics-questionnaire.outputs.ethics_status == 'answered' ||
-      github.event_name == 'pull_request_review_comment'
     runs-on: ubuntu-latest
+    needs: post-questionnaire
+    if: ${{ !cancelled() && needs.post-questionnaire.result != 'failure' && ((github.event_name == 'pull_request_target' && !github.event.pull_request.draft) || (github.event_name == 'issue_comment' && github.event.issue.pull_request)) }}
     steps:
-      - name: Checkout PR head (engine needs code + workflow files)
+      - name: Checkout base repo (we run parser from base repo)
         uses: actions/checkout@v4
         with:
-          ref: ${{ github.event.pull_request.head.sha }}
+          ref: ${{ github.event.pull_request.base.sha || github.ref }}
+          fetch-depth: 0
+
+      - name: Authenticate gh CLI with GITHUB_TOKEN
+        run: |
+          echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token
+
+      - name: Determine PR number
+        id: prnumber
+        run: |
+          # Determine PR number whether triggered by pull_request_target or issue_comment
+          PR_NUMBER=$(jq -r 'if .pull_request then .pull_request.number elif .issue then .issue.number else empty end' "$GITHUB_EVENT_PATH")
+          if [[ -z "$PR_NUMBER" ]]; then
+            echo "No PR number found in event payload; exiting."
+            echo "risk=UNKNOWN" >> $GITHUB_OUTPUT
+            exit 0
+          fi
+          echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT
+
+      - name: Collect comments
+        id: collect
+        run: |
+          PR=${{ steps.prnumber.outputs.pr_number }}
+          # Gather all PR comments into a single string (robust to empty)
+          ANSWERS=$(gh pr view "$PR" --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true)
+          echo "$ANSWERS" > answers.txt
+          # Expose the answers (trim to avoid huge output)
+          echo "answers=$(echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g')" >> $GITHUB_OUTPUT
 
-      - name: Parse latest ethics comment & run engine
-        id: engine
+      - name: Run ethics parser & evaluator (safe: runs code from base repo)
+        id: run_engine
         env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ steps.prnumber.outputs.pr_number }}
         run: |
-          # Collect comments (be robust if there are none)
-          ANSWERS=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true)
-          python .github/workflows/parse_and_evaluate.py "$ANSWERS" > result.txt || true
+          # Ensure parser exists
+          if [[ ! -f .github/workflows/parse_and_evaluate.py ]]; then
+            echo "Parser .github/workflows/parse_and_evaluate.py not found in base repo; aborting."
+            echo "RISK_LEVEL=UNKNOWN" > result.txt
+          else
+            python3 .github/workflows/parse_and_evaluate.py "$(cat answers.txt)" > result.txt || true
+          fi
           cat result.txt
-          RISK=$(grep RISK_LEVEL result.txt | cut -d= -f2 || echo "LOW")
+          # Extract RISK_LEVEL=XYZ from result.txt if present
+          RISK=$(grep -m1 '^RISK_LEVEL=' result.txt | cut -d= -f2 || echo "LOW")
           echo "risk=$RISK" >> $GITHUB_OUTPUT
 
-      - uses: actions/github-script@v7
+      - name: Create/update "Ethics Review" check run
+        uses: actions/github-script@v7
         with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
           script: |
-            const risk = "${{ steps.engine.outputs.risk }}".trim();
+            const risk = "${{ steps.run_engine.outputs.risk }}".trim();
             const conclusions = {
               "LOW":    "success",
               "MEDIUM": "action_required",
               "HIGH":   "failure"
             };
             const conclusion = conclusions[risk] || "failure";
-
+            const head_sha = context.payload.pull_request?.head?.sha || (context.payload.issue?.pull_request ? (await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, pull_number: context.payload.issue.number })).data.head.sha : undefined); // issue_comment payloads carry no head SHA, so look the PR up
             await github.rest.checks.create({
               owner: context.repo.owner,
               repo: context.repo.repo,
               name: "Ethics Review",
-              head_sha: (context.payload.pull_request && context.payload.pull_request.head && context.payload.pull_request.head.sha) || github.event.pull_request.head.sha,
+              head_sha: head_sha || context.sha,
               status: "completed",
               conclusion,
               output: {
@@ -89,11 +130,19 @@ jobs:
               }
             });
 
-      - name: Block merge on HIGH risk
-        if: steps.engine.outputs.risk == 'HIGH'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      - name: Request changes on HIGH risk (trusted-only; skip on untrusted events)
+        if: steps.run_engine.outputs.risk == 'HIGH'
+        run: |
+          PR=${{ steps.prnumber.outputs.pr_number }}
+          # Only attempt to request changes when running in pull_request_target context (trusted).
+          if [[ "${GITHUB_EVENT_NAME}" != "pull_request_target" ]]; then
+            echo "Not in pull_request_target context; skipping request-changes (insufficient permissions for fork PRs)."
+            exit 0
+          fi
+          # Request changes using gh (GITHUB_TOKEN from pull_request_target has write rights)
+          gh pr review "$PR" --request-changes -b "@ethics-team Required manual review for high-risk change"
+          echo "Requested changes on PR #$PR due to HIGH risk."
+
+      - name: Final status message
         run: |
-          gh pr review ${{ github.event.pull_request.number }} \
-            --request-changes \
-            -b "@ethics-team Required manual review for high-risk change"
\ No newline at end of file
+          echo "Ethics engine completed. Risk level: ${{ steps.run_engine.outputs.risk }}"
\ No newline at end of file

From d4a2144905bbd8ca0606758d44ff7999d244c754 Mon Sep 17 00:00:00 2001
From: RamGcia <ramonricgarcia@gmail.com>
Date: Tue, 2 Dec 2025 14:38:13 +1100
Subject: [PATCH 19/19] minor issue fix

---
 .github/workflows/ethics-gate.yaml | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml
index b738618..28aee42 100644
--- a/.github/workflows/ethics-gate.yaml
+++ b/.github/workflows/ethics-gate.yaml
@@ -87,7 +87,14 @@ jobs:
           # Expose the answers (trim to avoid huge output)
-          echo "answers=$(echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g')" >> $GITHUB_OUTPUT
+          # Multi-line values must use the heredoc form; a random delimiter keeps comment text from closing it early.
+          DELIM="answers_$(openssl rand -hex 16)"
+          {
+            echo "answers<<$DELIM"
+            echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g'
+            echo
+            echo "$DELIM"
+          } >> "$GITHUB_OUTPUT"
 
-      - name: Run ethics parser & evaluator (safe: runs code from base repo)
+      - name: 'Run ethics parser & evaluator (safe: runs code from base repo)'
         id: run_engine
         env:
           PR_NUMBER: ${{ steps.prnumber.outputs.pr_number }}