diff --git a/.github/ETHICS_QUESTIONNAIRE.MD b/.github/ETHICS_QUESTIONNAIRE.MD new file mode 100644 index 0000000..57223a3 --- /dev/null +++ b/.github/ETHICS_QUESTIONNAIRE.MD @@ -0,0 +1,36 @@ +**Ethics & Regulatory Questionnaire** +*This PR cannot be merged until this form is completed.* + +Please reply to this comment and answer all questions below (you can copy-paste and fill it). + +1. Does this change involve any of the following? (check all that apply) + - [ ] Training or fine-tuning of AI/ML models + - [ ] Inference/serving of AI/ML models in production + - [ ] Processing of personal data (PII, health, biometric, financial, children’s data, etc.) + - [ ] Dual-use or military-applicable technology + - [ ] Safety-critical systems (medical device, aviation, automotive, etc.) + - [ ] High-impact algorithmic decision-making (credit, hiring, criminal justice, etc.) + - [ ] None of the above (pure docs, tests, CI, formatting, etc.) + +2. Estimated risk level (your honest assessment) + - [ ] Low – no ethical or regulatory impact + - [ ] Medium – possible fairness/privacy concerns + - [ ] High – potential for serious harm or legal non-compliance + +3. Brief description of any ethical/regulatory impact (or write “None”) + + > + +4. Relevant regulations / standards considered (e.g., EU AI Act, GDPR, HIPAA, NIST AI RMF, export controls, etc.) + List them or write “N/A” + + > + +5. Have mitigation measures been implemented (bias testing, data minimization, consent flows, etc.)? + - [ ] Yes → describe below + - [ ] No + - [ ] Not applicable + + > + +Thank you! The ethics gate will evaluate your answers automatically. 
\ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/questionnaire.yaml b/.github/ISSUE_TEMPLATE/questionnaire.yaml new file mode 100644 index 0000000..b770075 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/questionnaire.yaml @@ -0,0 +1,23 @@ +name: Ethics & Regulatory Questionnaire +description: Required for all PRs with potential ethical/regulatory impact +title: "[Ethics Review] " +body: + - type: checkboxes + attributes: + label: Scope of Change + options: + - label: Involves training or inference of AI/ML models + - label: Processes personal data (PII, health, financial, etc.) + - label: Dual-use potential (could be used in weapons/autonomous systems) + - label: Affects safety-critical systems + - label: Purely documentation / tests / CI changes (safe) + + - type: textarea + attributes: + label: Description of ethical/regulatory impact (if any) + placeholder: Explain who might be harmed, fairness implications, compliance requirements, etc. + + - type: dropdown + attributes: + label: Have you consulted the relevant regulatory framework? 
+ options: ["Yes", "No", "Not applicable"] \ No newline at end of file diff --git a/.github/workflows/docker-build-deploy.yaml b/.github/workflows/docker-build-deploy.yaml index 177a365..98c8e53 100644 --- a/.github/workflows/docker-build-deploy.yaml +++ b/.github/workflows/docker-build-deploy.yaml @@ -12,6 +12,9 @@ jobs: scan: runs-on: ubuntu-latest steps: + - name: Install presidio-analyzer + run: pip install presidio-analyzer + - name: Checkout repository uses: actions/checkout@v2 @@ -24,15 +27,16 @@ jobs: run: | python -m venv venv source venv/bin/activate + pip install --upgrade pip pip install -r requirements.txt - + - name: Run scan run: | source venv/bin/activate python main.py - - name: Upload scan report - uses: actions/upload-artifact@v2 - with: - name: scan_report - path: reports/scan_report.json \ No newline at end of file + ##- name: Upload scan report + ## uses: actions/upload-artifact@v4 + ## with: + ## name: scan_report + ## path: reports/scan_report.json \ No newline at end of file diff --git a/.github/workflows/ethics-gate.yaml b/.github/workflows/ethics-gate.yaml new file mode 100644 index 0000000..28aee42 --- /dev/null +++ b/.github/workflows/ethics-gate.yaml @@ -0,0 +1,148 @@ +on: + pull_request_target: + types: [opened, reopened, synchronize] + issue_comment: + types: [created] + +permissions: + contents: read # needed for checkout + pull-requests: write # needed for commenting & reviews (gh) when running in pull_request_target + checks: write # needed to create check runs + +jobs: + # Job that posts the questionnaire (runs in the trusted pull_request_target context). 
+ post-questionnaire: + if: github.event_name == 'pull_request_target' && github.event.pull_request.draft == false + runs-on: ubuntu-latest + steps: + - name: Checkout base repo (safe; do NOT checkout PR head here) + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.base.sha }} + fetch-depth: 0 + + - name: Authenticate gh CLI with GITHUB_TOKEN + run: | + echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token + + - name: Check if questionnaire already answered + id: check + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + # Collect PR comments (robust to empty output) + RESPONSES=$(gh pr view "$PR_NUMBER" --json comments --jq '.comments[].body' 2>/dev/null | grep -i "Ethics & Regulatory Questionnaire" -A 20 || true) + if [[ -z "$RESPONSES" ]]; then + echo "status=missing" >> $GITHUB_OUTPUT + else + echo "status=answered" >> $GITHUB_OUTPUT + fi + + - name: Post questionnaire if missing + if: steps.check.outputs.status == 'missing' + run: | + # ensure file exists in the base repo checkout (case-sensitive) + if [[ ! -f .github/ETHICS_QUESTIONNAIRE.MD ]]; then + echo ".github/ETHICS_QUESTIONNAIRE.MD not found in base repo; aborting." >&2 + exit 1 + fi + gh pr comment ${{ github.event.pull_request.number }} --body-file .github/ETHICS_QUESTIONNAIRE.MD + echo "Posted ethics questionnaire to PR #${{ github.event.pull_request.number }}." + + # Ethics engine: collects comments, runs evaluation, posts a check, and requests changes for HIGH risk. + # This job runs in the trusted context for pull_request_target and also on issue_comment (untrusted). + # For untrusted issue_comment runs, write actions (requesting changes) may be skipped if permissions are restricted. 
+ ethics-engine: + runs-on: ubuntu-latest + needs: post-questionnaire + steps: + - name: Checkout base repo (we run parser from base repo) + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.base.sha || github.ref }} + fetch-depth: 0 + + - name: Authenticate gh CLI with GITHUB_TOKEN + run: | + echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token + + - name: Determine PR number + id: prnumber + run: | + # Determine PR number whether triggered by pull_request_target or issue_comment + PR_NUMBER=$(jq -r 'if .pull_request then .pull_request.number elif .issue then .issue.number else empty end' "$GITHUB_EVENT_PATH") + if [[ -z "$PR_NUMBER" ]]; then + echo "No PR number found in event payload; exiting." + echo "risk=UNKNOWN" >> $GITHUB_OUTPUT + exit 0 + fi + echo "pr_number=$PR_NUMBER" >> $GITHUB_OUTPUT + + - name: Collect comments + id: collect + run: | + PR=${{ steps.prnumber.outputs.pr_number }} + # Gather all PR comments into a single string (robust to empty) + ANSWERS=$(gh pr view "$PR" --json comments --jq '[.comments[].body] | join("\n\n")' 2>/dev/null || true) + echo "$ANSWERS" > answers.txt + # Expose the answers (trim to avoid huge output) + echo "answers=$(echo "$ANSWERS" | head -c 32768 | sed -e 's/"/'"'"'"/g')" >> $GITHUB_OUTPUT + + - name: Run ethics parser & evaluator (safe runs code from base repo) + id: run_engine + env: + PR_NUMBER: ${{ steps.prnumber.outputs.pr_number }} + run: | + # Ensure parser exists + if [[ ! -f .github/workflows/redeengine.py ]]; then + echo "Parser .github/workflows/redeengine.py not found in base repo; aborting." 
+ echo "RISK_LEVEL=UNKNOWN" > result.txt + else + python3 .github/workflows/redeengine.py "$(cat answers.txt)" > result.txt || true + fi + cat result.txt + # Extract RISK_LEVEL=XYZ from result.txt if present + RISK=$(grep -m1 '^RISK_LEVEL=' result.txt | cut -d= -f2); RISK="${RISK:-LOW}" + echo "risk=$RISK" >> $GITHUB_OUTPUT + + - name: Create/update "Ethics Review" check run + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const risk = "${{ steps.run_engine.outputs.risk }}".trim(); + const conclusions = { + "LOW": "success", + "MEDIUM": "action_required", + "HIGH": "failure" + }; + const conclusion = conclusions[risk] || "failure"; + const head_sha = context.payload.pull_request?.head?.sha; + await github.rest.checks.create({ + owner: context.repo.owner, + repo: context.repo.repo, + name: "Ethics Review", + head_sha: head_sha || context.sha, + status: "completed", + conclusion, + output: { + title: risk === "LOW" ? "Ethics cleared" : `Ethics review: ${risk}`, + summary: risk === "LOW" ? "Low risk – automatically approved" : `Risk level ${risk} – review required` + } + }); + + - name: Request changes on HIGH risk (trusted-only; skip on untrusted events) + if: steps.run_engine.outputs.risk == 'HIGH' + run: | + PR=${{ steps.prnumber.outputs.pr_number }} + # Only attempt to request changes when running in pull_request_target context (trusted). + if [[ "${GITHUB_EVENT_NAME}" != "pull_request_target" ]]; then + echo "Not in pull_request_target context; skipping request-changes (insufficient permissions for fork PRs)." 
+ exit 0 + fi + # Request changes using gh (GITHUB_TOKEN from pull_request_target has write rights) + gh pr review "$PR" --request-changes -b "@ethics-team Required manual review for high-risk change" + echo "Requested changes on PR #$PR due to HIGH risk." + + - name: Final status message + run: | + echo "Ethics engine completed. Risk level: ${{ steps.run_engine.outputs.risk }}" \ No newline at end of file diff --git a/.github/workflows/redeengine.py b/.github/workflows/redeengine.py new file mode 100644 index 0000000..c9f5be2 --- /dev/null +++ b/.github/workflows/redeengine.py @@ -0,0 +1,38 @@ +import os +import json +import sys + +def evaluate_risk(answers): + risk_score = 0 + flags = [] + + if answers.get("involves_ai", False): + risk_score += 3 + flags.append("AI/ML component") + if answers.get("processes_pii", False): + risk_score += 5 + flags.append("Personal data") + if answers.get("dual_use", False): + risk_score += 10 + flags.append("🚨 Dual-use technology") + if answers.get("safety_critical", False): + risk_score += 8 + flags.append("Safety-critical") + + if risk_score == 0 and "purely documentation" in answers.get("safe_changes", []): + return "LOW", "No ethical concerns detected." 
+ + if risk_score >= 10: + return "HIGH", " | ".join(flags) + elif risk_score >= 5: + return "MEDIUM", " | ".join(flags) + else: + return "LOW", "Minor changes" + +# Parse comment or form submission here (simplified) +# In real use, you'd parse the actual comment body +answers = json.loads(sys.argv[1]) # passed from workflow +level, reason = evaluate_risk(answers) + +print(f"RISK_LEVEL={level}") +print(f"REASON={reason}") \ No newline at end of file diff --git a/README.md b/README.md index 9cf83ba..21540fc 100644 Binary files a/README.md and b/README.md differ diff --git a/asset-scanner/.gitignore b/asset-scanner/.gitignore new file mode 100644 index 0000000..2bbed19 --- /dev/null +++ b/asset-scanner/.gitignore @@ -0,0 +1,20 @@ +# PII & Secrets Scanner - do not commit files +scan_report.json +scan_report.local.json +scan_report.shareable.json +*.local.json +local_scan_*.json +temp_report_*.json + +# ignore any backup or temp reports +*.json.bak +*.json.tmp + +# Optional: ignore the raw findings before enrichment (if you ever dump them) +raw_findings.json +debug_scan.json + +# OS / editor +.DS_Store +Thumbs.db +*.log \ No newline at end of file diff --git a/asset-scanner/patterns.json b/asset-scanner/patterns.json index 3c80dc3..9efd109 100644 --- a/asset-scanner/patterns.json +++ b/asset-scanner/patterns.json @@ -1,102 +1,42 @@ { - "email": { - "pattern": "[a-zA-Z0-9+._%-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,63}", - "risk": "Medium", - "description": "Email address" - }, "aws_access_key": { "pattern": "\\bAKIA[0-9A-Z]{16}\\b", "risk": "High", - "description": "AWS Access Key" + "description": "AWS Access Key ID" }, - "aws_secret_access_key": { + "aws_secret_key": { "pattern": "(?, "file": , "line": , "match": , "description": } +#!/usr/bin/env python3 +""" +scanner.py — Redback Ethics PII & Secrets Scanner (Presidio-powered) + +Features: + • Hybrid detection: Microsoft Presidio (NLP) + custom regex fallback + • High-accuracy detection of names, emails, phones, credit 
cards, addresses + • Keeps full compatibility with: + - patterns.json → only secrets not covered by Presidio needed + - reporter.py → identical findings schema and exit code behavior + +Findings schema (unchanged): + { + "pattern": , + "file": , + "line": , + "match": , + "description": + } Exit code: - - 1 if any High-risk finding (per risk_rules.json via reporter.write_report) - - 0 otherwise + • 1 → if any High-risk finding (via reporter.write_report + risk_rules.json) + • 0 → otherwise + +Now requires: pip install presidio-analyzer """ from __future__ import annotations @@ -18,176 +33,178 @@ import re import sys from bisect import bisect -from typing import Dict, Any, Iterable, List, Tuple +from typing import Dict, Any, Iterable, List import os -# v1/v3 utilities (project-provided) -from file_handler import find_files, read_file +# Presidio +try: + from presidio_analyzer import AnalyzerEngine + from presidio_analyzer import PatternRecognizer, Pattern +except ImportError: + print("[!] 
ERROR: presidio-analyzer not installed") + print(" Run: pip install presidio-analyzer") + sys.exit(1) -# Belle's reporter (Stream 4) +from file_handler import find_files, read_file from reporter import write_report, generate_console_report -# ---- Defaults (align with your repo) ---- +# Defaults DEFAULT_PATTERNS_FILE = "patterns.json" DEFAULT_TARGET_EXTS = [".py", ".txt", ".md", ".cfg", ".json", ".docx", ".csv", ".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".webp"] -DEFAULT_OUT = "scan_report.json" +DEFAULT_OUT = "scan_report.local.json" -# ---- Patterns ---- +# Presidio Engine (auto-download once) +def get_analyzer() -> AnalyzerEngine: + print("[i] Initializing Presidio analyzer (first run downloads ~120 MB model)...") + return AnalyzerEngine() +# Load patterns.json def load_patterns(path: str) -> Dict[str, Dict[str, Any]]: - """ - Load pattern definitions from patterns.json - Expected shape: - { - "email": { "pattern": "...", "risk": "Low|High|...", "description": "..." }, - ... 
- } - """ with open(path, "r", encoding="utf-8") as f: data = json.load(f) - if not isinstance(data, dict): - raise ValueError("patterns.json must be a JSON object mapping ids to rules.") - for pid, rule in data.items(): - if "pattern" not in rule: - raise ValueError(f"Pattern '{pid}' is missing the 'pattern' field.") + print(f"[i] Loaded {len(data)} patterns from {path}") return data -def compile_patterns(patterns: Dict[str, Dict[str, Any]]) -> Dict[str, re.Pattern]: - """Compile all regexes once with DOTALL (to match across lines where needed).""" - compiled: Dict[str, re.Pattern] = {} - for pid, rule in patterns.items(): - pat = rule["pattern"] - try: - compiled[pid] = re.compile(pat, re.DOTALL) - except re.error as e: - raise ValueError(f"Invalid regex for pattern '{pid}': {e}") - return compiled - -# ---- Scanning helpers ---- - -def _newline_indices(text: str) -> List[int]: - return [i for i, ch in enumerate(text) if ch == "\n"] - -def _line_number(newlines: List[int], idx: int) -> int: - # 1-based line numbers: count of newlines before idx + 1 - return bisect(newlines, idx) + 1 - -def scan_text(text: str, file_path: str, - compiled: Dict[str, re.Pattern], - meta: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Run all compiled patterns over a text blob, recording file and line per match. - Returns a list of finding dicts for reporter.py. 
- """ +# Core scanning +def scan_text(text: str, file_path: str, analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]: findings: List[Dict[str, Any]] = [] - if not text: + if not text.strip(): return findings - newlines = _newline_indices(text) - for pid, regex in compiled.items(): - desc = meta.get(pid, {}).get("description", pid) - for m in regex.finditer(text): - start = m.start() - line = _line_number(newlines, start) - raw = m.group(0) + newlines = [i for i, c in enumerate(text) if c == "\n"] + + # Presidio scan + try: + results = analyzer.analyze(text=text, language="en", score_threshold=0.01) + print(f"[i] Presidio found {len(results)} potential entities in {os.path.basename(file_path)}") + + for r in results: + if r.score < 0.3: + continue + + # Map Presidio entity to pattern ID + entity = r.entity_type.upper() + pattern_id = None + + # Direct match via "presidio_entity" field in patterns.json + for pid, rule in patterns_meta.items(): + if rule.get("presidio_entity", "").upper() == entity: + pattern_id = pid + break + # Fallback: common built-in names + if not pattern_id: + fallback_map = { + "EMAIL_ADDRESS": "email", + "PHONE_NUMBER": "phone", + "CREDIT_CARD": "credit_card", + "US_SSN": "ssn", + "PERSON": "full_name", + "LOCATION": "location", + "IP_ADDRESS": "ip_address" + } + pattern_id = fallback_map.get(entity, entity.lower()) + + line = bisect(newlines, r.start) + 1 + match_text = text[r.start:r.end] + findings.append({ - "pattern": pid, + "pattern": pattern_id, "file": file_path, "line": line, - "match": raw, - "description": desc + "match": match_text, + "description": patterns_meta.get(pattern_id, {}).get("description", f"Detected {entity}") }) + print(f" → Found: {pattern_id} | {match_text} | Line {line}") + + except Exception as e: + print(f"[!] 
Presidio crashed: {e}") + + # regex fallback + for pid, rule in patterns_meta.items(): + pat = rule.get("pattern") + if not pat or pat == "NOT_NEEDED": + continue + try: + for m in re.finditer(pat, text, re.DOTALL): + line = bisect(newlines, m.start()) + 1 + findings.append({ + "pattern": pid, + "file": file_path, + "line": line, + "match": m.group(0), + "description": rule.get("description", pid) + }) + print(f" → Regex hit: {pid} | {m.group(0)} | Line {line}") + except re.error: + pass + return findings -def scan_paths(paths: Iterable[str], - compiled: Dict[str, re.Pattern], - meta: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]: - all_findings: List[Dict[str, Any]] = [] +# file scanner +def scan_paths(paths: Iterable[str], analyzer: AnalyzerEngine, patterns_meta: Dict) -> List[Dict[str, Any]]: + all_findings = [] for path in paths: + print(f"\n[i] Reading: {path}") content = read_file(path) - # Ensure we have text (read_file should return str; if bytes, decode) if isinstance(content, bytes): try: content = content.decode("utf-8") - except UnicodeDecodeError: + except: content = content.decode("latin-1", errors="ignore") - if not isinstance(content, str): - continue - all_findings.extend(scan_text(content, path, compiled, meta)) + if isinstance(content, str) and content.strip(): + print(f" → Extracted {len(content):,} characters") + all_findings.extend(scan_text(content, path, analyzer, patterns_meta)) + else: + print(" → No text extracted (image-only PDF?)") return all_findings -# ---- CLI ---- - -def parse_args(argv: List[str]) -> argparse.Namespace: - ap = argparse.ArgumentParser(description="Unified sensitive-data scanner") - ap.add_argument("--file", help="Single file to scan (overrides --root and --ext)") - ap.add_argument("--root", default=".", help="Root directory to scan (default: current dir)") - ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE, help="Path to patterns.json") - ap.add_argument("--out", default=DEFAULT_OUT, help="Path to 
JSON report output") - ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS, - help="File extensions to include (e.g., .py .txt .md .cfg .json)") - ap.add_argument("--no-console", action="store_true", help="Skip console summary output") - return ap.parse_args(argv) +# CLI & main +def parse_args(argv=None): + # parse_args function + ap = argparse.ArgumentParser(description="Sensitive data scanner") + ap.add_argument("--file", help="Single file to scan") + ap.add_argument("--root", default=".", help="Root directory") + ap.add_argument("--patterns", default=DEFAULT_PATTERNS_FILE) + ap.add_argument("--out", default=DEFAULT_OUT) + ap.add_argument("--ext", nargs="*", default=DEFAULT_TARGET_EXTS) + ap.add_argument("--no-console", action="store_true") + return ap.parse_args(argv or sys.argv[1:]) -# Function to get a valid directory path from the user def get_valid_path(): while True: - path = input("Enter the directory path containing the files to scan (press Enter to use the project folder): ").strip() - path = path.strip('"').strip("'") # Remove surrounding quotes if present - if not path: # If no input is provided, use the current directory - print("No path provided. Files will be scanned in the project folder.") - print("-" * 63) + path = input("Enter directory (or Enter for current): ").strip().strip('"\'') + if not path: return os.getcwd() - elif os.path.isdir(path): # Validate the provided path - print("-" * 63) + if os.path.isdir(path): return path - - else: - print("We cannot find that path. 
Please enter a valid directory or press Enter to use the project folder.") - -# ---- Main ---- - -def main(argv: List[str] | None = None) -> int: - ns = parse_args(argv or sys.argv[1:]) + print("Invalid path, try again.") - patterns = load_patterns(ns.patterns) - compiled = compile_patterns(patterns) +def main(): + ns = parse_args() + patterns_meta = load_patterns(ns.patterns) + analyzer = get_analyzer() - # Check if a specific file is provided if ns.file: - # Validate the file path - if not os.path.isfile(ns.file): - print(f"[!] The specified file does not exist: {ns.file}") - return 1 - - # Scan only the specified file - print(f"[i] Scanning the specified file: {ns.file}") - findings = scan_paths([ns.file], compiled, patterns) + paths = [ns.file] + print(f"[i] Scanning single file: {ns.file}") else: - # Identify valid directory to scan directory = get_valid_path() + paths = list(find_files(directory, ns.ext)) + print(f"[i] Found {len(paths)} files to scan in {directory}") - # Use project helper to expand files under root with extension filter - file_list = list(find_files(directory, ns.ext)) - findings = scan_paths(file_list, compiled, patterns) + findings = scan_paths(paths, analyzer, patterns_meta) - # JSON report (enriched with risk/tip/laws by reporter.write_report) enriched = write_report(findings, out_path=ns.out) + print(f"\n[i] Full report (with paths & raw PII) saved locally → {ns.out}") + print(" This file is git-ignored and must NEVER be committed.") - # Console summary (masked) - if not ns.no_console: - generate_console_report(findings) - - # Exit code policy: fail if any High risk present - has_high = any(f.get("risk") == "High" for f in enriched) - if has_high: - print("[!] High-risk data found. Failing scan.") + if any(f.get("risk") == "High" for f in enriched): + print("\n[!] HIGH-RISK PII DETECTED → SCAN FAILED") return 1 - - if enriched: - print("[i] Findings present. 
Review the report.") + elif findings: + print(f"\n[i] {len(findings)} findings → check {ns.out}") else: - print("[✓] No sensitive data detected.") + print("\n[Success] NO PII FOUND!") return 0 if __name__ == "__main__": diff --git a/main.py b/main.py new file mode 100644 index 0000000..3ca2e89 --- /dev/null +++ b/main.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +import os +print("Redback Ethics scanner – environment is ready!") +print("Installed packages test:") + +try: + import spacy + print(f"spaCy {spacy.__version__} OK") +except Exception as e: + print(f"spaCy failed: {e}") + +try: + from presidio_analyzer import AnalyzerEngine + print("Presidio Analyzer OK") +except Exception as e: + print(f"Presidio failed: {e}") + +try: + import cv2 + print(f"OpenCV {cv2.__version__} OK") +except Exception as e: + print(f"OpenCV failed: {e}") + +print("\nNext step: actual scanner code needs to be added in scanner-bot/ or asset-scanner/") \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 15227e1..64e62f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,12 @@ +presidio-analyzer +presidio-anonymizer +spacy==3.8.0 defusedxml==0.7.1 Faker==37.1.0 fonttools==4.57.0 fpdf2==2.8.3 lxml==5.4.0 -numpy==2.0.2 +numpy opencv-python==4.12.0.88 packaging==25.0 pandas==2.2.3 @@ -14,4 +17,4 @@ python-docx==1.1.2 pytz==2025.2 six==1.17.0 typing_extensions==4.13.2 -tzdata==2025.2 \ No newline at end of file +tzdata==2025.2