From 5b608e6fee477de58171a8c315183d2df5330c88 Mon Sep 17 00:00:00 2001 From: Uvin Perera Date: Tue, 19 Aug 2025 19:10:57 +1000 Subject: [PATCH 1/3] feat: add anonymization demo endpoint --- api/privacy_demo.py | 26 ++++++++++++++++++++++++++ app.py | 4 ++++ instance/goals.db | Bin 8192 -> 16384 bytes my_database.db | Bin 0 -> 12288 bytes utils/privacy.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+) create mode 100644 api/privacy_demo.py create mode 100644 my_database.db create mode 100644 utils/privacy.py diff --git a/api/privacy_demo.py b/api/privacy_demo.py new file mode 100644 index 0000000..6b54606 --- /dev/null +++ b/api/privacy_demo.py @@ -0,0 +1,26 @@ +from flask import Blueprint, request, jsonify +from utils.privacy import ( + stable_hash, initials, pseudonym, + birth_year_from_iso, age_bucket_from_year +) + +# Create a Blueprint for privacy demo that can be plugged into the main Flask app +bp = Blueprint('privacy_demo', __name__, url_prefix = "/debug") + +@bp.post("/anonymize") + +def anonymize_demo(): + data = request.get_json(silent = True) or {} + email = data.get("email") + name = data.get("name") + dob = data.get("dob") + + by = birth_year_from_iso(dob) + + return jsonify({ + "email_hash": stable_hash(email) if email else None, + "name_initials": initials(name), + "pseudonymId": pseudonym("http://redback.fit/user", email or name or "temp"), + "birth_year": by, + "age_bucket": age_bucket_from_year(by) + }) diff --git a/app.py b/app.py index f9f3767..e667de9 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,8 @@ from api.dashboard import dashboard_bp from models import db from dotenv import load_dotenv +from api.privacy_demo import bp as privacy_demo_bp + import os import pyrebase @@ -52,6 +54,8 @@ app.register_blueprint(dashboard_bp, url_prefix='/api/dashboard') app.register_blueprint(profile_api, url_prefix='/api/profile') +app.register_blueprint(privacy_demo_bp) + # Main index route (login + welcome) @app.route('/', methods=['GET', 'POST']) def index(): diff --git a/instance/goals.db b/instance/goals.db index bdb45dd8308cff3e0d07315760a7d79d870e98e3..82304afc1d5eb2adde92fc155ef6afedd2a01fdc 100644 GIT binary patch delta 565 zcmah`&r1S96rOb>b4fReFz7N_f`YADhmN`0QsU1R4fG=8?r4*?t2?t2U6eX@scYAS z4xPgPq_h1mtwy8=X?PFbd*8?VKHdzI!y)&ypL@@EGU@!%JCBpUY}E5sya}P^4+5JuujEMzBFFc6Evv4$hGlx3sXK zV{0jl`iP-F_|P#ibyiX{d0DN(Y)z@cFwtkjN+EMmQDAsx%wQ2Aa?Gy=vLOx-N8^_E zI)vdmvY9#S>+@HrC+559_1DI4zN%mFIwdELfH)faH(tbMjdO!ges}9cwkab9Om)y5 zwy@qnT3a%^*g}k$22JW1wTU5N-IkE;*p45>6Heq&2UAOnAxbf$vC_{^m53&Fjg~Oe SngDU)Xnl(NdwkyW-|BB2=c4)m delta 54 zcmZo@U~F)hAT7wqz`(!+#4x}(QOB5{kwLGln-?g=$hVq-Z}nzD0b9P!tNB6&0os}g AA^-pY diff --git a/my_database.db b/my_database.db new file mode 100644 index 0000000000000000000000000000000000000000..dd68529b53fab70f0e98f1863545d78371dc6abe GIT binary patch literal 12288 zcmeI#&r8EF6bJBj6NSOVTdz58Y>42+qjg3pw%zPfaHmq+I0|j&n$90*!Ha*c|A23u z6E_(19bLoPKrcsJbhG10*M2v=qr@v<$Av7k!v6xiuZB9i$0s;_#00bZa0SG_< q0uX=z1Rwx`9Tq_Szr#NlSAzfqAOHafKmY;|fB*y_009Vm3w!~BU8PX~ literal 0 HcmV?d00001 diff --git a/utils/privacy.py b/utils/privacy.py new file mode 100644 index 0000000..7c52a01 --- /dev/null +++ b/utils/privacy.py @@ -0,0 +1,44 @@ +import hashlib, os, re, uuid +from datetime import date + +# Optional salts for extra safety; we can add to a .env later +PII_SALT = os.getenv("PII_SALT", "") +PII_PEPPER = os.getenv("PII_PEPPER", "") + +def stable_hash(value: str | None) -> str | None: + """Deterministic, non-reversible hash for identifiers like email.""" + if not value: + return None + data = (PII_SALT + value + PII_PEPPER).encode("utf-8") + return hashlib.sha256(data).hexdigest() + +def initials(full_name: str | None) -> str | None: + """J D for 'Jane Doe'. Up to 3 initials, letters only.""" + if not full_name: + return None + parts = re.findall(r"[A-Za-z]+", full_name) + return "".join(p[0].upper() for p in parts[:3]) or None + +def pseudonym(namespace: str, raw_key: str) -> str: + """Stable pseudonym like user_7f3ab2 derived from a namespace + raw key.""" + ns = uuid.uuid5(uuid.NAMESPACE_URL, namespace) + return "user_" + uuid.uuid5(ns, raw_key).hex[:8] + +def birth_year_from_iso(dob_iso: str | None) -> int | None: + """YYYY[-MM[-DD]] -> YYYY""" + try: + return int(dob_iso[:4]) if dob_iso else None + except Exception: + return None + +def age_bucket_from_year(year: int | None) -> str | None: + """Return coarse age bucket for privacy (e.g., 18-24, 25-29, 30-34, ..., 50+).""" + if not year: + return None + today_year = date.today().year + age = max(0, today_year - year) + bins = [(0,17),(18,24),(25,29),(30,34),(35,39),(40,44),(45,49),(50,120)] + for lo, hi in bins: + if lo <= age <= hi: + return f"{lo:02d}-{hi:02d}" if hi < 120 else f"{lo:02d}+" + return None From edec2da2b5af3fab29f113274d10e1bdac71fe6e Mon Sep 17 00:00:00 2001 From: Uvin Perera Date: Wed, 3 Sep 2025 19:40:15 +1000 Subject: [PATCH 2/3] feat: anonymize PII in /api/profile GET response --- api/profile.py | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/api/profile.py b/api/profile.py index 0496bc3..055225c 100644 --- a/api/profile.py +++ b/api/profile.py @@ -1,10 +1,39 @@ # /api/profile.py from flask import Blueprint, jsonify, request from models.user import db, UserProfile -from flask_cors import CORS + +from utils.privacy import( + stable_hash, initials, pseudonym, + birth_year_from_iso, age_bucket_from_year +) api = Blueprint('profile_api', __name__) +def anonymize_user_record(u): + """Build an anonymized profile payload from a UserProfile row. + Hides raw PII (name/email/DOB) and exposes safe equivalents.""" + + if not u: + return None + + # Choose a stable raw key for pseudonymization + raw_key = getattr(u, "email", None) or getattr(u, "name", None) or str(getattr(u, "id", "")) + + # Your model uses 'birthDate' (ISO string) rather than 'dob' + by = birth_year_from_iso(getattr(u, "birthDate", None)) + + return { + "id": getattr(u, "id", None), # safe to keep if your API expects it + "pseudoId": pseudonym("https://redback.fit/user", raw_key), + "nameInitials": initials(getattr(u, "name", None)), + "emailHash": stable_hash(getattr(u, "email", None)), + "ageBucket": age_bucket_from_year(by), + + # keep non-PII fields you were already returning + "account": getattr(u, "account", None), + "gender": getattr(u, "gender", None), + "avatar": getattr(u, "avatar", None), + } # Profile Endpoints # @@ -17,15 +46,10 @@ def get_profile(): user_id = 1 # Replace with authenticated user ID user = UserProfile.query.filter_by(id=user_id).first() - if user: - return jsonify({ - 'name': user.name, - 'account': user.account, - 'birthDate': user.birthDate, - 'gender': user.gender, - 'avatar': user.avatar - }) - return jsonify({'message': 'User not found'}), 404 + if not user: + return jsonify({'message': 'User not found'}), 404 + + return jsonify(anonymize_user_record(user)), 200 @api.route('', methods=['POST']) From 424b8fdcd0eb6bf03743e7a2dbcaba2e8ae52200 Mon Sep 17 00:00:00 2001 From: Uvin Perera Date: Fri, 26 Sep 2025 18:22:33 +1000 Subject: [PATCH 3/3] test: anonymization changes for review --- api/profile.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/api/profile.py b/api/profile.py index 055225c..1d38516 100644 --- a/api/profile.py +++ b/api/profile.py @@ -16,8 +16,8 @@ def anonymize_user_record(u): if not u: return None - # Choose a stable raw key for pseudonymization - raw_key = getattr(u, "email", None) or getattr(u, "name", None) or str(getattr(u, "id", "")) + email_value = getattr(u, "email", None) or getattr(u, "account", None) + raw_key = email_value or getattr(u, "name", None) or str(getattr(u, "id", "")) # Your model uses 'birthDate' (ISO string) rather than 'dob' by = birth_year_from_iso(getattr(u, "birthDate", None)) @@ -26,7 +26,7 @@ def anonymize_user_record(u): "id": getattr(u, "id", None), # safe to keep if your API expects it "pseudoId": pseudonym("https://redback.fit/user", raw_key), "nameInitials": initials(getattr(u, "name", None)), - "emailHash": stable_hash(getattr(u, "email", None)), + "emailHash": stable_hash(email_value), "ageBucket": age_bucket_from_year(by), # keep non-PII fields you were already returning @@ -42,13 +42,14 @@ def anonymize_user_record(u): @api.route('', methods=['GET']) def get_profile(): - # In future develpment get the user_id from a session or token + # In future development get the user_id from a session or token user_id = 1 # Replace with authenticated user ID user = UserProfile.query.filter_by(id=user_id).first() if not user: return jsonify({'message': 'User not found'}), 404 + return jsonify(anonymize_user_record(user)), 200