Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
66 commits
Select commit Hold shift + click to select a range
cccbcc1
retrieve anonymized data for registry
JohannMrBot Jul 9, 2025
d2b3315
retrieve anonymized data for assessments
JohannMrBot Jul 10, 2025
257b445
get sensitivity from the database
JohannMrBot Jul 15, 2025
096767f
add flag to toggle data anonymization
JohannMrBot Jul 16, 2025
3e74f3f
add buttons to generate and download anonymized registry and assessments
JohannMrBot Jul 16, 2025
450cb2a
combine csv and xlsx product creation
JohannMrBot Jul 18, 2025
e8a2acd
add anonymized Report buttons
JohannMrBot Jul 18, 2025
00a5042
replace options value with their label
JohannMrBot Jul 21, 2025
de45683
create anonymized table on registry start
JohannMrBot Jul 22, 2025
6a9db08
anonymized farmer name options
JohannMrBot Jul 23, 2025
e6eda40
add api route for getting results
JohannMrBot Jul 28, 2025
f7f7e41
add question anonymity and type to database model
JohannMrBot Jul 28, 2025
eb3cbfd
refactor field query creation
JohannMrBot Jul 30, 2025
efe7fa8
move register form validation to view class
JohannMrBot Jul 30, 2025
ca9839b
add order to question types
JohannMrBot Jul 31, 2025
aeea733
add labels for question anonymity
JohannMrBot Jul 31, 2025
ef346f7
get question types from database
JohannMrBot Jul 31, 2025
3ebd711
add select input for anonymity
JohannMrBot Aug 1, 2025
dbcf308
hide anonymity when not sensitive
JohannMrBot Aug 4, 2025
43abe8e
relate type and anonymity in a table
JohannMrBot Aug 5, 2025
693c116
add anonymization table model
JohannMrBot Aug 5, 2025
a61a2ca
add anonymization params to interface
JohannMrBot Aug 6, 2025
bb14f59
add anonymization params validation in interface
JohannMrBot Aug 7, 2025
0edc94e
add question enums in javascript
JohannMrBot Aug 7, 2025
933e9c2
retrieve type info from enums
JohannMrBot Aug 8, 2025
375b280
add params to GET question details
JohannMrBot Aug 8, 2025
35a9ff8
add anonymization process
JohannMrBot Aug 9, 2025
fef3abb
add anonymization for dates with year-month
JohannMrBot Aug 11, 2025
53a38c7
remove anonymized values when form is canceled
JohannMrBot Aug 11, 2025
04e9dde
add duplicate entry handling in anonymized
JohannMrBot Aug 13, 2025
9f18f79
add geo noise anonymization
JohannMrBot Aug 14, 2025
6ee884a
fix template format
JohannMrBot Aug 14, 2025
36a58e7
add foreign key to anonymized referencing REG
JohannMrBot Aug 18, 2025
8a11034
add package number extraction from form
JohannMrBot Aug 18, 2025
d3763ef
move anonymization to perform after
JohannMrBot Aug 18, 2025
55450e3
remove foreign key to anonymized referencing REG
JohannMrBot Aug 21, 2025
8e65970
refactor anonymization db processes: change location
JohannMrBot Aug 22, 2025
b5ad929
add db processes to delete registry and assessment data
JohannMrBot Aug 22, 2025
ec9a1ed
refactor deletion of registry and assessment data
JohannMrBot Aug 22, 2025
fb130b9
create functions to select registry and assessments data
JohannMrBot Aug 25, 2025
3a8cb58
add anonymization to update data
JohannMrBot Aug 25, 2025
1660297
change download button layout in the dashboard
JohannMrBot Aug 27, 2025
1ba4a46
make downloads panel collapsible
JohannMrBot Aug 27, 2025
3627f53
fix registry and assessment start and cancel tests
JohannMrBot Aug 28, 2025
c2b3df6
extract methods for getting registry and assessment key questions
JohannMrBot Aug 28, 2025
721286a
add process anonymize entire project
JohannMrBot Aug 29, 2025
17e7937
set project results for api to be always anonymized
JohannMrBot Aug 29, 2025
3b3dc20
hide anonymized products for projects with no anonymized values
JohannMrBot Sep 1, 2025
03fc9c4
hide anonymized downloads when module.dataprivacy is false
JohannMrBot Sep 3, 2025
fce85f2
fix query to determine if project is anonymized
JohannMrBot Sep 3, 2025
4105818
anonymize packages farmername. Remove IMEI and instancename
JohannMrBot Sep 4, 2025
d44b82e
fix error message for pseudonym
JohannMrBot Sep 8, 2025
2239262
add error message for interval greater than zero
JohannMrBot Sep 10, 2025
560c7b8
remove unnecessary endpoint
JohannMrBot Oct 6, 2025
6a8fe74
fix anonymization
JohannMrBot Oct 6, 2025
69c9f5b
refactor and fix tests for ReadDataOfProjectViewApi
JohannMrBot Oct 7, 2025
105f8e8
create validator for project access
JohannMrBot Oct 7, 2025
832ebcb
fix circular import error
JohannMrBot Oct 9, 2025
8ff1858
add script to anonymize project
JohannMrBot Oct 10, 2025
94e2b80
add tests for get_question_by_field_name
JohannMrBot Oct 10, 2025
454bf0b
optimization 1 for get_question_by_field_name
JohannMrBot Oct 10, 2025
0fd11a6
add tool tips and change texts for question sensitivity
JohannMrBot Nov 3, 2025
a3d6a22
fix: catch database error
JohannMrBot Dec 19, 2025
0198e83
update texts for anonymization tooltips
JohannMrBot Jan 13, 2026
c81b950
group bound inputs
JohannMrBot Feb 3, 2026
6ceb998
alternate tooltip dynamically
JohannMrBot Feb 3, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 36 additions & 1 deletion climmob/models/climmobv4.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,14 @@ class Question_subgroup(Base):
parent_id = Column(Unicode(80), primary_key=True, nullable=True)


# Lookup table of the data types a question can have.
# Question.question_dtype and QuestionTypeAnonymity.type_id both hold a
# foreign key to question_type.id.
class QuestionType(Base):
__tablename__ = "question_type"

# Integer primary key of the type (the value stored in question_dtype).
id = Column(Integer, primary_key=True, nullable=False)
# Short name of the type, at most 64 characters.
name = Column(Unicode(64), nullable=False)
# Mandatory integer; presumably the position of the type when the types
# are listed in the interface -- TODO confirm against the consumers.
order = Column(Integer, nullable=False)


class Question(Base):
__tablename__ = "question"
__table_args__ = (
Expand All @@ -805,7 +813,7 @@ class Question(Base):
question_unit = Column(Unicode(120))
question_min = Column(Float, nullable=True)
question_max = Column(Float, nullable=True)
question_dtype = Column(Integer)
question_dtype = Column(Integer, ForeignKey("question_type.id"))
question_cmp = Column(Unicode(120))
question_reqinreg = Column(Integer, server_default=text("'0'"))
question_reqinasses = Column(Integer, server_default=text("'0'"))
Expand Down Expand Up @@ -835,12 +843,39 @@ class Question(Base):
qstgroups_user = Column(Unicode(80), nullable=True)
qstgroups_id = Column(Unicode(80), nullable=True)
question_sensitive = Column(Integer, server_default=text("'0'"))
question_anonymity = Column(Integer, ForeignKey("question_anonymity.id"))
question_lang = Column(ForeignKey("i18n.lang_code"), nullable=True)
extra = Column(MEDIUMTEXT(collation="utf8mb4_unicode_ci"))
i18n = relationship("I18n")
user = relationship("User")


# Lookup table of the anonymization methods that can be assigned to a
# question. Question.question_anonymity and
# QuestionTypeAnonymity.anonymity_id both hold a foreign key to
# question_anonymity.id.
class QuestionAnonymity(Base):
__tablename__ = "question_anonymity"

# Integer primary key of the anonymization method.
id = Column(Integer, primary_key=True, nullable=False)
# Short name of the method, at most 64 characters.
name = Column(Unicode(64), nullable=False)


# Association table linking question types to anonymization methods.
# The primary key is the composite (type_id, anonymity_id), so each pair
# can be stored only once.
# NOTE(review): presumably a row means "this anonymity method is allowed
# for this question type" -- confirm against the code that reads it.
class QuestionTypeAnonymity(Base):
__tablename__ = "question_type_anonymity"

# Question type side of the pair (question_type.id).
type_id = Column(
Integer, ForeignKey("question_type.id"), primary_key=True, nullable=False
)
# Anonymization method side of the pair (question_anonymity.id).
anonymity_id = Column(
Integer, ForeignKey("question_anonymity.id"), primary_key=True, nullable=False
)


# Name/value parameters of the anonymization configured for a question.
# The primary key is the composite (question_id, name): one value per
# parameter name and question.
class AnonymizationParameter(Base):
__tablename__ = "anonymization_parameter"

# Id of the question the parameter belongs to.
# NOTE(review): no ForeignKey is declared here, so the database does not
# enforce that the question exists -- confirm this is intentional.
question_id = Column(Integer, primary_key=True, nullable=False)
# Parameter name, at most 64 characters.
name = Column(Unicode(64), primary_key=True, nullable=False)
# Parameter value, always stored as text (at most 64 characters); readers
# have to convert it to the type they need.
value = Column(Unicode(64), nullable=False)


class Registry(Base):
__tablename__ = "registry"
__table_args__ = (
Expand Down
2 changes: 2 additions & 0 deletions climmob/processes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,5 @@
from climmob.processes.db.project_location_unit_objective import *
from climmob.processes.db.location_unit_of_analysis_objectives import *
from climmob.processes.db.affiliation import *
from climmob.processes.db.anonymized import *
from climmob.processes.db.anonymization_params import *
50 changes: 50 additions & 0 deletions climmob/processes/db/anonymization_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Names exported by "from climmob.processes.db.anonymization_params import *".
# get_anonymization_params and delete_existing_anonymization_params are not
# listed, so the star import does not re-export them.
__all__ = ["save_anonymization_params", "get_anonymization_params_as_dict"]

import re

from climmob.models import mapFromSchema
from climmob.models.climmobv4 import AnonymizationParameter


def get_anonymization_params(question_id, request):
    """Load every anonymization parameter stored for one question.

    :param question_id: id of the question whose parameters are wanted.
    :param request: object exposing the SQLAlchemy session as ``dbsession``.
    :return: the matching ``AnonymizationParameter`` rows converted by
        ``mapFromSchema``.
    """
    rows = (
        request.dbsession.query(AnonymizationParameter)
        .filter(AnonymizationParameter.question_id == question_id)
        .all()
    )
    return mapFromSchema(rows)


def get_anonymization_params_as_dict(question_id, request):
    """Return the anonymization parameters of a question as ``{name: value}``.

    :param question_id: id of the question whose parameters are wanted.
    :param request: object exposing the SQLAlchemy session as ``dbsession``.
    :return: dict keyed by parameter name; empty when nothing is stored.
    """
    return {
        entry["name"]: entry["value"]
        for entry in get_anonymization_params(question_id, request)
    }


# Replace the anonymization parameters stored for a question.
#
# question_id: id of the question the parameters belong to.
# data: mapping (e.g. submitted form data); only keys that start with
#       "anonym_param_" followed by lowercase letters/underscores are used.
# request: object exposing the SQLAlchemy session as "dbsession".
#
# Returns nothing.
def save_anonymization_params(question_id, data, request):
# Drop whatever was stored before so the new set fully replaces it.
# NOTE(review): the (success, message) result of the delete is ignored, so
# a failed delete is followed by the inserts below anyway.
delete_existing_anonymization_params(question_id, request)

params = []
for key in data.keys():
# NOTE(review): re.match anchors only at the start, so a key such as
# "anonym_param_abc1" is accepted and yields the name "abc".
pattern = r"anonym_param_([a-z_]+)"
match = re.match(pattern, key)
if match:
# The captured suffix becomes the parameter name.
params.append({"name": match.group(1), "value": data[key]})

for param in params:
new_param = AnonymizationParameter(**param)
new_param.question_id = question_id
request.dbsession.add(new_param)
# Send the pending rows to the database within the current transaction.
request.dbsession.flush()


def delete_existing_anonymization_params(question_id, request):
    """Delete every anonymization parameter stored for a question.

    :param question_id: id of the question whose parameters are removed.
    :param request: object exposing the SQLAlchemy session as ``dbsession``.
    :return: ``(True, "")`` on success, ``(False, <error text>)`` when any
        exception is raised while deleting.
    """
    try:
        stale_rows = request.dbsession.query(AnonymizationParameter).filter(
            AnonymizationParameter.question_id == question_id
        )
        stale_rows.delete()
    except Exception as error:
        return False, str(error)
    return True, ""
246 changes: 246 additions & 0 deletions climmob/processes/db/anonymized.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
import re
from datetime import datetime, date

from climmob.processes import get_project_cod_by_id, get_owner_user_name_by_project_id
from climmob.processes.db.results import getJSONResult
from climmob.models.repository import sql_execute
from climmob.processes.db.anonymization_params import get_anonymization_params_as_dict
from climmob.processes.db.question import (
get_sensitive_questions_anonymity_by_project_id,
)
from climmob.utility import (
get_question_by_field_name,
QuestionAnonymity,
add_noise_to_gps_coordinates,
QuestionType,
)

# Names exported by "from climmob.processes.db.anonymized import *".
# anonymize_field_value, insert_anonymized_field and update_anonymized_field
# are not listed, so the star import does not re-export them.
__all__ = [
"anonymize_questions",
"delete_anonymized_values_by_form_id",
"delete_anonymized_values_by_form_id_and_reg_id",
"update_anonymized",
"anonymize_project",
"is_project_anonymized",
]


def anonymize_project(project_id, request):
    """Anonymize the sensitive answers already collected for a project.

    Every collected entry is scanned for columns named ``REG_<field>`` or
    ``ASS<form id>_<field>``. Answers that belong to a sensitive question
    whose anonymity is not ``REMOVE`` are anonymized and inserted into the
    ``<owner>_<project code>.anonymized`` table.

    :param project_id: id of the project to process.
    :param request: current request, handed to the database helpers.
    :return: ``(True, "")`` on success, otherwise ``(False, message)``.
        Rows that are already present (duplicate entries) are skipped and do
        not count as a failure.
    """
    project_code = get_project_cod_by_id(project_id, request)
    user_owner = get_owner_user_name_by_project_id(project_id, request)
    questions = get_sensitive_questions_anonymity_by_project_id(project_id, request)

    project_collected_data = getJSONResult(
        user_owner, project_id, project_code, request
    )["data"]

    schema = user_owner + "_" + project_code

    # group(1): "REG" or "ASS<form id>"; group(3): the assessment form id;
    # group(4): the field name. Compiled once instead of once per column.
    key_pattern = re.compile(r"(REG|(ASS(.+?)))_(.*)")

    for entry in project_collected_data:
        reg_id = entry["REG_qst162"]
        to_anonymize = []
        for key in entry.keys():
            if entry[key] is None:
                continue
            match = key_pattern.match(key)
            if match is None:
                continue
            question = get_question_by_field_name(match.group(4), questions)
            if not question:
                continue
            if question.question_anonymity == QuestionAnonymity.REMOVE.value:
                continue
            # Registry rows are stored under the form id "-".
            form_id = "-" if match.group(1) == "REG" else match.group(3)
            to_anonymize.append(
                {
                    "field_name": match.group(4),
                    "value": entry[key],
                    "question": question,
                    "form_id": form_id,
                }
            )

        for field in to_anonymize:
            # Stop on a failed anonymization: the field may still carry the
            # raw answer, which must never reach the anonymized table.
            success, msg = anonymize_field_value(field, reg_id, request)
            if not success:
                return False, msg
            success, msg = insert_anonymized_field(
                field, field["form_id"], reg_id, schema
            )
            if not success:
                if msg.startswith("Duplicate entry for package"):
                    # To ignore entries that are already anonymized
                    continue
                return False, msg

    return True, ""


def anonymize_questions(request, form, form_id, project_id, user_owner, project_cod):
    """Anonymize the sensitive answers of one submitted form.

    Form keys are expected to look like ``grp_<n>/<field name>``. The package
    (registry) id is read from the ``QST162`` / ``QST163`` field, whose value
    may be wrapped as ``<owner>-<id>-<project code>~``.

    :param request: current request, handed to the database helpers.
    :param form: mapping with the submitted form data.
    :param form_id: id of the form the values are stored under.
    :param project_id: id of the project the form belongs to.
    :param user_owner: user name of the project owner (schema prefix).
    :param project_cod: project code (schema suffix).
    :return: always a ``(success, message)`` tuple.
    """
    questions = get_sensitive_questions_anonymity_by_project_id(project_id, request)
    schema = user_owner + "_" + project_cod

    field_pattern = re.compile(r"grp_\d+/(.+)")
    # Owner and project code are escaped so characters such as "." in them
    # are matched literally instead of being read as regex syntax.
    package_pattern = re.compile(
        rf"({re.escape(user_owner)}-)?(\d+)(-{re.escape(project_cod)}~)?"
    )

    registry_id = None
    to_anonymize = []

    for key in form.keys():
        match = field_pattern.fullmatch(key)
        if not match:
            continue
        field_name = match.group(1)

        if field_name in ("QST162", "QST163"):
            package_match = package_pattern.fullmatch(str(form[key]))
            if not package_match:
                return False, "Could not anonymize"
            registry_id = package_match.group(2)
            continue

        question = get_question_by_field_name(field_name, questions)
        if question and question.question_anonymity != QuestionAnonymity.REMOVE.value:
            to_anonymize.append(
                {"field_name": field_name, "value": form[key], "question": question}
            )

    if not to_anonymize:
        # Same (success, message) shape as every other exit, so callers can
        # always unpack the result.
        return True, ""

    if registry_id is None:
        # Without a package id the rows would be stored under the literal
        # text 'None' and could never be matched to a registry entry.
        return False, "Could not anonymize"

    for field in to_anonymize:
        # A failed anonymization may leave the raw answer in the field, so
        # it must not be inserted.
        success, msg = anonymize_field_value(field, registry_id, request)
        if not success:
            return False, msg
        success, msg = insert_anonymized_field(field, form_id, registry_id, schema)
        if not success:
            return False, msg

    return True, ""


def _range_label(value, lower_bound, upper_bound, interval):
    """Return the ``<lower``, ``>upper`` or ``start-end`` label for *value*."""
    if value < lower_bound:
        return f"<{lower_bound}"
    if value > upper_bound:
        return f">{upper_bound}"
    start = lower_bound
    # Walk the half-open buckets [start, start + interval). The last bucket
    # is closed on the right, so a value equal to upper_bound is still
    # labelled instead of falling through unanonymized.
    while start + interval < upper_bound and value >= start + interval:
        start += interval
    return f"{start}-{start + interval}"


def anonymize_field_value(field, registry_id, request):
    """Replace ``field["value"]`` in place with its anonymized form.

    The method is chosen by ``field["question"].question_anonymity``:

    * ``PSEUDONYM``: the ``pseudonym`` parameter with ``{}`` replaced by the
      registry id.
    * ``RANGE``: the number is replaced by ``<lower``, ``>upper`` or the
      ``start-end`` bucket it falls in (parameters ``lower_bound``,
      ``upper_bound`` and ``interval``, parsed as int for integer questions
      and float otherwise).
    * ``MONTH_YEAR``: an ISO date/datetime string is reduced to ``YYYY-MM``.
    * ``NOISE``: noise is added to the first two parts of a space separated
      geopoint.

    Any other anonymity value leaves the field untouched.

    :param field: dict with at least ``question`` and ``value``.
    :param registry_id: package id used to build pseudonyms.
    :param request: current request, used to load the question parameters.
    :return: ``(True, "")`` on success or ``(False, message)``. On failure
        ``field["value"]`` is emptied so that a caller ignoring the status
        cannot store the raw answer.
    """
    question = field["question"]
    anonymity = question.question_anonymity
    params = get_anonymization_params_as_dict(question.question_id, request)

    if anonymity == QuestionAnonymity.PSEUDONYM.value:
        # The registry id may arrive as an int; str.replace only takes str.
        field["value"] = params["pseudonym"].replace("{}", str(registry_id))
    elif anonymity == QuestionAnonymity.RANGE.value:
        if question.question_dtype == QuestionType.INTEGER.value:
            parser = int
        else:
            parser = float

        value = parser(field["value"])
        lower_bound = parser(params["lower_bound"])
        upper_bound = parser(params["upper_bound"])
        interval = parser(params["interval"])

        if interval <= 0:
            # A non-positive step would never advance through the buckets.
            field["value"] = ""
            return False, "Could not anonymize: interval must be greater than zero"

        field["value"] = _range_label(value, lower_bound, upper_bound, interval)
    elif anonymity == QuestionAnonymity.MONTH_YEAR.value:
        dt = datetime.fromisoformat(field["value"])
        field["value"] = dt.strftime("%Y-%m")
    elif anonymity == QuestionAnonymity.NOISE.value:
        geo_point = field["value"].split()
        if len(geo_point) < 2:
            field["value"] = ""
            return False, "Could not anonymize GeoPoint"
        geo_point[0], geo_point[1] = add_noise_to_gps_coordinates(
            float(geo_point[0]), float(geo_point[1]), 3000
        )
        if geo_point[0] == "Error" or geo_point[1] == "Error":
            field["value"] = ""
            return False, "Could not anonymize GeoPoint"
        # The noisy coordinates may be numbers; str.join needs strings.
        field["value"] = " ".join(str(part) for part in geo_point)

    return True, ""


def insert_anonymized_field(field, form_id, registry_id, schema):
    """Insert one anonymized value into ``<schema>.anonymized``.

    :param field: dict with ``field_name`` and the anonymized ``value``.
    :param form_id: form the value belongs to (``"-"`` is the registry).
    :param registry_id: package id of the entry.
    :param schema: database schema that holds the ``anonymized`` table.
    :return: ``(True, "")`` on success. On a duplicate key the message is
        ``Duplicate entry for package '<id>' in <form>``; on any other error
        the message is the text of the exception.
    """
    # NOTE(review): the statement is built by string interpolation, so a
    # quote inside any of the values breaks it (or injects SQL). Switch to
    # bound parameters if sql_execute supports them.
    sql_insert_value = (
        f"("
        f"'{form_id}', "
        f"'{registry_id}', "
        f"'{field['field_name']}', "
        f"'{field['value']}'"
        f")"
    )
    sql = f"INSERT INTO {schema}.anonymized VALUES {sql_insert_value}"
    try:
        sql_execute(sql)
    except Exception as e:
        error_text = str(e)
        # form_id is escaped so ids containing regex metacharacters are
        # matched literally.
        match = re.search(
            rf"Duplicate entry '({re.escape(str(form_id))})-(\d+)-(.+?)'", error_text
        )
        if match:
            form_name = "registry" if form_id == "-" else f"assessment '{form_id}'"
            msg = f"Duplicate entry for package '{match.group(2)}' in {form_name}"
            return False, msg
        # Report the real cause instead of an empty message.
        return False, error_text
    return True, ""


def _is_unchanged(current_value, submitted):
    """Tell whether *submitted* equals the value currently stored."""
    if current_value is None:
        # type(None) cannot parse anything; only "still empty" is unchanged.
        return submitted is None
    db_type = type(current_value)
    if db_type is date:
        new_value = date.fromisoformat(submitted)
    elif db_type is datetime:
        new_value = datetime.fromisoformat(submitted)
    else:
        new_value = db_type(submitted)
    return current_value == new_value


def update_anonymized(to_anonymize, schema, form_id, registry_id, request, current):
    """Re-anonymize the fields whose submitted value differs from the stored one.

    :param to_anonymize: list of field dicts (``field_name``, ``value``,
        ``question``) taken from the edited form.
    :param schema: database schema that holds the ``anonymized`` table.
    :param form_id: form the values belong to.
    :param registry_id: package id of the entry.
    :param request: current request, handed to the anonymization helpers.
    :param current: values currently stored, indexed by field name; the
        stored value's type decides how the submitted text is parsed.
    :return: ``(True, "")`` on success, otherwise ``(False, message)``.
    """
    for field in to_anonymize:
        if _is_unchanged(current[field["field_name"]], field["value"]):
            # Only changed values will be updated to avoid recalculating anonymizations
            continue
        # A failed anonymization may leave the raw answer in the field, so
        # it must not be written.
        success, msg = anonymize_field_value(field, registry_id, request)
        if not success:
            return False, msg
        # NOTE(review): this is an UPDATE only; if no anonymized row exists
        # yet for the field, nothing is stored -- confirm that is acceptable.
        success, msg = update_anonymized_field(field, form_id, registry_id, schema)
        if not success:
            return False, msg
    return True, ""


def update_anonymized_field(field, form_id, registry_id, schema):
    """Overwrite one stored anonymized value.

    The row is addressed by form id, registry id and column name inside
    ``<schema>.anonymized``.

    :param field: dict with ``field_name`` and the new anonymized ``value``.
    :param form_id: form the value belongs to.
    :param registry_id: package id of the entry.
    :param schema: database schema that holds the ``anonymized`` table.
    :return: ``(True, "")`` on success, ``(False, <error text>)`` when the
        statement raises.
    """
    set_clause = f"UPDATE {schema}.anonymized SET value='{field['value']}' "
    where_clause = (
        f"WHERE form_id='{form_id}' "
        f"AND reg_id='{registry_id}' "
        f"AND col_name='{field['field_name']}'"
    )
    statement = set_clause + where_clause
    try:
        sql_execute(statement)
    except Exception as error:
        return False, str(error)
    return True, ""


def delete_anonymized_values_by_form_id(schema, form_id):
    """Remove every anonymized value stored for one form.

    :param schema: database schema that holds the ``anonymized`` table.
    :param form_id: form whose rows are deleted.
    """
    statement = "DELETE FROM {}.anonymized where form_id='{}'".format(schema, form_id)
    sql_execute(statement)


def delete_anonymized_values_by_form_id_and_reg_id(schema, form_id, reg_id):
    """Remove the anonymized values of one registry entry within one form.

    :param schema: database schema that holds the ``anonymized`` table.
    :param form_id: form the rows belong to.
    :param reg_id: package id whose rows are deleted.
    """
    clauses = [
        f"DELETE FROM {schema}.anonymized",
        f"WHERE form_id='{form_id}'",
        f"AND reg_id='{reg_id}'",
    ]
    sql_execute(" ".join(clauses))


def is_project_anonymized(schema):
    """Tell whether every registry entry of a project has anonymized values.

    Compares the number of distinct ``reg_id`` values stored for the registry
    form (``form_id = '-'``) in ``<schema>.anonymized`` with the number of
    ``qst162`` values in ``<schema>.REG_geninfo``. Two equal counts,
    including zero and zero, give ``True``.

    :param schema: database schema of the project.
    :return: ``True`` when both counts are equal, ``False`` otherwise.
    """
    sql = f"""
SELECT
(SELECT
COUNT(DISTINCT reg_id) AS count
FROM
{schema}.anonymized
WHERE
form_id = '-') = (SELECT
COUNT(qst162) AS count
FROM
{schema}.REG_geninfo) AS count_matches """

    row = sql_execute(sql).first()
    counts_match = row["count_matches"]
    return counts_match == 1
Loading