-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMailtoDataEncode.py
More file actions
130 lines (104 loc) · 5.98 KB
/
MailtoDataEncode.py
File metadata and controls
130 lines (104 loc) · 5.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import json
import os, sys
import hashlib
import re
from mailto import mailto
from MailtoManage import MailtoManage
# NOTE(review): not referenced anywhere in the visible part of this file;
# presumably an upper bound on a read chunk size -- confirm before removing.
MAXIMUM_BLOCKSIZE_TO_READ = 65535

# Anchored pattern for a whole, lower-case e-mail address.
# Written as a raw string so the backslashes reach the `re` module untouched
# (in a normal string "\A" and "\." are invalid escape sequences and trigger
# warnings).  The end anchor is `\Z`: `\z` is rejected as a bad escape by `re`
# on Python versions before 3.14, which made the pattern impossible to compile.
EMAIL_ADDRESS_REGEX = r"\A[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\Z"
# description: generate the "emaildata_lookup.json" file:
# {
# "00018744d84b8ae35942b3b670d5fb734cd2eac2bf4c2e09ffba916472f3b018":"tim.barton@tabcorp.com.au",
# "014b64ed9d69d16cf8d9c4c170c9502d6b137e1d8c7b4e51ff43638b756d486f":"henrietta.thomas@tabcorp.com.au",
# "01bbb7fccf0ff2e7cfc09a1cd1ce04de8ed4c40ff24b11e063b8cd577e3202c1":"paul.moore@scientificgames.com",
# "02866c1cfc9c619ed90f31a474d3726ee7653b2fa3b2d0586211dd8425086d59":"trose@bytecraft.com.au",
# "02a56382213dd53ebd4db85b5cbc70861d9d4c46388a37f222f1003e628298b5":"clowe@agtslots.com",
# "041663e3349b3dde0a259efd171839a814ea170c97caa89f9e239b09b660b7ed":"operational.compliance@tattsgroup.com",
# "04a1ede4597722a1318119d768ebf75fc0088b57e7a1189d24da7af2ffc1984b":"dugald.peacock@tabcorp.com.au",
# "06a7d77988c4bf00867f5559b5dde67e0107645e69e159deca5149d8146ca780":"dlstaffrecords@tattsgroup.com",
# ...
# }
# As well as the 'emaildata_v2.json' file:
# {
# "Car Booking":[
# "8c165a9c8d5a7362b45c51c47eb09158086fa579eaaae2620c7572881707ca38"
# ],
# "Car Booking_cc":[
# "e6a027809eaf55716e052e4439953574d519b881bbfdd81487f998f25804eb1b"
# ],
# "Casino Reef":[
# "c2c28fe5615f9a842cfa3dc7d0274d231177558a33b78625f31de954486c510c",
# "12d09858d17f2daaa7b4b3443a73a2bc039e94e5e2b69dc21e1dce888b522416"
# ],
# "Casino Reef Datafiles":[
# "c2c28fe5615f9a842cfa3dc7d0274d231177558a33b78625f31de954486c510c",
# "12d09858d17f2daaa7b4b3443a73a2bc039e94e5e2b69dc21e1dce888b522416"
# ],
# ...
# }
# output: two files named "sample_emaildata_lookup.json" and "sample_emaildata_v2.json".
# You will need to diff/compare these with the existing lookup and emaildata files.
# input: the file generated by the 'iNetToMailtoFormat.py' script; by default this is called "sample_converted.json".
class MailtoDataEncode:
    """Build the hashed e-mail lookup file and the hashed e-mail group file.

    Constructing an instance runs the whole conversion.  The input data is
    expected to be a mapping of e-mail group name -> list of address strings.
    Every address is replaced by the SHA-256 hex digest of its lower-cased
    form and two files are written to the current working directory:

    * ``sample_emaildata_lookup.json`` -- ``{sha256_hex: email_address}``
    * ``sample_emaildata_v2.json``     -- ``{group_name: [sha256_hex, ...]}``
    """

    # Loose pattern used to pull the first address out of a raw entry.
    # Compiled once because it is applied to every entry of every group.
    _EMAIL_PATTERN = re.compile(r'[\w\.-]+@[\w\.-]+')

    def __init__(self, fname):
        """Run the conversion for ``fname`` and write both output files.

        Args:
            fname: Path of the input JSON file; it is loaded through
                ``mailto.ReadJSONfile``.

        Raises:
            AssertionError: If the built-in sanity-check group is present in
                the data but does not resolve to the expected address.
        """
        self.filename = fname

        # Generate the lookup file of hashes -> e-mail addresses.
        self.json_data = mailto.ReadJSONfile(self, self.filename)
        self.email_address_hashed = self.ProcessEmailGroups(self.json_data)
        print(json.dumps(self.email_address_hashed, sort_keys=True, indent=4, separators=(',', ':')))
        self.WriteDatatoFile(self.email_address_hashed, "sample_emaildata_lookup.json")

        # Convert the group data so e-mail addresses are replaced with hashes.
        self.emailgroups_data_w_hash = self.ConvertEmailData(self.json_data, self.email_address_hashed)
        print(json.dumps(self.emailgroups_data_w_hash, sort_keys=True, indent=4, separators=(',', ':')))
        self.WriteDatatoFile(self.emailgroups_data_w_hash, "sample_emaildata_v2.json")
        # Note: Let MailtoManage generate emaildata_v2.json_sigs.json file.

        # Round-trip sanity check on one known group.  It only runs when that
        # group exists, so input files that do not contain it no longer abort
        # the constructor with a KeyError after the outputs were written.
        test_mail_group = "OLGR Casino Inspectorate - Star Casino - Brisbane"
        if test_mail_group in self.emailgroups_data_w_hash:
            assert (self.GetEmailAddressListString_from_EmailGroup(test_mail_group)
                    == 'brisbanesouth@justice.qld.gov.au')

    @classmethod
    def _extract_email(cls, raw):
        """Return the first address found in ``raw`` (lower-cased), or None.

        Semicolons are stripped before matching, mirroring how both the
        lookup and the group conversion normalise their entries.
        """
        found = cls._EMAIL_PATTERN.search(raw.lower().replace(';', ''))
        return found.group(0) if found else None

    def GetEmailAddressListString_from_EmailGroup(self, email_group_str):
        """Return the addresses of a group as one ``;``-separated string.

        Args:
            email_group_str: Name of the e-mail group to resolve.

        Returns:
            The group's addresses, looked up from their hashes and joined
            with ``;`` in the order the hashes are stored.

        Raises:
            KeyError: If the group, or one of its hashes, is unknown.
        """
        email_hashes = self.emailgroups_data_w_hash[email_group_str]
        return ";".join(self.email_address_hashed[h] for h in email_hashes)

    def ProcessEmailGroups(self, jd):
        """Build the ``{sha256_hex: email_address}`` lookup for all groups.

        Args:
            jd: Mapping of group name -> list of raw address strings.

        Returns:
            dict mapping the SHA-256 hex digest of each address to the
            address itself.  Entries that contain no recognisable address
            are skipped (previously they raised IndexError), which matches
            how ``ConvertEmailData`` treats them.
        """
        email_address_dict = {}
        for email_list in jd.values():
            for raw in email_list:
                address = self._extract_email(raw)
                if address is None:
                    continue
                email_address_dict[self.HashStr(address)] = address
        return email_address_dict

    def WriteDatatoFile(self, data, fname):
        """Write ``data`` to ``fname`` as sorted, 4-space indented JSON.

        Args:
            data: JSON-serialisable object to write.
            fname: Path of the output file; it is created or truncated.
        """
        with open(fname, 'w') as json_file:
            json.dump(data, json_file, sort_keys=True, indent=4, separators=(',', ':'))

    def ConvertEmailData(self, email_data, email_hash_data):
        """Replace every address in every group with its hash.

        Args:
            email_data: Mapping of group name -> list of raw address strings.
            email_hash_data: Mapping of hash -> address, as produced by
                ``ProcessEmailGroups``.

        Returns:
            dict mapping each group name to the sorted list of hashes of its
            addresses.  Entries without a recognisable address, and addresses
            missing from ``email_hash_data``, contribute nothing.
        """
        # Reverse index built once, instead of scanning the whole hash table
        # for every single address.  A list is kept per address so that the
        # result is unchanged even if several hashes map to the same address.
        hashes_by_email = {}
        for digest, address in email_hash_data.items():
            hashes_by_email.setdefault(address, []).append(digest)

        new_email_data = {}
        for email_group, email_list in email_data.items():
            group_hashes = []
            for raw in email_list:
                address = self._extract_email(raw)
                if address is not None:
                    group_hashes.extend(hashes_by_email.get(address, ()))
            new_email_data[email_group] = sorted(group_hashes)
        return new_email_data

    def HashStr(self, s):
        """Return the SHA-256 hex digest of ``str(s)`` encoded as UTF-8."""
        return hashlib.sha256(str(s).encode('utf-8')).hexdigest()
def main():
    """Run the encoder against the default input file.

    The input is a JSON file that needs to be formatted as a dict to support
    the mailto script; constructing ``MailtoDataEncode`` performs the whole
    conversion.
    """
    source_path = "sample_converted.json"
    MailtoDataEncode(source_path)


if __name__ == "__main__":
    main()