forked from mohamm-alsaid/CDTA_API
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathinit_db.py
More file actions
77 lines (69 loc) · 2.36 KB
/
init_db.py
File metadata and controls
77 lines (69 loc) · 2.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import sqlite3
import pandas as pd
import numpy as np
from itertools import cycle
sql_fname = 'mvot_data/MVoTs_schema.sql'
mvots_fname = 'mvot_data/testingMM_1_SimOut.csv'
anon_ids = 'mvot_data/anonymized.csv'
sample_size = 10
# used to rename columns & used to seed the DB
keys = {
'Actor':'actor',
'trust score':'trust_score',
'unexpected message count':'count_unexpected_msgs',
'distrust score':'distrust_score',
'certainty':'certainty',
'average transit time':'avg_tx_time',
'registration date':'registration_date',
'communication frequency':'comm_freq',
'expected message count':'count_expected_msgs',
'alert count':'alerts_count',
'other action count':'other_count',
'SD transit time':'sdtt',
'relative factor of certainty':'RFC',
'message transit time':'tx_time',
'total message count':'total_msgs',
'time since last communication':'TSLC',
'time stamp':'last_timestamp',
'timeout count':'timeout_count',
}
# read rename columns before seeding
records = pd.read_csv(mvots_fname)
print(records.columns)
records = records.rename(keys,axis='columns')[keys.values()]
ids = pd.read_csv(anon_ids)
sample_ids = ids.iloc[:sample_size]
sample_ids = sample_ids.apply(lambda x: ':'.join(x).replace(' ',''),axis='columns')
# round records
# records = records[list(keys.values())[1:]]
# records = records[records != 'actor'].apply(lambda x: round(x,5))
records['anon_id'] = '*:*:*:*' # add default IDs
connection = sqlite3.connect('database.db')
with open(sql_fname) as f:
connection.executescript(f.read())
cur = connection.cursor()
for (i,mvot) in records.iloc[:sample_size*4].iterrows():
print('inserting: ',tuple(mvot)) # exclude actor
cur.execute(f"INSERT INTO MVoTs VALUES (NULL{', ?'*18})",
mvot[['anon_id',
'registration_date',
'last_timestamp',
'sdtt',
'RFC',
'TSLC',
'tx_time',
'comm_freq',
'certainty',
'avg_tx_time',
'trust_score',
'distrust_score',
'total_msgs',
'other_count',
'alerts_count',
'timeout_count',
'count_expected_msgs',
'count_unexpected_msgs'
]]
)
connection.commit()
connection.close()