-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathmain.py
More file actions
115 lines (98 loc) · 4.29 KB
/
Copy pathmain.py
File metadata and controls
115 lines (98 loc) · 4.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import psycopg2
from psycopg2 import pool
from concurrent.futures import ThreadPoolExecutor
import hashlib
import ssl
from socket import *
import config
import OpenSSL
from datetime import datetime
import time
import threading as th
import ctypes
# Runtime parameters. Deployment-specific values are read from the local
# `config` module; read the config_example.py file for more information.
PORT = 443  # Port passed to retrieve_footprint_from_url for every TLS connection
USER = config.user  # PostgreSQL user name
TABLE = config.main_table  # Table that is read (url, id) and updated with the results
DATABASE = config.db  # PostgreSQL database name
PERF_FILE = config.perf_file  # File to which one performance line is appended per run
NUMBER_THREADS = 500 # Worker threads and max pooled connections. Author's note: much time is lost with connections, so this should be between 100 and 1000 for optimized parsing
ELT_PER_TH = 50 # Rows fetched per submitted batch. Author's note: cannot be 1, else not enough memory (too many database requests in parallel)
# Loaded eagerly at import time; the handle is not used anywhere else in the visible code.
# NOTE(review): presumably a workaround for the "libgcc_s.so.1 must be installed
# for pthread_cancel to work" abort seen with many threads - confirm.
libgcc_s = ctypes.CDLL('libgcc_s.so.1')
def get_table_size(db,user,table):
    """
    Retrieve the number of rows of the given PostgreSQL table.

    db    -- name of the database to connect to
    user  -- PostgreSQL user used for the connection
    table -- name of the table to count; it is interpolated into the SQL
             text as-is, so it must come from trusted configuration

    Returns the value produced by `SELECT count(*)`.
    Any psycopg2 error raised while connecting or querying is propagated.
    """
    # Use the arguments rather than the module-level DATABASE/USER globals,
    # so the function really counts rows in the database it was asked about.
    conn_db = psycopg2.connect(f"dbname={db} user={user}")
    try:
        # The cursor context manager closes the cursor on exit.
        with conn_db.cursor() as cur:
            cur.execute(f"SELECT count(*) FROM {table};")
            return cur.fetchone()[0]
    finally:
        # `with conn_db:` would only end the transaction, not close the
        # connection, so the connection is closed explicitly.
        conn_db.close()
# Row count of TABLE, computed once when the module is loaded. Note that this
# opens a database connection at import time, not only when main() runs.
SIZE_DB = get_table_size(DATABASE,USER,TABLE)
def main(start_row=887000):
    """
    Scan the table in batches of ELT_PER_TH rows using NUMBER_THREADS worker
    threads, then print and record the overall performance figures.

    start_row -- row offset at which processing starts. The default keeps the
                 offset that used to be hard-coded in the loop.
                 NOTE(review): 887000 looks like a leftover from resuming an
                 interrupted run - pass 0 to process the whole table.

    Errors raised while creating the pool or submitting work are printed, not
    re-raised. The timing line is always printed and appended to PERF_FILE,
    and the connection pool is always closed.
    """
    tic = time.perf_counter()
    print(f"Size of table : {SIZE_DB}")
    # Bound before the try so the finally clause can test it even when the
    # pool constructor itself raises.
    threaded_postgreSQL_pool = None
    try:
        threaded_postgreSQL_pool = psycopg2.pool.ThreadedConnectionPool(1,NUMBER_THREADS,user=USER,database=DATABASE)
        if (threaded_postgreSQL_pool):
            print("Connection pool created successfully using ThreadedConnectionPool")
        first_batch = start_row//ELT_PER_TH
        # Ceiling division: a trailing partial batch (SIZE_DB not a multiple
        # of ELT_PER_TH) must be processed too.
        end_batch = -(-SIZE_DB//ELT_PER_TH)
        # Named `executor` so the imported `pool` module is not shadowed.
        with ThreadPoolExecutor(max_workers=NUMBER_THREADS) as executor:
            futures = [
                executor.submit(thread, threaded_postgreSQL_pool, n, ELT_PER_TH)
                for n in range(first_batch, end_batch)
            ]
        # Leaving the `with` block waits for every batch. Report the batches
        # that raised instead of silently dropping their exceptions.
        for future in futures:
            batch_error = future.exception()
            if batch_error is not None:
                print("Error while processing a batch",batch_error)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error while connecting to PostgreSQL",error)
    finally:
        toc = time.perf_counter()
        print(f"Time Elapsed:{toc-tic}\nSize of Database:{SIZE_DB}\nSpeed:{SIZE_DB/(toc-tic)}\nThreads:{NUMBER_THREADS}")
        write_perf_into_file(PERF_FILE,toc-tic,SIZE_DB,NUMBER_THREADS)
        if threaded_postgreSQL_pool:
            # closeall must actually be called to release the connections.
            threaded_postgreSQL_pool.closeall()
def retrieve_footprint_from_url(url, port):
    """
    Retrieve the SHA1 footprint from a website by creating a
    TLS connection with a given URL.

    url  -- host name to connect to (also sent as the TLS server name)
    port -- TCP port to connect to

    Returns a tuple (sha1_hex_digest, not_after) where the digest is computed
    over the DER-encoded peer certificate and not_after is the certificate
    expiry date formatted as 'YYYY-MM-DD'.

    Raises whatever the socket, ssl or OpenSSL layers raise (connection
    failures, timeouts, handshake errors, unparsable certificates) and
    ValueError when the peer sends no certificate.
    """
    # Imported from its documented home; `ssl.create_connection` only works
    # because the ssl module happens to import the name for its own use.
    from socket import create_connection

    # As with the previous PROTOCOL_TLS context, the peer certificate is not
    # verified: the certificate is only fingerprinted, whatever its validity.
    # PROTOCOL_TLS_CLIENT verifies by default, so this is switched off
    # explicitly (check_hostname must be cleared before verify_mode).
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE

    # Both sockets are closed on exit, on success and on failure alike.
    with create_connection((url, port),timeout=10) as conn:
        with context.wrap_socket(conn, server_hostname=url) as sock: #Wrap the connection with TLS protocol
            certificate = sock.getpeercert(True) #True gets the 'der' certificate

    if certificate is None:
        raise ValueError(f"No certificate received from {url}:{port}")

    # Load the DER bytes directly; no PEM round trip is needed. A parse error
    # is allowed to propagate so the caller sees the real cause.
    x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_ASN1, certificate)
    cert_not_after = datetime.strptime(x509.get_notAfter().decode('ascii'), '%Y%m%d%H%M%SZ')
    return (hashlib.sha1(certificate).hexdigest(),cert_not_after.date().isoformat())
def write_perf_into_file(file_name,time_elapsed,size_db,nb_th):
    """
    Append one line of performance figures to a text file.

    file_name    -- path of the file to append to (created if missing)
    time_elapsed -- duration of the run; must be non-zero
    size_db      -- number of rows in the table
    nb_th        -- number of worker threads used

    Raises ZeroDivisionError when time_elapsed is 0 and OSError when the file
    cannot be opened or written.
    """
    # Build the line first so a formatting error cannot leave a file open.
    perf = f"Time Elapsed: {time_elapsed} ; Size Database: {size_db} ; Overall Speed: {size_db/time_elapsed} ; Number of Threads: {nb_th}\n"
    # The context manager closes the file even when the write fails.
    with open(file_name,'a') as f:
        f.write(perf)
def thread(threaded_postgresql_pool,n,elt_per_thread):
    """
    Process one batch of rows: fetch each row's certificate footprint and
    store the result (or the error message) back into TABLE.

    threaded_postgresql_pool -- connection pool providing getconn()/putconn()
    n                        -- zero-based index of the batch to process
    elt_per_thread           -- number of rows in a batch

    The batch covers the rows at offsets [n*elt_per_thread,
    (n+1)*elt_per_thread) in id order. All updates of the batch are committed
    together. If a database statement fails, the batch is rolled back and the
    exception is re-raised. The connection is always returned to the pool.
    """
    conn_db = threaded_postgresql_pool.getconn()
    if not conn_db:
        return
    try:
        with conn_db.cursor() as cur:
            # Only a slice of the table is loaded at a time, else memory is not big enough.
            # Values are passed as query parameters; only the table name,
            # which comes from configuration, is interpolated into the text.
            cur.execute(
                f"SELECT url,id FROM {TABLE} ORDER BY id LIMIT %s OFFSET %s;",
                (elt_per_thread, n*elt_per_thread),
            )
            list_urls = cur.fetchall()
            for the_url, the_id in list_urls:
                try:
                    if int(the_id)%1000 == 0 :
                        print(f"Line number {the_id} has been completed.")
                    (sha1_footprint,cert_time) = retrieve_footprint_from_url(the_url, PORT)
                except Exception as e:
                    # Best effort: a host that cannot be reached or parsed is
                    # recorded, not fatal. Passing the message as a parameter
                    # keeps quotes in it from breaking the statement.
                    cur.execute(
                        f"UPDATE {TABLE} SET error=%s WHERE id=%s;",
                        (str(e), the_id),
                    )
                else:
                    timestamp = str(datetime.now())
                    cur.execute(
                        f"UPDATE {TABLE} SET sha1=%s,certtime=%s,timestamp=%s WHERE id=%s;",
                        (sha1_footprint, cert_time, timestamp, the_id),
                    )
        conn_db.commit()
    except Exception:
        # Do not hand a connection with a failed transaction back to the pool.
        conn_db.rollback()
        raise
    finally:
        threaded_postgresql_pool.putconn(conn_db)
# Start the scan only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()