Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions examples/rnn_main_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from pcap.parser import PCAP_PKTS, _pcap2flows
from util.tool import dump_data, load_data

from time import *

RANDOM_STATE = 100

cuda = True if torch.cuda.is_available() else False
Expand All @@ -42,7 +44,7 @@ def set_random_state(random_state=100):
set_random_state(random_state=RANDOM_STATE)


def raw2features(raw_features, header=True, MTU=1500, normalize=True):
def raw2features(raw_features, data_type=True, MTU=1500, normalize=True):
"""Extract features for the detection model

Parameters
Expand All @@ -64,10 +66,13 @@ def normalize_bytes(flow):
feat_0 = v_lst[0]
feat_i_lst = v_lst[1:]

if header:
if data_type == 'header':
tmp_v = [v['header'] for v in feat_i_lst]
tmp_v = [v + [0] * (40 - len(v)) if len(v) < 40 else v[:40] for v in tmp_v]
elif data_type == 'header_payload':
tmp_v = [v['header'] + v['payload'] for v in feat_i_lst]
tmp_v = [v + [0] * (MTU - len(v)) if len(v) < MTU else v[:MTU] for v in tmp_v]
else:
else: # data_type=='payload':
payload_len = MTU - 40
tmp_v = [v['payload'] for v in feat_i_lst]
tmp_v = [v + [0] * (payload_len - len(v)) if len(v) < payload_len else v[:payload_len] for v in tmp_v]
Expand All @@ -80,7 +85,7 @@ def normalize_bytes(flow):
return X


def load_flow_data(overwrite=False, random_state=100, full_flow=True):
def load_flow_data(overwrite=False, random_state=100, full_flow=True, data_type='header'):
"""Get raw features from PCAP and store them into disk

Parameters
Expand All @@ -101,8 +106,8 @@ def load_flow_data(overwrite=False, random_state=100, full_flow=True):
# # # # # # #
# # # # # # 'DS20_PU_SMTV/DS21-srcIP_10.42.0.1',
# # # # # # # #
#'DS40_CTU_IoT/DS41-srcIP_10.0.2.15',
'DS40_CTU_IoT/DS42-srcIP_192.168.1.196',
'DS40_CTU_IoT/DS41-srcIP_10.0.2.15',
# 'DS40_CTU_IoT/DS42-srcIP_192.168.1.196',
# # # # #
# # # # # # 'DS50_MAWI_WIDE/DS51-srcIP_202.171.168.50',
# # # 'DS50_MAWI_WIDE/DS51-srcIP_202.171.168.50',
Expand All @@ -128,7 +133,7 @@ def load_flow_data(overwrite=False, random_state=100, full_flow=True):

dataset_name = datasets[0]
print(f'dataset: {dataset_name}')
in_dir = 'data/data_reprst/pcaps'
in_dir = './examples/data/data_reprst/pcaps'
if dataset_name == 'DS40_CTU_IoT/DS42-srcIP_192.168.1.196':
in_norm_file = f'{in_dir}/{dataset_name}/2019-01-09-22-46-52-src_192.168.1.196_CTU_IoT_CoinMiner_anomaly.pcap'
in_abnorm_file = f'{in_dir}/{dataset_name}/2018-12-21-15-50-14-src_192.168.1.195-CTU_IoT_Mirai_normal.pcap'
Expand Down Expand Up @@ -175,9 +180,9 @@ def load_flow_data(overwrite=False, random_state=100, full_flow=True):
out_abnorm_file = in_abnorm_file + '-raw_abnormal_features.dat'
dump_data(abnorm_pp.features, out_abnorm_file)

X_norm = raw2features(load_data(out_norm_file), header=False)
X_norm = raw2features(load_data(out_norm_file), data_type=data_type)
y_norm = [0] * len(X_norm)
X_abnorm = raw2features(load_data(out_abnorm_file), header=False)
X_abnorm = raw2features(load_data(out_abnorm_file), data_type=data_type)
y_abnorm = [1] * len(X_abnorm)

return split_train_test(X_norm, y_norm, X_abnorm, y_abnorm, random_state)
Expand All @@ -202,6 +207,7 @@ def split_train_test(X_norm, y_norm, X_abnorm, y_abnorm, random_state=100):
# X_norm = sklearn.utils.shuffle(X_norm, random_state)
random.Random(random_state).shuffle(X_norm) #注意此处打乱数据的作用
size = int(len(y_norm) // 2) if len(y_norm) <= len(y_abnorm) else min(400, len(y_abnorm))
size = 8800
X_test = X_norm[-size:] + X_abnorm[:size]
y_test = y_norm[-size:] + y_abnorm[:size]
X_train = X_norm[:-size]
Expand All @@ -212,13 +218,21 @@ def split_train_test(X_norm, y_norm, X_abnorm, y_abnorm, random_state=100):


def main(random_state=100):
    """Train and evaluate the RNN flow detector end to end.

    Loads flow features extracted from PCAPs, trains an RNN on the
    normal/abnormal split, then evaluates it and reports the wall-clock
    time of the test phase.

    Parameters
    ----------
    random_state : int
        Seed forwarded to data loading/splitting for reproducibility.
    """
    data_type = 'payload'  # alternatives: 'header', 'header_payload'
    X_train, y_train, X_test, y_test = load_flow_data(random_state=random_state, data_type=data_type)
    # Infer the input dimension from the loaded data instead of hard-coding
    # it, since the per-packet feature length depends on data_type
    # (header=40, header_payload=MTU, payload=MTU-40).
    in_dim = len(X_train[0][0])
    rnn = RNN(n_epochs=100, in_dim=in_dim, out_dim=10, n_layers=1, lr=1e-3, bias=False, random_state=random_state)

    rnn.train(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test, split=True)

    # Measure the wall-clock running time of the test phase only.
    begin_time = time()
    rnn.test(X_test=X_test, y_test=y_test, split=True)
    end_time = time()
    run_time = end_time - begin_time
    print("test time:", run_time)


if __name__ == '__main__':
Expand Down
Loading