-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.py
More file actions
126 lines (93 loc) · 4.99 KB
/
run.py
File metadata and controls
126 lines (93 loc) · 4.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import shutil
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from Experiment import Experiment
from detectors.IBDD import IBDD
from detectors.IKS import IKS
from detectors.WRS import WRS
from detectors.baseline import Baseline
from utils.make_tests_imbalanced import make_tests
import argparse
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
def run(dataset, window_size, score_lenght, path_train, path_tests, path_results, classifier, positive_proportions):
# CHECKING IF THE IBDD FOLDER ALREADY EXISTS, IF YES, CLEAR IT
ibdd_dir = f"{os.getcwd()}/ibdd_folder/{dataset}"
if not os.path.isdir("ibdd_folder"):
os.mkdir(f"{os.getcwd()}/ibdd_folder")
if os.path.isdir(ibdd_dir):
shutil.rmtree(ibdd_dir)
os.mkdir(ibdd_dir)
df_test = pd.read_csv(f"{path_tests}/{dataset}.test.csv")
contexts = df_test.iloc[:, -1].tolist()
contexts = [1 if contexts[i-1] != x and i != 0 else 0 for i, x in enumerate(contexts) ]
rd = [i for i, x in enumerate(contexts) if x == 1]
# CREATING TEST FILES VARYING CLASS DISTRIBUTION IN EACH CONTEXT
make_tests(path_tests, dataset, positive_proportions, rd)
# PASSING BY EACH TEST FILE
for f in list(os.listdir(f"{path_tests}/{dataset}")):
print(f)
test = pd.read_csv(f"{path_tests}/{dataset}/{f}")
train = pd.read_csv(f"{path_train}")
train['class'].replace(2, int(0), inplace=True)
# TAKING THE CONTEXT
contexts = test.iloc[:, -1].tolist()
contexts = [1 if contexts[i-1] != x and i != 0 else 0 for i, x in enumerate(contexts) ]
real_drifts = [i for i, x in enumerate(contexts) if x == 1]
# CREATING TRAIN, TEST DATASTREAMS
train = train.iloc[:, :-1].copy()
test = test.iloc[:, :-1].copy()
print(f"dataset: {dataset}")
vet_accs_table = pd.DataFrame()
#BASELINE RUN EXPERIMEN
baseline = Baseline(train, window_size)
exp = Experiment(train, test, window_size, classifier, baseline, 'baseline', score_lenght)
vet_accs_table, drift_points_baseline, window_prop_baseline, discrepances_baseline, dys_distances_baseline = make_experiment(exp, contexts, vet_accs_table)
# CREATING VARIABLES TO SAVE TO A EXTERNAL FILE
drift_table = pd.DataFrame({"baseline":drift_points_baseline, "REAL":contexts})
vet_accs_table["real"] = test.iloc[:, -1].tolist()
discrepances = pd.DataFrame(discrepances_baseline, columns=['baseline_discrepances'])
discrepances["dys_distances"] = dys_distances_baseline
window_prop = pd.concat([window_prop_baseline], axis=1)
# SAVING THE DATAFRAMES INTO FILES FOR EACH DATASET
discrepances.to_csv(f"{path_results}/{dataset}_{f[:f.rfind('.')]}_{window_size}_{score_lenght}_discrepances.csv", index=False)
window_prop.to_csv(f"{path_results}/{dataset}_{f[:f.rfind('.')]}_{window_size}_{score_lenght}_prop.csv", index=False)
drift_table.to_csv(f"{path_results}/{dataset}_{f[:f.rfind('.')]}_{window_size}_{score_lenght}_drift.csv", index=False)
vet_accs_table.to_csv(f"{path_results}/{dataset}_{f[:f.rfind('.')]}_{window_size}_{score_lenght}_pred.csv", index=False)
def make_experiment(experiment_object, contexts, vet_accs_table) -> list:
"""create a new experiment and saving the results into dataframes
Args:
experiment_object (Any): drift detector to be running the experiments on
contexts (list): list of context
vet_accs_table (DataFrame): pandas dataframe to save the results of experiment
Returns:
list: results of accuracies of predictions and the list of drift predictions
"""
vet_accs, drift_points, win_prop, discrepances, dys_distance = experiment_object.run_stream()
vet_accs_table = pd.concat([vet_accs_table, vet_accs], axis=1)
drift_points = [1 if x in list(drift_points.values())[0] else 0 for x in range(len(contexts))]
return vet_accs_table, drift_points, win_prop, discrepances, dys_distance
if __name__ == "__main__":
print('Iniciando...\n')
parser = argparse.ArgumentParser()
parser.add_argument('dataset', type=str)
parser.add_argument('window', type=int)
parser.add_argument('score_lenght', type=int)
args = parser.parse_args()
path_train = f"{os.getcwd()}/datasets/training/{args.dataset}.train.csv"
path_tests = f"{os.getcwd()}/datasets/test"
path_results = f"{os.getcwd()}/results/"
classifier = RandomForestClassifier(n_estimators=200, n_jobs=-1)
positive_proportions = [0.2, 0.5, 0.8]
run(dataset=args.dataset,
window_size=args.window,
score_lenght=args.score_lenght,
path_train=path_train,
path_tests=path_tests,
path_results=path_results,
classifier=classifier,
positive_proportions=positive_proportions)
print('Fim')