-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExperiment.py
More file actions
164 lines (133 loc) · 8.09 KB
/
Experiment.py
File metadata and controls
164 lines (133 loc) · 8.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import pandas as pd
import numpy as np
import pdb
from utils.getTrainingScores import getTrainingScores
from utils.getTPRFPR import getTPRFPR
from utils.applyquantifiers import apply_quantifier
from utils.get_best_thr import get_best_threshold
from quantification.dys_method import get_dys_distance
class Experiment:
    """Simulate a labelled data stream with drift detection and quantification.

    Fits a classifier on the training data, then replays the test set one
    instance at a time through a sliding window.  At each step several
    quantification methods estimate the positive-class proportion of the
    window; that estimate drives the classification threshold for the new
    instance.  When the drift detector fires, the current window becomes the
    new training set and the model and detector are refreshed.
    """
    def __init__(self, train_data, test_data, window_length, model, detector, detector_name, score_length):
        """Set up stream state and fit the classifier on the training data.

        Args:
            train_data: DataFrame whose last column holds the binary label.
            test_data: DataFrame whose last column holds the binary label.
            window_length (int): sliding-window size, capped at the training size.
            model: scikit-learn-style classifier (fit / predict / predict_proba).
            detector: drift detector exposing Increment(), Test() and Update().
            detector_name (str): tag used to build the result column names.
            score_length (int): number of recent scores kept for quantification.
        """
        self.trainX = train_data.iloc[:, :-1]
        self.testX = test_data.iloc[:, :-1]
        self.trainY = train_data.iloc[:, -1]
        self.testY = test_data.iloc[:, -1]
        # Cap the window at the number of available training instances.
        self.window_length = min(window_length, len(self.trainY))
        self.model = model.fit(self.trainX, self.trainY)
        self.detector = detector
        self.detector_name = detector_name
        self.score_length = score_length
        self.drifts = []  # stream indices at which the detector fired
        self.quantifier_methods = ["CC", "ACC", "MS", "DyS"]
    def run_stream(self):
        """Simulate a data stream, testing for drift occurrences while applying quantification.

        Returns:
            tuple: (DataFrame of per-method predicted classes,
                    {detector_name: list of drift indices},
                    DataFrame of predicted and real positive proportions,
                    list of mean score/label discrepancies,
                    list of DyS distances).
        """
        discrepancies = []
        dys_distances = []
        # Seed the sliding window with the tail of the training set.
        window = self.trainX.iloc[-self.window_length:].copy(deep=True).reset_index(drop=True)
        window_labels = self.trainY.iloc[-self.window_length:].copy(deep=True).reset_index(drop=True)
        # Proportions predicted by each quantifier method, plus the real proportion.
        window_prop = {f"{self.detector_name}-{qtf}": [] for qtf in self.quantifier_methods}
        window_prop[f"real_{self.detector_name}"] = []
        real_labels_window = self.trainY.iloc[-self.score_length:].tolist()
        # Training scores and the statistics the quantification methods need.
        scores, tprfpr, pos_scores, neg_scores = self.get_train_values()
        test_scores = self.model.predict_proba(self.trainX.iloc[-(self.score_length):])[:, 1].tolist()
        vet_accs = {self.detector_name: []}
        # Run the stream one instance at a time.
        for i in range(0, len(self.testY)):
            print(f"{self.detector_name}-{i}", end='\r')
            new_instance = self.testX.iloc[[i]]
            # Slide the window: append the new instance, drop the oldest one.
            window = pd.concat([window, new_instance], ignore_index=True).iloc[1:]
            window_labels = pd.concat([window_labels, self.testY.iloc[[i]]], ignore_index=True).iloc[1:]
            # Positive-class score of the new instance.
            new_instance_score = float(self.model.predict_proba(new_instance)[:, 1][0])
            test_scores.append(new_instance_score)
            real_labels_window.append(self.testY.iloc[[i]].tolist()[0])
            # Feed the new instance to the detector (IKS, IBDD and WRS).
            # NOTE(review): .loc[i] assumes testX keeps a 0..n-1 integer index — confirm.
            self.detector.Increment(self.testX.loc[i], window, i)
            window_prop[f"real_{self.detector_name}"].append(round(sum(real_labels_window)/len(real_labels_window), 2))
            # Apply every quantification method to the current score window.
            vet_accs, window_prop, dys_distances = self.apply_quantification(scores,
                                                                             np.array(test_scores),
                                                                             tprfpr,
                                                                             pos_scores,
                                                                             neg_scores,
                                                                             window,
                                                                             new_instance_score,
                                                                             vet_accs,
                                                                             window_prop,
                                                                             dys_distances)
            # Mean absolute gap between scores and true labels over the window.
            discrepance = np.round(np.absolute(np.subtract(np.array(test_scores), np.array(real_labels_window))).mean(), 3)
            discrepancies.append(discrepance)
            # Drop the oldest score/label so both windows stay aligned.
            test_scores = test_scores[1:]
            real_labels_window = real_labels_window[1:]
            vet_accs[self.detector_name].append(self.model.predict(new_instance)[0])
            if (self.detector.Test(i)):
                self.drifts.append(i)
                # Drift detected: promote the current window to training data,
                # then refresh the classifier statistics and the detector.
                self.trainX = window
                self.trainY = window_labels
                scores, tprfpr, pos_scores, neg_scores = self.get_train_values()
                self.detector.Update(window)
                # NOTE(review): slices score_length-1 here but score_length at
                # start-up (and for real_labels_window below) — confirm intentional.
                test_scores = self.model.predict_proba(self.trainX.iloc[-(self.score_length-1):])[:, 1].tolist()
                real_labels_window = self.trainY.iloc[-self.score_length:].tolist()
        return pd.DataFrame(vet_accs), {self.detector_name: self.drifts}, pd.DataFrame(window_prop), discrepancies, dys_distances
    def apply_quantification(self,
                             scores: object,
                             test_scores: np.ndarray,
                             tprfpr: object,
                             pos_scores: object,
                             neg_scores: object,
                             windowX: object,
                             new_instance_score: float,
                             vet_accs: dict[str, list[int]],
                             window_prop: dict[str, list[float]],
                             dys_distances: list[float]):
        """Apply quantification to the window and classify the new instance.

        Each quantifier estimates the positive-class proportion of the current
        score window; the proportion is converted into a decision threshold
        which then classifies the newest instance's score.

        Args:
            scores: training-score table with 'scores' and 'class' columns.
            test_scores (np.ndarray): positive scores of the current window,
                predicted by the classifier.
            tprfpr: TPR/FPR table computed from the training scores.
            pos_scores: training scores of the positive class.
            neg_scores: training scores of the negative class.
            windowX: current window without labels.
            new_instance_score (float): positive score of the new instance.
            vet_accs (dict[str, list[int]]): predicted class per quantification
                algorithm, plus the plain classifier's predictions.
            window_prop (dict[str, list[float]]): predicted proportion history
                per quantification algorithm.
            dys_distances (list[float]): accumulated DyS distances.

        Returns:
            tuple: updated (vet_accs, window_prop, dys_distances).
        """
        proportions = {}
        for qtf_method in self.quantifier_methods:
            pred_pos_prop = apply_quantifier(qntMethod=qtf_method,
                                             clf=self.model,
                                             scores=scores['scores'],
                                             p_score=pos_scores,
                                             n_score=neg_scores,
                                             train_labels=scores['class'],
                                             test_score=test_scores,
                                             TprFpr=tprfpr,
                                             thr=0.5,
                                             measure="topsoe",
                                             test_data=windowX)
            if qtf_method == "DyS":
                dys_distances.append(get_dys_distance(pos_scores, neg_scores, test_scores))
            window_prop[f"{self.detector_name}-{qtf_method}"].append(round(pred_pos_prop, 2))
            proportions[f"{self.detector_name}-{qtf_method}"] = pred_pos_prop
        for name, proportion in proportions.items():
            # Derive the decision threshold from the estimated positive proportion.
            thr = get_best_threshold(proportion, test_scores)
            if name not in vet_accs:
                vet_accs[name] = []
            # Use the threshold to determine the class of the new instance score.
            vet_accs[name].append(1 if new_instance_score >= thr else 0)
        return vet_accs, window_prop, dys_distances
    def get_train_values(self):
        """Score the training data and derive the quantification statistics.

        Note: getTrainingScores also returns the (re)fitted model, which
        replaces self.model as a side effect.

        Returns:
            tuple: (score table with 'scores'/'class' columns, TPR/FPR table,
                    positive-class scores, negative-class scores).
        """
        scores, self.model = getTrainingScores(self.trainX, self.trainY, 10, self.model)
        tprfpr = getTPRFPR(scores)
        pos_scores = scores[scores["class"] == 1]["scores"]
        neg_scores = scores[scores["class"] == 0]["scores"]
        return scores, tprfpr, pos_scores, neg_scores