-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclassification.py
More file actions
79 lines (64 loc) · 2.61 KB
/
Copy pathclassification.py
File metadata and controls
79 lines (64 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# -*-coding:Latin-1 -*
import numpy as np
class Classifier:
def __init__(self, C=None):
self.C = C
if self.C is not None: self.C = float(self.C)
def fit(self, X, y):
n_samples, n_features = X.shape
# Gram Matrix
K = np.zeros((n_samples, n_samples))
for i in range(n_samples):
for j in range(n_samples):
K[i,j] = sp.sparse.csc_matrix.dot(X[i], X[j].T).toarray()
# Lagrange Multipliers
P = sp.sparse.csc_matrix(np.outer(y,y) * K)
q = sp.sparse.csc_matrix(np.ones(n_samples) * -1)
A = sp.sparse.csc_matrix(y, (1,n_samples))
b = sp.sparse.csc_matrix(0.0)
if self.C is None:
G = sp.sparse.csc_matrix(np.diag(np.ones(n_samples) * -1))
h = sp.sparse.csc_matrix(np.zeros(n_samples))
else:
tmp1 = np.diag(np.ones(n_samples) * -1)
tmp2 = np.identity(n_samples)
G = sp.sparse.csc_matrix(np.vstack((tmp1, tmp2)))
tmp1 = np.zeros(n_samples)
tmp2 = np.ones(n_samples) * self.C
h = sp.sparse.csc_matrix(np.hstack((tmp1, tmp2)))
# Function to minimize in order to solve the dual problem
def fun_to_minimize(x):
z = (1/2)*np.dot(x,P.dot(x)) + q.dot(x)
return(z)
cons = ({'type': 'eq', 'fun': lambda x: A.dot(x)},
{'type': 'ineq', 'fun': lambda x: np.asarray((h.todense() - G.dot(x))).reshape(-1)})
bnds = ((0, None),)*n_samples
#Use the scipy library to find the minimum
res = minimize(fun_to_minimize, (0,)*n_samples, method='SLSQP', bounds=bnds, constraints=cons)
self.res = res
a = res.x
# Support vectors have non zero lagrange multipliers
sv = a > 1e-13
ind = np.arange(len(a))
self.ind = ind
self.a = a[sv]
self.sv = X[sv]
self.sv_y = y[sv]
print("%d support vectors out of %d points" % (len(self.a), n_samples))
# Intercept
self.b = 0
for n in range(len(self.a)):
self.b += self.sv_y[n]
self.b -= np.sum(self.a * self.sv_y * K[ind[n],sv])
self.b /= len(self.a)
# Weight vector
self.w = np.zeros(n_features)
for n in range(len(self.a)):
self.w += self.a[n] * self.sv_y[n] * self.sv[n]
def project(self, X):
return X.dot((self.w).T) + self.b
def predict(self, X):
return np.sign(self.project(X))
def score(self, X, y):
y_pred = np.squeeze(np.asarray(svm.predict(X)))
return np.mean(y_pred == y)