-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhw8.py
More file actions
101 lines (79 loc) · 2.2 KB
/
hw8.py
File metadata and controls
101 lines (79 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import sys
import random
import math
def load_data(path):
    """Read a whitespace-delimited numeric matrix from the file at *path*.

    Each non-blank line becomes one row of floats. Blank lines are skipped so
    that a trailing newline or spacer line does not produce an empty row.

    Returns:
        A list of rows, each a list of floats; all rows have the same length.

    Raises:
        ValueError: if the file has no data rows, if a field is not a valid
            float, or if the rows do not all have the same number of columns.
    """
    matrix = []
    with open(path, 'r') as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            matrix.append([float(field) for field in fields])
    if not matrix:
        raise ValueError("no data rows found in {!r}".format(path))
    width = len(matrix[0])
    for index, row in enumerate(matrix):
        if len(row) != width:
            raise ValueError(
                "row {} has {} columns, expected {}".format(
                    index, len(row), width))
    return matrix


def _euclidean(p, q):
    """Return the Euclidean distance between equal-length points p and q."""
    return sum((a - b) ** 2 for a, b in zip(p, q)) ** 0.5


def _nearest(point, centroids):
    """Return the index of the centroid closest to *point*.

    Ties go to the lowest index, because list.index returns the first match.
    """
    distances = [_euclidean(point, centroid) for centroid in centroids]
    return distances.index(min(distances))


def kmeans(data, k, rng=random):
    """Cluster the rows of *data* into *k* groups with Lloyd's algorithm.

    Args:
        data: non-empty list of equal-length rows of numbers.
        k: number of clusters; must satisfy 1 <= k <= len(data).
        rng: object providing ``sample(population, k)``; defaults to the
            ``random`` module. Pass a seeded ``random.Random`` for
            reproducible runs.

    Returns:
        A ``(labels, centroids)`` tuple: ``labels[i]`` is the cluster index
        assigned to ``data[i]`` and ``centroids[j]`` is the mean of the rows
        assigned to cluster ``j``.

    Raises:
        ValueError: if *k* is outside ``1..len(data)``.
    """
    n_rows = len(data)
    if not 1 <= k <= n_rows:
        raise ValueError(
            "k must be between 1 and the number of rows ({}), got {}".format(
                n_rows, k))
    n_cols = len(data[0])

    # Seed with k distinct rows drawn from the whole data set. Copies are
    # taken so the centroid lists never alias rows of the caller's data.
    centroids = [list(data[i]) for i in rng.sample(range(n_rows), k)]

    while True:
        # Assignment step: attach every row to its nearest centroid.
        labels = [_nearest(point, centroids) for point in data]

        # Update step: accumulate per-cluster column sums and member counts.
        sums = [[0.0] * n_cols for _ in range(k)]
        counts = [0] * k
        for label, point in zip(labels, data):
            counts[label] += 1
            totals = sums[label]
            for c in range(n_cols):
                totals[c] += point[c]

        updated = []
        for j in range(k):
            if counts[j]:
                # True arithmetic mean of the members.
                updated.append([total / counts[j] for total in sums[j]])
            else:
                # An empty cluster keeps its previous centroid rather than
                # dividing by zero or collapsing to the origin.
                updated.append(centroids[j])

        # Total distance the centroids moved during this iteration.
        shift = sum(_euclidean(old, new)
                    for old, new in zip(centroids, updated))
        centroids = updated
        # "not shift > 0" (rather than "shift == 0") also stops on NaN, so
        # NaN in the data cannot cause an endless loop.
        if not shift > 0:
            break

    return labels, centroids


def main(argv=None):
    """Run k-means from the command line and print one line per data row.

    Args:
        argv: argument vector ``[program, datafile, k]``; defaults to
            ``sys.argv``.

    Each output line is ``<cluster label> <row index>``.

    Raises:
        SystemExit: with a usage message if fewer than two arguments follow
            the program name.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = argv[0] if argv else "hw8.py"
        raise SystemExit("usage: {} <datafile> <k>".format(program))
    data = load_data(argv[1])
    k = int(argv[2])
    labels, _ = kmeans(data, k)
    for index, label in enumerate(labels):
        print(label, index)


if __name__ == "__main__":
    main()