-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModel.py
More file actions
133 lines (101 loc) · 5.35 KB
/
Model.py
File metadata and controls
133 lines (101 loc) · 5.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
boston = datasets.load_boston()
# create a data set for input and one data set for target variables
data = boston.data
target = boston.target
response = raw_input("Do you want to see the description for the data set : press y/Y to see details ")
if response.lower() == 'y':
print boston.DESCR
#define list of training vectors and test vector for input data and target data
training_input = list()
training_target = list() #Normalising target is not mandatory
testing_input = list()
testing_target = list()
#fill the training and test vector with data in 9:1 ratio
for i in range(0,data.shape[0]):
if target[i] == 0:
print 'skipping the observation :', data[i]
else:
new_row = data[i][:]
if (i % 10 == 0):
testing_input.append(new_row)
testing_target.append(target[i])
else:
training_input.append(new_row)
training_target.append(target[i])
#convert lists to numpy array for easy implementation later
training_input = np.array(training_input)
training_target = np.array(training_target)
testing_input = np.array(testing_input)
testing_target = np.array(testing_target)
#store a list for learning rate for which analysis has to be done
learning_rates = [0.00001,0.0001,0.001,0.01,0.1,1]
#total number of epochs = 10
total_epochs = 10
#normalise vectors function which internally calls functions to normalise input and targets depending on the number of columns
def normalise_vectors():
normalise_input(training_input)
normalise_input(testing_input)
normalise_target(training_target)
normalise_target(testing_target)
def normalise_input(dataset): #dataset is either the training/testing data numpy array
for j in range(0,dataset.shape[1]): #run for each column ie 13 times as we have 13 columns
minValue = min(dataset[:,j]) #find minimum element in jth column
maxValue = max(dataset[:,j]) #find maximum element in jth column
for i in range(dataset.shape[0]): #for each row element in jth column ie loops runs for number of rows in data array
dataset[i,j] = (dataset[i,j] - minValue)/float(maxValue - minValue)
def normalise_target(dataset): #dataset is either the training/testing target numpy array
minValue = min(dataset) #find minimum element in target.since target has only one column
maxValue = max(dataset) #find maximum element in target.since target has only one column
for i in range(dataset.shape[0]): #for each row element in jth column ie loops runs for number of rows in data array
dataset[i] = (dataset[i] - minValue)/float(maxValue - minValue)
def model(beta_list,input_array):
predicted_target = 0
predicted_target += beta_list[0]
for i in range(0, data.shape[1]): # data.shape[1] = 13 ie for each column x[1],x[2]....x[13]
predicted_target += beta_list[i+1] * input_array[i]
return predicted_target
#calculate rms for each epoch for a particular learning rate
def calculate_rms(beta):
x = 0
for i in range(0,testing_input.shape[0]): #iterate till number of rows in testing input dataset
error = model(beta,testing_input[i]) - testing_target[i]
x = x + error ** 2
return ((float(x)/testing_input.shape[0]) ** 0.5)
#function for building the model and calculation the rms associated with each learning rate for a particular epoch
def calculate_model_prediction_rms():
normalise_vectors() #normalise vectors before building the model.
error = 0
for learning_index in range(0,len(learning_rates)):
rms = list() #create a list to store rms value for each epoch for a particular learning rate
beta = [0]*((data.shape[1])+1) #beta are one number more than the number of inputs and initialised to zero
number_epochs = 0 #initiate epoch to zero initially
while (number_epochs < total_epochs): #loop through the code for total_epochs (10) number of times
for i in range(0,training_input.shape[0]): #iterate till number of rows in training input dataset
error = model(beta,training_input[i]) - training_target[i] #calculate error for each row in the input dataset
for j in range(0,len(beta)): #loop runs from 0 to 13 ie B0 to B13 ie 14 beta values for 13 columns in input
if (j == 0):
beta[j] = beta[j] - learning_rates[learning_index] * error #beta 0 is modified with the learning rate and error
else:
#train other beta values ie beta 1 to beta 13 with error,learning rate and input x value
beta[j] = beta[j] - learning_rates[learning_index] * error * training_input[i][j-1]
rms.append(calculate_rms(beta)) #calculate rms for each epoch and append to the list
number_epochs += 1 #increment count of epochs
#plotting rms values
plt.plot(range(total_epochs),rms,color='red')
plt.xticks(range(10) + [1] * 10)
plt.xlabel("Epoch Number")
plt.ylabel("RMS Value")
plt.suptitle("RMS Curves for a Learning rate",color = 'blue')
plt.title("Learning Rate : " "%.6f" % float(learning_rates[learning_index]))
plt.show()
user_input = raw_input("Do you want to see the division of the data sets? Press y/Y to view : ")
if user_input.lower() == 'y':
print "training_input",training_input.shape
print "training_target",training_target.shape
print "testing_input",testing_input.shape
print "testing_target",testing_target.shape
#Function call for the main function which internally calls all the function.
calculate_model_prediction_rms()