linear_regression.py
import numpy as np
import matplotlib.pyplot as plt


class LinearRegression(object):
    '''Linear regression trained with batch gradient descent.'''

    def __init__(self):
        # Hyperparameters
        self.alpha = 0.01       # learning rate
        self.min_change = 1000  # stopping tolerance on the absolute gradient
    # Load the data, add the intercept column, and scale the features
    def load_data(self, filename):
        data = np.loadtxt(filename, delimiter=',')
        # Number of samples
        m = data.shape[0]
        # Number of features
        n = data[:, 0:-1].shape[1]
        X0 = np.ones((m, 1))
        X1 = np.hstack((X0, data[:, :-1]))
        # Feature scaling
        X, mu, sigma = self.featureScale(X1)
        Y = data[:, -1].reshape(m, 1)
        return X, Y, m, n, mu, sigma
    # Cost function: J(theta) = (1 / (2m)) * sum((X @ theta - Y) ** 2)
    def computeCost(self, X, Y, theta):
        m = X.shape[0]
        error = np.dot(X, theta) - Y
        J = np.dot(error.T, error) / (2 * m)
        return J
    # Feature scaling; X is an m x (n+1) matrix whose first column is the intercept
    def featureScale(self, X):
        data = X[:, 1:]
        mu = np.mean(data, axis=0)    # note: axis=0 works column-wise, as in Octave
        sigma = np.std(data, axis=0)
        data_scale = (data - mu) / sigma
        # Re-attach the intercept column X0
        data = np.hstack((X[:, 0].reshape(len(X), 1), data_scale))
        return data, mu, sigma
    # Gradient of the cost function: (1 / m) * X^T (X @ theta - Y)
    def gradientDescent(self, X, Y, theta):
        m = X.shape[0]
        error = np.dot(X, theta) - Y
        gradient = np.dot(X.T, error) / m
        return gradient
    # Train with iterative batch gradient descent
    def train(self, X, Y):
        theta = np.ones((X.shape[1], 1))  # initialize theta (includes intercept)
        gradient = self.gradientDescent(X, Y, theta)
        iter_num = 0
        # History of the cost value per iteration
        J_history_list = []
        # Iterate until every gradient component is within the tolerance
        while not np.all(np.absolute(gradient) <= self.min_change):
            theta = theta - self.alpha * gradient
            gradient = self.gradientDescent(X, Y, theta)
            iter_num += 1
            J_history_list.append(self.computeCost(X, Y, theta)[0][0])
        return theta, iter_num, J_history_list
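
    # A minimal prediction helper (a sketch, not in the training loop above):
    # predicts the target for a raw, unscaled sample x (a length-n array) by
    # applying the same mu/sigma scaling that load_data used, then adding the
    # intercept term before taking the dot product with the learned theta.
    def predict(self, x, theta, mu, sigma):
        x_scaled = np.hstack(([1.0], (x - mu) / sigma))
        return np.dot(x_scaled, theta)[0]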
if __name__ == '__main__':
    print('Running gradient descent ...\n')
    L = LinearRegression()
    X, Y, m, n, mu, sigma = L.load_data('ex1data2.txt')
    theta, iter_num, J_history_list = L.train(X, Y)
    print('Theta computed from gradient descent:', theta)
    print('Number of iterations:\n', iter_num)
    print('Cost J:\n', L.computeCost(X, Y, theta))
    # Plot cost J against the iteration count
    plt.plot(np.arange(iter_num), J_history_list, '-r')
    plt.xlabel('Number of iterations')
    plt.ylabel('Cost J')
    plt.show()
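    # Usage sketch: the sample values below are hypothetical; any new sample
    # must be scaled with the mu/sigma learned from the training data, which
    # predict() handles internally.
    x_new = np.array([1650.0, 3.0])
    print('Prediction for a hypothetical sample:', L.predict(x_new, theta, mu, sigma))
    # Cross-check (sketch): the closed-form normal equation,
    # theta = (X^T X)^{-1} X^T Y, should closely match the gradient-descent theta.
    theta_ne = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
    print('Theta from the normal equation:', theta_ne)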