Polynomial Curve Fitting
Objectives:
Master the least-squares analytic solution (loss without a penalty term), optimization of the loss with an L2 penalty term, gradient descent, and the conjugate gradient method; understand overfitting and ways to counter it (e.g., adding a penalty term or more samples).
Requirements:
(1) Generate data and add noise.
(2) Fit the curve with a high-order polynomial.
(3) Solve for the analytic optimum of both losses, with and without the regularization term (the closed-form solutions are sketched right after this list).
(4) Solve with iterative optimization methods (gradient descent, conjugate gradient).
(5) Use your experimental data to explain overfitting.
(6) Compare results across different data sizes, hyperparameters, and polynomial degrees.
(7) Any language is allowed, e.g. MATLAB or Python. You may use an off-the-shelf matrix inverse when computing the analytic solution. For gradient descent and conjugate gradient, derive the gradients and write the iterative optimization yourself; ready-made platforms such as the automatic differentiation tools of PyTorch or TensorFlow are not allowed.
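For reference, a standard derivation sketch (textbook identities added here, not part of the original assignment text): with design matrix $X$ (rows $1, x_n, \dots, x_n^M$) and targets $Y$, setting each loss's gradient to zero gives the closed-form optima implemented in the code below.

$$
E(\mathbf{w}) = \tfrac{1}{2}\lVert X\mathbf{w}-Y\rVert^2
\;\Rightarrow\; \nabla E = X^{\top}(X\mathbf{w}-Y)=0
\;\Rightarrow\; \mathbf{w}^{*} = (X^{\top}X)^{-1}X^{\top}Y
$$

$$
\tilde{E}(\mathbf{w}) = \tfrac{1}{2}\lVert X\mathbf{w}-Y\rVert^2 + \tfrac{\lambda}{2}\lVert\mathbf{w}\rVert^2
\;\Rightarrow\; \mathbf{w}^{*} = (X^{\top}X + \lambda I)^{-1}X^{\top}Y
$$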
import numpy as np
import math
from matplotlib import pyplot as plt
def ideal_data(N):
    '''
    Generate a noise-free sine curve.
    :param N: number of data points
    :return: column matrices of x and y coordinates
    '''
    X = np.linspace(0, 1, N)
    Y = np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()
def generator(N):
    '''
    Generate a sine curve with Gaussian noise (loc = 0, scale = 0.3).
    :param N: number of data points
    :return: column matrices of x and y coordinates
    '''
    X = np.linspace(0, 1, N)
    Y = np.random.normal(loc=0.0, scale=0.3, size=N) + np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()
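# Note (added): the noise above is unseeded, so every run draws a fresh
# sample; calling np.random.seed(...) first would make runs reproducible.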
def X_maker(X, M):
    '''
    Expand X into the design matrix
        1 X1^1 X1^2 ... X1^M
        1 X2^1 X2^2 ... X2^M
        ...
        1 Xn^1 Xn^2 ... Xn^M
    :param X: column matrix of x values
    :param M: polynomial degree
    :return: the expanded matrix used by the solvers
    '''
    ending = np.mat(np.ones(X.shape[0])).transpose()
    for i in range(M):
        ending = np.c_[ending, np.multiply(ending[:, i], X)]
    return ending
def w_comput(X, Y):
    '''
    Solve Xw = Y for w via the Moore-Penrose pseudo-inverse
    (the least-squares solution).
    '''
    return np.dot(np.linalg.pinv(X), Y)
def compute(w, X, M):
    '''
    Compute Y = Xw on the expanded design matrix.
    '''
    return np.dot(X_maker(X, M), w)
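# --- Added sanity check (not in the original script) ---
# np.linalg.pinv returns the least-squares solution of Xw = Y, so for a
# well-conditioned design matrix it should match the explicit
# normal-equation solution used later on.
_X_chk, _Y_chk = generator(10)
_D_chk = X_maker(_X_chk, 3)
_w_pinv = np.dot(np.linalg.pinv(_D_chk), _Y_chk)
_w_normal = np.dot(np.dot(np.linalg.inv(np.dot(_D_chk.transpose(), _D_chk)), _D_chk.transpose()), _Y_chk)
assert np.allclose(_w_pinv, _w_normal)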
M = 9
# Fit the curve with a degree-M polynomial
X_train, Y_train = generator(10)
X_Mdegerr = X_maker(X_train, M)
X_ideal, Y_ideal = ideal_data(100)
w = w_comput(X_Mdegerr, Y_train)
Y_test = compute(w, X_ideal, M)
plt.subplot(331)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test)
plt.title("polynomial fitting")
plt.xlabel('x')
plt.ylabel('y')
# Analytic solution: optimum of the loss without the regularization term
def ERMS(X, Y, w):
    '''
    Root-mean-square error: sqrt(||Xw - Y||^2 / N).
    '''
    return math.sqrt((np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y)))) / X.shape[0])
def w_compute_without_regulation(X, Y):
    '''
    w obtained by setting the derivative of the unregularized loss to zero
    (the normal equations): w = (X^T X)^{-1} X^T Y.
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X)), X.transpose()), Y)
w1 = w_compute_without_regulation(X_Mdegerr, Y_train)
Y_test1 = compute(w1, X_ideal, M)
plt.subplot(332)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test1)
plt.title("Without regulation fitting")
plt.xlabel('x')
plt.ylabel('y')
ERMS_list3 = []
degreelist = []
ERMS_list4 = []
X_temp0, Y_ideal_0 = ideal_data(10)
# Training ERMS vs. ERMS against the noise-free targets, for degrees 1..9
for i in range(9):
    degreelist.append(i + 1)
    temp = X_maker(X_train, i + 1)
    w = w_compute_without_regulation(temp, Y_train)
    ERMS_list3.append(ERMS(temp, Y_train, w))
    ERMS_list4.append(ERMS(temp, Y_ideal_0, w))
plt.subplot(333)
plt.plot(np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list3).transpose(),
         np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list4).transpose())
plt.title("ERMS vs. polynomial degree M (no regularization)")
plt.xlabel('M')
plt.ylabel('ERMS')
# Analytic solution: optimum of the loss with the regularization term
def w_compute_with_regulation(X, Y, lamda):
    '''
    w obtained by setting the derivative of the L2-regularized (ridge) loss
    to zero: w = (X^T X + lambda*I)^{-1} X^T Y.
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), X.transpose()), Y)
lamda = np.e ** -10
w2 = w_compute_with_regulation(X_Mdegerr, Y_train, lamda)
Y_test2 = compute(w2, X_ideal, M)
plt.subplot(334)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test2)
plt.title("With regularization, lambda = e^-10")
plt.xlabel('x')
plt.ylabel('y')
# Selecting the optimal lambda for the regularized solution
M1 = 9
X_Mdegerr1 = X_maker(X_train, M1)
ERMS_list1 = []
ERMS_list2 = []
X_temp, Y_test3 = ideal_data(10)
# Sweep ln(lambda): a fine grid for the training error, a coarse grid for
# the error against the noise-free targets.
ln_lamda_fine = [i / 2 - 50 for i in range(100)]   # -50 ... -0.5
ln_lamda_coarse = [i * 5 - 50 for i in range(10)]  # -50 ... -5
for ln_lamda in ln_lamda_fine:
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** ln_lamda)
    ERMS_list1.append(ERMS(X_Mdegerr, Y_train, w))
for ln_lamda in ln_lamda_coarse:
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** ln_lamda)
    ERMS_list2.append(ERMS(X_Mdegerr, Y_test3, w))
plt.subplot(335)
# matplotlib needs column vectors here to plot these matrices correctly
plt.plot(np.mat(ln_lamda_fine).transpose(), np.mat(ERMS_list1).transpose(),
         np.mat(ln_lamda_coarse).transpose(), np.mat(ERMS_list2).transpose())
plt.title("ERMS vs. ln(lambda)")
plt.xlabel('ln(lambda)')
plt.ylabel('ERMS')
# Gradient descent
def loss(X, Y, w, lamda):
    '''
    The quantity to minimize: (||Xw - Y||^2 + lambda * ||w||^2) / (2N).
    '''
    return np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y)) + lamda * np.dot(w.transpose(), w)) * (
            1 / (2 * X.shape[0]))
def partial_derivative(X, Y, w, lamda):
    '''
    Gradient of the regularized squared error with respect to w.
    '''
    return np.dot((np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), w) - np.dot(X.transpose(), Y)
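# Added derivation note: differentiating (1/2)||Xw - Y||^2 + (lambda/2)||w||^2
# with respect to w gives X^T (Xw - Y) + lambda*w = (X^T X + lambda*I) w - X^T Y,
# which is exactly what partial_derivative returns. It omits the 1/N factor
# carried by loss(); that constant only rescales the effective learning rate.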
# lr: learning rate
lr = 0.01
# quit_flag: stop when the loss decrease between two consecutive
# iterations falls below this threshold
quit_flag = 0.000001
w_iteration = np.mat(np.zeros(10)).transpose()  # M + 1 = 10 coefficients
loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
for i in range(99999999):
    w_iteration = w_iteration - lr * partial_derivative(X_Mdegerr, Y_train, w_iteration, lamda)
    loss_old = loss0
    loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
    if math.fabs(loss_old - loss0) < quit_flag:
        break
plt.subplot(336)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_iteration, X_ideal, 9))
plt.title("Gradient descent")
plt.xlabel('x')
plt.ylabel('y')
# Conjugate gradient method
def conjugate_gradient(X, Y, M, lamda, delta):
    '''
    Solve (X^T X + lambda*I) w = X^T Y for w by the conjugate gradient method.
    '''
    X = X_maker(X, M)
    A = np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])
    b = np.dot(X.transpose(), Y)
    w_gc = np.mat(np.zeros(X.shape[1])).transpose()
    r = b - np.dot(A, w_gc)
    p = r
    while True:
        # A is symmetric, so p^T A^T p equals p^T A p
        alpha = np.sum(np.dot(r.transpose(), r)) / np.sum(np.dot(np.dot(p.transpose(), A), p))
        w_gc = w_gc + alpha * p
        r_now = r - alpha * np.dot(A, p)
        if math.fabs(np.sum(np.dot(r_now.transpose(), r_now))) < delta:
            break
        beta = np.sum(np.dot(r_now.transpose(), r_now)) / np.sum(np.dot(r.transpose(), r))
        p = r_now + beta * p
        r = r_now
    return w_gc
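# --- Added sanity check (not in the original script) ---
# Conjugate gradient solves the same system (X^T X + lambda*I) w = X^T Y as
# the analytic ridge solution, so the two weight vectors should roughly
# agree (only roughly: with M = 9 the system is ill-conditioned).
_w_ref = w_compute_with_regulation(X_maker(X_train, 9), Y_train, np.e ** -12)
_w_cg = conjugate_gradient(X_train, Y_train, 9, np.e ** -12, 0.000000001)
print("max |w_cg - w_ref| =", np.max(np.abs(_w_cg - _w_ref)))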
w_gc = conjugate_gradient(X_train, Y_train, 9, np.e ** -12, 0.000000001)
plt.subplot(337)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_gc, X_ideal, 9))
plt.title("conjugate gradient")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
'''
Note: np.multiply is element-wise multiplication; np.dot is true matrix
multiplication. A quick check:
a = np.mat("1 2;3 4")
print(a)
print(np.multiply(a[:, 0], a[:, 0]))
'''