Polynomial Curve Fitting
Goals:
Master least-squares solutions (loss function without a penalty term), optimization of the loss with an L2 penalty term, gradient descent, and the conjugate gradient method; understand overfitting and ways to overcome it (e.g. adding a penalty term, adding more samples).
Requirements:
(1) Generate data and add noise;
(2) Fit the curve with a high-order polynomial;
(3) Solve for the optimum of both losses analytically (without and with a regularization term; the closed forms are restated after this list for reference);
(4) Solve for the optimum with iterative methods (gradient descent, conjugate gradient);
(5) Use your experimental data to explain overfitting;
(6) Compare results across different data sizes, hyperparameters, and polynomial degrees;
(7) Any language may be used, e.g. MATLAB or Python. Off-the-shelf matrix inversion may be used for the analytic solutions, but for gradient descent and conjugate gradient you must derive the gradients and write the iterative optimization yourself. Ready-made platforms such as the automatic differentiation tools of PyTorch or TensorFlow are not allowed.
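For reference, the closed-form solutions implemented below follow from setting the gradient of each loss to zero (the standard least-squares and ridge-regression results; X denotes the design matrix built by X_maker):

$E(w) = \tfrac{1}{2}\|Xw - y\|^2 \;\Rightarrow\; w^* = (X^\top X)^{-1} X^\top y$

$\widetilde{E}(w) = \tfrac{1}{2}\|Xw - y\|^2 + \tfrac{\lambda}{2}\|w\|^2 \;\Rightarrow\; w^* = (X^\top X + \lambda I)^{-1} X^\top y$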
import numpy as np
import math
from matplotlib import pyplot as plt
def ideal_data(N):
    '''
    Generate the noise-free sine curve
    :param N: number of data points to generate
    :return: x and y coordinates of the generated data
    '''
    X = np.linspace(0, 1, N)
    Y = np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()
def generator(N):
    '''
    Generate a sine curve with Gaussian noise (loc = 0, scale = 0.3)
    :param N: number of data points to generate
    :return: x and y coordinates of the generated data
    '''
    X = np.linspace(0, 1, N)
    Y = np.random.normal(loc=0.0, scale=0.3, size=N) + np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()
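# Note (an addition, not in the original script): for repeatable experiments the
# RNG can be seeded before generating data, e.g. np.random.seed(0).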
def X_maker(X, M):
    '''
    Expand X into the design (Vandermonde) matrix
        1 X1^1 X1^2 ... X1^M
        1 X2^1 X2^2 ... X2^M
        ...
        1 Xn^1 Xn^2 ... Xn^M
    :param X: column matrix of inputs
    :param M: degree of the polynomial fit
    :return: the transformed matrix of X used by the solvers
    '''
    ending = np.mat(np.ones(X.shape[0])).transpose()
    for i in range(M):
        ending = np.c_[ending, np.multiply(ending[:, i], X)]
    return ending
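# An illustrative check (not part of the original run): X_maker(np.mat([[0.5], [2.0]]), 2)
# returns [[1. 0.5 0.25], [1. 2. 4.]]. The same matrix could also be built with
# np.vander(np.asarray(X).ravel(), M + 1, increasing=True).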
def w_comput(X, Y):
    '''
    Solve for w in Xw = Y via the Moore-Penrose pseudoinverse
    '''
    return np.dot(np.linalg.pinv(X), Y)
def compute(w, X, M):
    '''
    Compute Y from Xw = Y
    '''
    return np.dot(X_maker(X, M), w)
M = 9
# Fit the curve with a high-order polynomial
X_train, Y_train = generator(10)
X_Mdegerr = X_maker(X_train, M)
X_ideal, Y_ideal = ideal_data(100)
w = w_comput(X_Mdegerr, Y_train)
Y_test = compute(w, X_ideal, M)
plt.subplot(331)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test)
plt.title("polynomial fitting")
plt.xlabel('x')
plt.ylabel('y')
# plt.show()
# Analytic solution of the loss without a regularization term
def ERMS(X, Y, w):
    '''
    Root-mean-square error: sqrt(||Xw - Y||^2 / N)
    '''
    return math.sqrt(np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y))) / X.shape[0])
def w_compute_without_regulation(X, Y):
    '''
    Compute w by setting the derivative of the unregularized loss to zero
    (the normal equations)
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X)), X.transpose()), Y)
w1 = w_compute_without_regulation(X_Mdegerr, Y_train)
Y_test1 = compute(w1, X_ideal, M)
plt.subplot(332)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test1)
plt.title("Without regulation fitting")
plt.xlabel('x')
plt.ylabel('y')
ERMS_list3 = []
degreelist = []
ERMS_list4 = []
X_temp0, Y_ideal_0 = ideal_data(10)
for i in range(9):
    degreelist.append(i + 1)
    temp = X_maker(X_train, i + 1)
    w = w_compute_without_regulation(temp, Y_train)
    ERMS_list3.append(ERMS(temp, Y_train, w))
    ERMS_list4.append(ERMS(temp, Y_ideal_0, w))
plt.subplot(333)
plt.plot(np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list3).transpose(),
         np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list4).transpose())
plt.title("Without regulation fitting")
plt.xlabel('M')
plt.ylabel('EMRS')
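# In a typical run, the training ERMS keeps falling as M grows, while the ERMS
# measured against the noise-free curve turns upward at high M; that widening gap
# between the two curves is the overfitting the assignment asks to explain.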
# Analytic solution of the loss with a regularization term
def w_compute_with_regulation(X, Y, lamda):
    '''
    Compute w by setting the derivative of the regularized loss to zero
    (ridge regression)
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), X.transpose()), Y)
lamda = np.e ** -10
w2 = w_compute_with_regulation(X_Mdegerr, Y_train, lamda)
Y_test2 = compute(w2, X_ideal, M)
plt.subplot(334)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test2)
plt.title("With regulation fitting lamda = e^-12")
plt.xlabel('x')
plt.ylabel('y')
# Choosing the optimal lambda for the penalized solution
M1 = 9
X_Mdegerr1 = X_maker(X_train, M1)
ERMS_list1 = []
ERMS_list2 = []
X_temp, Y_test3 = ideal_data(10)
lnlamda_train = []
lnlamda_test = []
for i in range(100):
    lnlamda_train.append(i / 2 - 50)
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** (i / 2 - 50))
    ERMS_list1.append(ERMS(X_Mdegerr1, Y_train, w))
for i in range(10):
    lnlamda_test.append(i * 5 - 50)
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** (i * 5 - 50))
    ERMS_list2.append(ERMS(X_Mdegerr1, Y_test3, w))
plt.subplot(335)
# matrices must be column vectors to display correctly; the x-axis uses the
# ln(lambda) values actually swept above
plt.plot(np.mat(lnlamda_train).transpose(), np.mat(ERMS_list1).transpose(),
         np.mat(lnlamda_test).transpose(), np.mat(ERMS_list2).transpose())
plt.title("EMRS praph with lamda")
plt.xlabel('lamda')
plt.ylabel('EMRS')
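# In a typical run, the training ERMS rises with lambda (stronger shrinkage), while
# the ERMS against the noise-free data dips at some intermediate lambda; that
# minimum marks a reasonable choice of the hyperparameter.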
# Gradient descent
def loss(X, Y, w, lamda):
    '''
    Compute the regularized loss, i.e. the quantity to be minimized:
    (||Xw - Y||^2 + lamda * w^T w) / (2N)
    '''
    return np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y))
                  + lamda * np.dot(w.transpose(), w)) / (2 * X.shape[0])
def partial_derivative(X, Y, w, lamda):
    '''
    Partial derivative of the loss with respect to w; the constant 1/N
    factor inside loss() is absorbed into the learning rate
    '''
    return np.dot((np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), w) - np.dot(X.transpose(), Y)
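# A minimal finite-difference sanity check of the hand-written gradient (a sketch,
# not part of the original experiment; gradient_check and eps are illustrative
# additions). partial_derivative above is the gradient of the unaveraged loss
# g(w) = 0.5 * (||Xw - Y||^2 + lamda * w^T w); the 1/N factor inside loss()
# only rescales it and is absorbed into the learning rate below.
def gradient_check(X, Y, w, lamda, eps=1e-6):
    def g(v):
        r = np.dot(X, v) - Y
        return 0.5 * (np.sum(np.multiply(r, r)) + lamda * np.sum(np.multiply(v, v)))
    analytic = partial_derivative(X, Y, w, lamda)
    for j in range(w.shape[0]):
        e = np.mat(np.zeros(w.shape[0])).transpose()
        e[j, 0] = eps
        numeric = (g(w + e) - g(w - e)) / (2 * eps)
        assert abs(numeric - analytic[j, 0]) < 1e-4
# Example (commented out to keep the original run unchanged):
# gradient_check(X_Mdegerr, Y_train, np.mat(np.random.randn(10)).transpose(), np.e ** -10)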
# lr: learning rate
lr = 0.01
# quit_flag: stop when the change in loss between two consecutive iterations falls below this threshold
quit_flag = 0.000001
w_iteration = np.mat(np.zeros(10)).transpose()  # M + 1 = 10 coefficients, initialized to zero
loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
for i in range(99999999):
    w_iteration = w_iteration - lr * partial_derivative(X_Mdegerr, Y_train, w_iteration, lamda)
    loss_old = loss0
    loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
    if math.fabs(loss_old - loss0) < quit_flag:
        break
plt.subplot(336)
# matrices must be column vectors to display correctly
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_iteration, X_ideal, 9))
plt.title("Gradient descent")
plt.xlabel('x')
plt.ylabel('y')
# Conjugate gradient method
def conjugate_gradient(X, Y, M, lamda, delta):
    '''
    Compute w with the conjugate gradient method by solving the normal
    equations (X^T X + lamda * I) w = X^T Y
    '''
    X = X_maker(X, M)
    A = np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])
    b = np.dot(X.transpose(), Y)
    w_gc = np.mat(np.zeros(X.shape[1])).transpose()
    r = b - np.dot(A, w_gc)
    p = r
    while True:
        # A is symmetric, so A.transpose() is not needed in the denominator
        alpha = np.sum(np.dot(r.transpose(), r)) / np.sum(np.dot(np.dot(p.transpose(), A), p))
        w_gc = w_gc + alpha * p
        r_now = r - alpha * np.dot(A, p)
        if math.fabs(np.sum(np.dot(r_now.transpose(), r_now))) < delta:
            break
        beta = np.sum(np.dot(r_now.transpose(), r_now)) / np.sum(np.dot(r.transpose(), r))
        p = r_now + beta * p
        r = r_now
    return w_gc
w_gc = conjugate_gradient(X_train,Y_train,9,np.e ** -12,0.000000001)
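# Sanity check (a sketch, not part of the original experiment): the system matrix
# A = X^T X + lamda * I is symmetric positive definite, so CG converges in at most
# M + 1 = 10 steps and w_gc should match the closed-form ridge solution:
# w_closed = w_compute_with_regulation(X_maker(X_train, 9), Y_train, np.e ** -12)
# print(np.max(np.abs(w_gc - w_closed)))  # expected to be near zero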
plt.subplot(337)
# matrices must be column vectors to display correctly
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_gc, X_ideal, 9))
plt.title("Conjugate gradient")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
'''
Note: np.multiply is element-wise multiplication; np.dot is true matrix multiplication.
a = np.mat("1 2;3 4")
print(a)
print(np.multiply(a[:, 0], a[:, 0]))
'''
Written on January 21, 2020