Polynomial Curve Fitting

Objectives:

Master solving with the least-squares method (loss function without a penalty term), optimizing the loss function with an L2 (2-norm) penalty term, gradient descent, and the conjugate gradient method; understand overfitting and ways to overcome it (e.g. adding a penalty term, adding more samples).

Requirements:

(1) Generate data and add noise;
(2) Fit a curve with a high-order polynomial function;
(3) Solve analytically for the optimum of both losses (without and with a regularization term);
(4) Solve for the optimum with optimization methods (gradient descent, conjugate gradient);
(5) Use the experimental data you obtain to explain overfitting;
(6) Compare results with different amounts of data, different hyperparameters, and different polynomial degrees;
(7) Any language may be used, e.g. MATLAB or Python. When computing the analytic solution you may use an off-the-shelf matrix inverse. For gradient descent and conjugate gradient you must derive the gradient and write the iterative optimization yourself; ready-made platforms such as the automatic differentiation tools of PyTorch or TensorFlow are not allowed.
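
For reference, the script below fits a degree-M polynomial y(x, w) = w0 + w1*x + ... + wM*x^M to noisy samples of sin(2*pi*x). The two losses solved analytically are the plain sum of squares ||Xw - Y||^2 and its L2-regularized version ||Xw - Y||^2 + lambda*||w||^2; their minimizers are w = (X^T X)^{-1} X^T Y and w = (X^T X + lambda*I)^{-1} X^T Y, which are exactly the formulas implemented in w_compute_without_regulation and w_compute_with_regulation further down.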

import numpy as np
import math
from matplotlib import pyplot as plt

def ideal_data(N):
    '''
    Generate a noise-free sine curve.
    :param N: number of data points to generate
    :return: x and y coordinates of the generated data
    '''
    X = np.linspace(0, 1, N)
    Y = np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()

def generator(N):
    '''
    Generate a sine curve with added Gaussian noise (loc = 0, scale = 0.3).
    :param N: number of data points to generate
    :return: x and y coordinates of the generated data
    '''
    X = np.linspace(0, 1, N)
    Y = np.random.normal(loc=0.0, scale=0.3, size=N) + np.sin(2 * np.pi * X)
    return np.mat(X).transpose(), np.mat(Y).transpose()
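# Note: generator() draws fresh Gaussian noise on every run; for reproducible
# figures one could call np.random.seed() with a fixed seed before generating
# the training data (a suggestion, not part of the original code).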

def X_maker(X, M):
    '''
    Transform X into
              1 X1^1 X1^2 ... X1^M
              1 X2^1 X2^2 ... X2^M
                          ...
              1 Xn^1 Xn^2 ... Xn^M
    :param X: X as a column matrix
    :param M: degree of the fitted polynomial
    :return: the transformed design matrix used for solving
    '''
    ending = np.mat(np.ones(X.shape[0])).transpose()
    for i in range(M):
        ending = np.c_[ending, np.multiply(ending[:, i], X)]
    return ending
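# A quick illustration of X_maker (not part of the original assignment code):
# X_maker(np.mat([0.0, 0.5, 1.0]).transpose(), 2) returns the design matrix
# [[1.   0.   0.  ]
#  [1.   0.5  0.25]
#  [1.   1.   1.  ]]
# i.e. one row [1, x, x^2, ..., x^M] per sample.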

def w_comput(X, Y):
    '''
    Solve for w in Xw = Y.
    '''
    return np.dot(np.linalg.pinv(X), Y)
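# np.linalg.pinv(X) gives the (minimum-norm) least-squares solution of Xw = Y;
# when X^T X is invertible it coincides with the normal-equation solution
# computed by w_compute_without_regulation below.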

def compute(w, X, M):
    '''
    Evaluate Y from Xw = Y, i.e. compute the fitted polynomial at X.
    '''
    return np.dot(X_maker(X, M), w)

M = 9
# Fit the curve with a high-order polynomial
X_train, Y_train = generator(10)
X_Mdegerr = X_maker(X_train, M)
X_ideal, Y_ideal = ideal_data(100)
w = w_comput(X_Mdegerr, Y_train)
Y_test = compute(w, X_ideal, M)

plt.subplot(331)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test)
plt.title("polynomial fitting")
plt.xlabel('x')
plt.ylabel('y')

# plt.show()

# Analytic solution of the loss: without a regularization term
def ERMS(X, Y, w):
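    '''
    Root-mean-square error: sqrt(||Xw - Y||^2 / N), with N = X.shape[0].
    '''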
    return math.sqrt((np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y)))) / X.shape[0])


def w_compute_without_regulation(X, Y):
    '''
    Compute w by setting the derivative of the unregularized loss to zero
    (normal equation): w = (X^T X)^{-1} X^T Y
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X)), X.transpose()), Y)


w1 = w_compute_without_regulation(X_Mdegerr, Y_train)
Y_test1 = compute(w1, X_ideal, M)

plt.subplot(332)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test1)
plt.title("Without regulation fitting")
plt.xlabel('x')
plt.ylabel('y')

ERMS_list3 = []
degreelist = []
ERMS_list4 = []
X_temp0, Y_ideal_0 = ideal_data(10)
for i in range(9):
    degreelist.append(i + 1)
    temp = X_maker(X_train, i + 1)
    w = w_compute_without_regulation(temp, Y_train)
    ERMS_list3.append(ERMS(temp, Y_train, w))
    ERMS_list4.append(ERMS(temp, Y_ideal_0, w))

plt.subplot(333)
plt.plot(np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list3).transpose(),
         np.mat(np.array(degreelist)).transpose(), np.mat(ERMS_list4).transpose())
plt.title("Without regulation fitting")
plt.xlabel('M')
plt.ylabel('EMRS')
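# In this kind of plot the training ERMS (first curve) typically keeps falling
# as M grows, while the ERMS measured against the noise-free sine (second curve)
# eventually rises again for large M: the high-order polynomial starts fitting
# the noise rather than the underlying curve, which is overfitting.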


# Analytic solution of the loss: with a regularization term
def w_compute_with_regulation(X, Y, lamda):
    '''
    Compute w from the derivative of the L2-regularized loss (ridge regression):
    w = (X^T X + lambda*I)^{-1} X^T Y
    '''
    return np.dot(np.dot(np.linalg.inv(np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), X.transpose()), Y)


lamda = np.e ** -10
w2 = w_compute_with_regulation(X_Mdegerr, Y_train, lamda)
Y_test2 = compute(w2, X_ideal, M)

plt.subplot(334)
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, Y_test2)
plt.title("With regulation fitting lamda = e^-12")
plt.xlabel('x')
plt.ylabel('y')

# Choosing lambda for the regularized (penalized) case


M1 = 9
X_Mdegerr1 = X_maker(X_train, M1)
ERMS_list1 = []
ERMS_list2 = []
X_temp, Y_test3 = ideal_data(10)
for i in range(100):
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** ((i / 2) + -50))
    ERMS_list1.append(ERMS(X_Mdegerr, Y_train, w))

for i in range(10):
    w = w_compute_with_regulation(X_Mdegerr1, Y_train, np.e ** ((i * 5) + -50))
    ERMS_list2.append(ERMS(X_Mdegerr, Y_test3, w))

plt.subplot(335)
# Matrices must be column vectors to display correctly
# The x-axis below is ln(lambda), matching the exponents used in the two loops
# above: -50 to -0.5 in steps of 0.5, and -50 to -5 in steps of 5.
plt.plot(np.mat(np.linspace(-50, -0.5, 100)).transpose(), np.mat(ERMS_list1).transpose(),
         np.mat(np.linspace(-50, -5, 10)).transpose(), np.mat(ERMS_list2).transpose())
plt.title("ERMS vs ln(lambda)")
plt.xlabel('ln(lambda)')
plt.ylabel('ERMS')
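# Reading this plot: as a rule of thumb, the ln(lambda) that minimizes the ERMS
# against the noise-free targets (second curve) is the natural choice of
# regularization strength; very small lambda behaves like the unregularized fit,
# while very large lambda underfits.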


# Gradient descent
def loss(X, Y, w, lamda):
    '''
    Compute the loss, i.e. the quantity to be minimized:
    (||Xw - Y||^2 + lambda * ||w||^2) / (2N)
    '''
    return np.sum(np.dot((np.dot(X, w) - Y).transpose(), (np.dot(X, w) - Y)) + lamda * np.dot(w.transpose(), w)) * (
                1 / (2 * X.shape[0]))


def partial_derivative(X, Y, w, lamda):
    '''
    Partial derivative with respect to w of the (unscaled) regularized
    sum-of-squares loss: (X^T X + lambda*I) w - X^T Y.
    Note: the 1/(2N) factor used in loss() is omitted here, so it is effectively
    absorbed into the learning rate.
    '''
    return np.dot((np.dot(X.transpose(), X) + lamda * np.identity(X.shape[1])), w) - np.dot(X.transpose(), Y)


# lr: learning rate
lr = 0.01
# quit_flag: stop iterating once the absolute change in the loss between two
# consecutive iterations falls below this threshold
quit_flag = 0.000001
w_iteration = np.mat(np.zeros(10)).transpose()
loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
for i in range(99999999):
    w_iteration = w_iteration - lr * partial_derivative(X_Mdegerr, Y_train, w_iteration, lamda)
    loss_old = loss0
    loss0 = loss(X_Mdegerr, Y_train, w_iteration, lamda)
    if math.fabs(loss_old - loss0) < quit_flag:
        break
#print(w_iteration)

plt.subplot(336)
# Matrices must be column vectors to display correctly
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_iteration, X_ideal, 9))
plt.title("Gradient descent")
plt.xlabel('x')
plt.ylabel('y')

# Conjugate gradient method
def conjugate_gradient(X,Y,M,lamda,delta):
    '''
    Compute w with the conjugate gradient method by solving
    (X^T X + lambda*I) w = X^T Y.
    '''
    X = X_maker(X,M)
    A = np.dot(X.transpose(),X) + lamda * np.identity(X.shape[1])
    b = np.dot(X.transpose(),Y)
    w_gc = np.mat(np.zeros(X.shape[1])).transpose()
    r = b - np.dot(A,w_gc)
    p = r
    for i in range(9999999999):
        # alpha = r^T r / (p^T A p); A is symmetric, so A^T = A
        alpha = np.sum(np.dot(r.transpose(), r)) / np.sum(np.dot(np.dot(p.transpose(), A), p))
        w_gc = w_gc + alpha * p
        r_now = r - alpha * np.dot(A,p)
        if math.fabs(np.sum(np.dot(r_now.transpose(),r_now))) < delta:
            break
        beta = np.sum(np.dot(r_now.transpose(),r_now)) /np.sum(np.dot(r.transpose(),r))
        p = r_now + beta * p
        r = r_now
    return w_gc
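# Conjugate gradient solves the same linear system as the regularized analytic
# solution, (X^T X + lambda*I) w = X^T Y; since this system is only
# (M+1)-dimensional, it converges in at most M+1 iterations in exact arithmetic.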

w_gc = conjugate_gradient(X_train,Y_train,9,np.e ** -12,0.000000001)

plt.subplot(337)
# Matrices must be column vectors to display correctly
plt.plot(X_train, Y_train, "ro", X_ideal, Y_ideal, X_ideal, compute(w_gc, X_ideal, 9))
plt.title("conjugate gradient")
plt.xlabel('x')
plt.ylabel('y')

plt.show()



'''
np.multiply is element-wise multiplication; np.dot is true matrix multiplication.
a = np.mat("1 2;3 4")
print(a)
print(np.multiply(a[:, 0], a[:, 0]))
'''
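# Side note: np.mat / np.matrix is de-emphasized in current NumPy; an equivalent
# implementation could use plain ndarrays together with the @ operator for matrix
# products, though the code above keeps np.mat as originally written.
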
Written on January 21, 2020