斯坦福大學機器學習作業題Problem Set #1 Regression for denoising quasar spectra 上篇



# -*- coding: utf-8 -*-
import numpy as np
import math
import matplotlib.pyplot as plt
def read(i):
    fr=open('quasar_train.csv','r')
    arrayline=fr.readlines()
    y=arrayline[1].strip().split(',')
    n=len(y)
    y=map(lambda y: float(y), y)
    x=np.zeros((2,n))
    x[0]=arrayline[0].strip().split(',')
    x[1]=1
    return x,y
def linear_regression_theta(x,y):             #計算線性迴歸係數
    xx=np.dot(x,np.transpose(x))
    xx_inverse=np.linalg.inv(xx)
    xx_inverse_x=np.dot(xx_inverse,x)
    xx_inverse_x_y=np.dot(xx_inverse_x,np.transpose(y))
    theta=xx_inverse_x_y
    y1 =[]
    for i in range(len(x[0])):
        y1.append(x[0][i] * theta[0] + theta[1])
    return y1
def weighted_linear_regression(x,y,t):#加權線性迴歸
    y2=[]
    for i in range(len(x[0])):
        w=np.zeros((len(x[0]),len(x[0])))
        for j in range(len(x[0])):
            w[j][j]=math.exp((x[0][i] - x[0][j])*(x[0][i] - x[0][j]) / (-2 * t * t))
        xwx=np.dot(np.dot(x,w),np.transpose(x))
        xwx_inverse=np.linalg.inv(xwx)
        xwx_inverse_x=np.dot(xwx_inverse,x)
        xwx_inverse_x_w = np.dot(xwx_inverse_x,w)
        xwx_inverse_x_w_y=np.dot(xwx_inverse_x_w,np.transpose(y))
        theta=xwx_inverse_x_w_y
        y2.append(x[0][i] * theta[0] + theta[1])
    return y2
def figure(x,y,y1,y2,y3,y4,y5,y6):        #畫圖
    plt.figure(1)
    plt.xlabel('Wavelength')
    plt.ylabel('Flux')
    p1 = plt.scatter(x[0],y, marker='.', color='b', label='real value', s=10)
    p2 = plt.plot(x[0], y1, color='r', linewidth=1, label='linear regression')
    p3 = plt.plot(x[0], y2, color='g', linewidth=2, label='weighted linear regression t=5')
    plt.legend(loc='upper right')
    plt.show()
    plt.figure(2)
    plt.xlabel('Wavelength')
    plt.ylabel('Flux')
    p1 = plt.scatter(x[0], y, marker='.', color='blue', label='real value', s=10)
    p4 = plt.plot(x[0], y3, color='grey', linewidth=1.5, label='weighted linear regression t=1')
    p5 = plt.plot(x[0], y4, color='black', linewidth=2.5, label='weighted linear regression t=10')
    p6 = plt.plot(x[0], y5, color='green', linewidth=2.5, label='weighted linear regression t=100')
    p7 = plt.plot(x[0], y6, color='orange', linewidth=1.5, label='weighted linear regression t=1000')
    plt.legend(loc='upper right')
    plt.show()
def main():
    x,y=read(1)
    y1=linear_regression_theta(x,y)
    y2= weighted_linear_regression(x,y,5)
    y3= weighted_linear_regression(x,y,1)
    y4= weighted_linear_regression(x,y,10)
    y5= weighted_linear_regression(x,y,100)
    y6= weighted_linear_regression(x,y,1000)
    figure(x,y,y1,y2,y3,y4,y5,y6)
if __name__ == '__main__':
    main()


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章