Machine Learning ex1: Multivariate Linear Regression

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Mean normalization: rescale each column to zero mean and unit variance
data = pd.read_csv('ex1data2.txt', names=['house sizes', 'bedroom numbers', 'price'])

def feature_normalization(data):
    # (x - mean) / std, applied column by column
    return data.apply(lambda column: (column - column.mean()) / column.std())

data = feature_normalization(data)
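# Quick sanity check (my addition, not part of the exercise): after
# normalization every column should have mean close to 0 and std close to 1.
print(data.mean())  # values near 0
print(data.std())   # values near 1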

# Build the design matrix X and target vector y
x1 = data['house sizes']
x2 = data['bedroom numbers']
y = data['price']
m = len(y)
ones = pd.DataFrame({'ones': np.ones(m)})  # intercept column
X = pd.concat([ones, x1, x2], axis=1)

def J_function(X, y, theta):
    # Vectorized cost: J(theta) = (1 / 2m) * (X.theta - y)^T (X.theta - y)
    m = len(y)
    h_theta = X.dot(theta)
    cost = (h_theta - y).T.dot(h_theta - y) / (2 * m)
    return cost
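# To make the vectorized cost concrete, here is an equivalent explicit-loop
# version (a sketch I added for illustration; the helper name J_loop is mine):
def J_loop(X, y, theta):
    m = len(y)
    total = 0.0
    for i in range(m):
        total += (X.iloc[i].dot(theta) - y.iloc[i]) ** 2
    return total / (2 * m)

# Both forms agree, e.g. on a zero parameter vector:
theta_test = np.zeros(X.shape[1])
assert np.isclose(J_function(X, y, theta_test), J_loop(X, y, theta_test))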

def gradient(X, y, theta):
    # Vectorized gradient: dJ/dtheta = (1 / m) * X^T (X.theta - y)
    m = len(y)
    return X.T.dot(X.dot(theta) - y) / m
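# Gradient check (my addition): verify the analytic gradient against a
# central finite-difference approximation of dJ/dtheta_j. For this quadratic
# cost the two should agree essentially to machine precision.
def numerical_gradient(X, y, theta, eps=1e-6):
    grad = np.zeros_like(theta)
    for j in range(len(theta)):
        step = np.zeros_like(theta)
        step[j] = eps
        grad[j] = (J_function(X, y, theta + step) - J_function(X, y, theta - step)) / (2 * eps)
    return grad

theta_check = np.random.randn(X.shape[1])
assert np.allclose(gradient(X, y, theta_check), numerical_gradient(X, y, theta_check))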

theta = np.zeros(X.shape[1])  # initialize all parameters to zero

def batch_gradient_descent(X, y, theta, alpha, iterations):
    # Record the cost at every step so convergence can be plotted
    cost_data = [J_function(X, y, theta)]
    _theta = theta.copy()
    for _ in range(iterations):
        _theta = _theta - alpha * gradient(X, y, _theta)
        cost_data.append(J_function(X, y, _theta))
    return _theta, cost_data


# Gradient descent converges with a suitable learning rate
alpha = 0.03
iterations = 50
_theta, cost_data = batch_gradient_descent(X, y, theta, alpha, iterations)
print(_theta)
plt.plot(np.arange(len(cost_data)), cost_data)
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.show()
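# Worked example (my addition; the 1650 sq ft / 3 bedroom query point is a
# hypothetical input): predict a price with the learned parameters. New inputs
# must be normalized with the training statistics, and the normalized
# prediction mapped back to the original price scale.
raw = pd.read_csv('ex1data2.txt', names=['house sizes', 'bedroom numbers', 'price'])
mu, sigma = raw.mean(), raw.std()
x_new = np.array([1.0,  # intercept term
                  (1650 - mu['house sizes']) / sigma['house sizes'],
                  (3 - mu['bedroom numbers']) / sigma['bedroom numbers']])
price = x_new.dot(_theta) * sigma['price'] + mu['price']  # de-normalize
print(price)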

# Normal equations: closed-form solution for the optimal parameters
def normal_equations(X, y):
    # theta = (X^T X)^{-1} X^T y
    theta = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
    return theta

theta = normal_equations(X, y)
print(theta)
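# Numerical note (my addition): explicitly inverting X^T X can be fragile
# when it is ill-conditioned. A least-squares solver reaches the same
# solution without forming the inverse:
theta_lstsq, *_ = np.linalg.lstsq(np.asarray(X), np.asarray(y), rcond=None)
print(theta_lstsq)  # should match the normal-equation result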


# Choosing a learning rate is a craft I haven't mastered yet; sweep a
# log-spaced grid of candidates and compare the convergence curves
base = np.logspace(-1, -5, num=4)
candidate = np.sort(np.concatenate((base, base * 3)))

iterations = 50

for alpha in candidate:
    # Restart from zeros each time; `theta` now holds the normal-equation optimum
    theta0 = np.zeros(X.shape[1])
    _theta, cost_data = batch_gradient_descent(X, y, theta0, alpha, iterations)
    plt.plot(np.arange(len(cost_data)), cost_data, label=f'alpha = {alpha:g}')
plt.xlabel('Number of iterations')
plt.ylabel('Cost J')
plt.legend()
plt.show()
