西瓜書-對率迴歸-梯度下降法

1 數據集:

數據集的 .txt 格式文件可以在我的博客裏找到。

你也可以自己構造(需先執行 import numpy as np),代碼如下:

# Density and sugar-rate measurements for 17 watermelon samples, each stored
# as a (17, 1) column vector.
density = np.array([
    0.697, 0.774, 0.634, 0.608, 0.556, 0.430, 0.481, 0.437, 0.666,
    0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719,
]).reshape(-1, 1)
sugar_rate = np.array([
    0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091,
    0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103,
]).reshape(-1, 1)
# Design matrix: a leading bias column of ones followed by the two features.
xtrain = np.hstack((np.ones((density.shape[0], 1)), density, sugar_rate))
# Labels as a column vector: the first 8 samples are 1, the remaining 9 are 0.
ytrain = np.array([1] * 8 + [0] * 9).reshape(-1, 1)

2 算法實現代碼如下:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
# Whether the features should be standardized before training/prediction.
scale = False

# Load the tab-separated dataset.
data = np.genfromtxt(r"F:\CDA數據分析\data\西瓜書\14917341.txt", delimiter="\t")

# Split the table: the first three columns are used as features and the last
# column as the label (kept 2-D, one row per sample).
# NOTE(review): if the file has only three columns, the feature slice would
# include the label column — confirm the file layout.
x_data = data[:, :3]
y_data = data[:, -1, np.newaxis]
print(x_data.shape)
print(y_data.shape)
# Prepend a bias column of ones; its height follows the data instead of a
# hard-coded sample count.
X_data = np.concatenate((np.ones((x_data.shape[0], 1)), x_data), axis=1)
print(X_data.shape)

def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^(-x)) element-wise to ``x``."""
    decay = np.exp(-x)
    return 1.0 / (1 + decay)

def cost(xMat, yMat, ws):
    """Return the mean cross-entropy loss of a logistic-regression model.

    Args:
        xMat: Sample matrix with one row per sample (``np.matrix`` or 2-D
            array-like).
        yMat: Labels (0 or 1), one per sample; any shape that flattens to
            one value per row of ``xMat``.
        ws: Weight vector with one entry per column of ``xMat``.

    Returns:
        The scalar ``-mean(y*log(p) + (1-y)*log(1-p))`` where
        ``p = sigmoid(xMat . ws)``.
    """
    X = np.asarray(xMat, dtype=float)
    w = np.asarray(ws, dtype=float).reshape(-1, 1)
    y = np.asarray(yMat, dtype=float).reshape(-1)
    z = np.dot(X, w).reshape(-1)
    # Evaluate the log-probabilities directly instead of log(sigmoid(z)):
    #   log(sigmoid(z))     = -log(1 + e^(-z)) = -logaddexp(0, -z)
    #   log(1 - sigmoid(z)) = -log(1 + e^(z))  = -logaddexp(0,  z)
    # This stays finite when the sigmoid saturates to exactly 0 or 1, where
    # the naive form yields log(0) = -inf and 0 * -inf = nan.
    log_p = -np.logaddexp(0.0, -z)
    log_not_p = -np.logaddexp(0.0, z)
    return np.sum(y * log_p + (1 - y) * log_not_p) / -len(X)

def gradAscent(xArr, yArr, lr=0.001, epochs=10000, cost_every=50):
    """Fit logistic-regression weights with batch gradient descent.

    Despite the name, each step subtracts the gradient of the cross-entropy
    loss from the weights, i.e. it performs gradient descent.

    Args:
        xArr: Sample matrix, one row per sample (bias column included by the
            caller if desired).
        yArr: Labels (0 or 1), one per sample.
        lr: Learning rate (step size).
        epochs: Number of update steps is ``epochs + 1``.
        cost_every: The cost is recorded on every step whose index is a
            multiple of this value.

    Returns:
        A tuple ``(ws, costList)``: the learned weights as an ``(n, 1)``
        ``np.matrix`` and the list of recorded cost values.

    Raises:
        ValueError: If ``cost_every`` is not positive.
    """
    if cost_every <= 0:
        raise ValueError("cost_every must be a positive integer")

    if scale:
        # NOTE(review): standardizing centers every column, so a constant
        # bias column would become all zeros — confirm before enabling.
        xArr = preprocessing.scale(xArr)

    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(xArr)
    # Force a column so that 1-D label input lines up with the predictions.
    yMat = np.asmatrix(yArr).reshape(-1, 1)

    # One weight per column of the sample matrix, all initialized to 1.
    m, n = np.shape(xMat)
    ws = np.asmatrix(np.ones((n, 1)))

    costList = []
    for i in range(epochs + 1):
        # Predicted probabilities for the current weights.
        h = sigmoid(xMat * ws)
        # Mean gradient of the cross-entropy loss with respect to the weights.
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad

        if i % cost_every == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList

# Train the model; returns the learned weights and the recorded cost history.
ws,costList = gradAscent(X_data, y_data)
print(ws)

# Plot how the loss evolves. The x axis is derived from the number of recorded
# costs (one every 50 steps in gradAscent) rather than a hard-coded point
# count, so the plot does not fail if the history length changes.
x = np.arange(len(costList)) * 50
plt.plot(x, costList, c='r')
# plt.title('Train')
# plt.xlabel('Epochs')
# plt.ylabel('Cost')
plt.show()

# Prediction
def predict(x_data, ws):
    """Classify samples with learned logistic-regression weights.

    Args:
        x_data: Sample matrix, one row per sample, laid out like the matrix
            used for training.
        ws: Weight column vector with one entry per column of ``x_data``.

    Returns:
        A list of ints, 1 where the predicted probability is at least 0.5
        and 0 otherwise.
    """
    if scale:
        # NOTE(review): this standardizes with the statistics of the data
        # being predicted, not those of the training data — confirm intent.
        x_data = preprocessing.scale(x_data)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    xMat = np.asmatrix(x_data)
    ws = np.asmatrix(ws)
    # Flatten to plain floats so each threshold test is a scalar comparison.
    probabilities = np.asarray(sigmoid(xMat * ws)).ravel()
    return [1 if p >= 0.5 else 0 for p in probabilities]

# Classify the training samples with the learned weights and print the
# per-class precision/recall report.
predictions = predict(X_data, ws)

print(classification_report(y_true=y_data, y_pred=predictions))

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章