1 數據集:
我的博客裏提供了 .txt 格式的數據集文件;
你也可以用下面的代碼自己構造同樣的數據:
# Build the 17-sample data set by hand (alternative to loading the .txt file).
# The import lives here so that this snippet can be run on its own.
import numpy as np

# One value per sample, stored as column vectors of shape (17, 1).
density = np.array([
    0.697, 0.774, 0.634, 0.608, 0.556, 0.430, 0.481, 0.437, 0.666,
    0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719,
]).reshape(-1, 1)
sugar_rate = np.array([
    0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091,
    0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103,
]).reshape(-1, 1)
# Design matrix: a leading column of ones (bias term) followed by the two features.
xtrain = np.hstack((np.ones_like(density), density, sugar_rate))
# Labels as a column vector: the first 8 samples are class 1, the remaining 9 are class 0.
ytrain = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(-1, 1)
2 算法實現代碼如下:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import classification_report
from sklearn import preprocessing
from sklearn.preprocessing import PolynomialFeatures
# Whether the features should be standardized before training / prediction.
scale = False
# Load the data set (tab-separated text file).
data = np.genfromtxt(r"F:\CDA數據分析\data\西瓜書\14917341.txt", delimiter="\t")
# Split into features (first three columns) and labels (last column, kept 2-D).
# NOTE(review): if the file only has three columns, data[:, :3] also contains
# the label column -- confirm the file layout.
x_data = data[:, :3]
y_data = data[:, -1, np.newaxis]
# Both slices are 2-D arrays, so their shapes can be printed directly.
print(x_data.shape)
print(y_data.shape)
# Prepend the bias column; the row count is taken from the data instead of
# being hard-coded to 17 so that other data sets load without edits.
X_data = np.concatenate((np.ones((x_data.shape[0], 1)), x_data), axis=1)
print(X_data.shape)
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: A scalar, ``numpy.ndarray`` or ``numpy.matrix``.

    Returns:
        The logistic function of ``x``, with whatever shape and container
        type NumPy produces for ``np.exp(-x)``.
    """
    return 1.0 / (1 + np.exp(-x))
def cost(xMat, yMat, ws):
    """Return the mean cross-entropy loss of a logistic-regression model.

    Args:
        xMat: Design matrix of shape (m, n); ``ndarray`` or ``matrix``.
        yMat: The m labels (0 or 1), shaped (m, 1) or (m,).
        ws: The n weights, shaped (n, 1) or (n,).

    Returns:
        The average of ``-[y*log(p) + (1-y)*log(1-p)]`` over the m samples,
        where ``p = sigmoid(xMat . ws)``.
    """
    features = np.asarray(xMat, dtype=float)
    labels = np.asarray(yMat, dtype=float).ravel()
    weights = np.asarray(ws, dtype=float).ravel()
    # Explicit matrix product so plain ndarrays work as well as np.matrix.
    z = features @ weights
    # log(sigmoid(z)) == -logaddexp(0, -z) and log(1 - sigmoid(z)) == -logaddexp(0, z).
    # Using logaddexp avoids log(0) (and the resulting nan/inf) when the
    # sigmoid saturates at exactly 0 or 1.
    losses = labels * np.logaddexp(0, -z) + (1 - labels) * np.logaddexp(0, z)
    return np.sum(losses) / len(features)
def gradAscent(xArr, yArr, lr=0.001, epochs=10000, cost_every=50):
    """Fit logistic-regression weights by batch gradient steps on the cost.

    Despite the name, each step subtracts ``lr`` times the gradient from the
    weights, i.e. it minimizes the cost.

    Args:
        xArr: Design matrix of shape (m, n).
        yArr: Labels of shape (m, 1).
        lr: Step size of each update.
        epochs: Number of passes; ``epochs + 1`` updates are performed.
        cost_every: The cost is recorded on every ``cost_every``-th iteration,
            starting with iteration 0.

    Returns:
        A tuple ``(ws, costList)``: the (n, 1) weight matrix and the list of
        recorded cost values.
    """
    if scale:
        # NOTE(review): if xArr already contains a constant bias column (as
        # X_data does), standardizing presumably zeroes that column -- confirm
        # against sklearn's handling of zero-variance features.
        xArr = preprocessing.scale(xArr)
    # np.asmatrix is the spelling that still exists in NumPy 2.x (np.mat was
    # removed); matrix semantics keep `*` below a matrix product.
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    # One weight per column of the design matrix.
    m, n = np.shape(xMat)
    ws = np.asmatrix(np.ones((n, 1)))
    costList = []
    for i in range(epochs + 1):
        # Predicted probabilities for the current weights.
        h = sigmoid(xMat * ws)
        # Gradient of the mean cross-entropy with respect to the weights.
        ws_grad = xMat.T * (h - yMat) / m
        ws = ws - lr * ws_grad
        if i % cost_every == 0:
            costList.append(cost(xMat, yMat, ws))
    return ws, costList
# Train the model; returns the fitted weights and the recorded cost values.
ws, costList = gradAscent(X_data, y_data)
print(ws)
# Plot how the cost evolves. gradAscent records one value every 50 iterations
# starting at iteration 0, so the x axis is derived from the number of recorded
# points instead of a hard-coded linspace that must match the epoch count.
x = np.arange(len(costList)) * 50
plt.plot(x, costList, c='r')
# plt.title('Train')
# plt.xlabel('Epochs')
# plt.ylabel('Cost')
plt.show()
def predict(x_data, ws):
    """Classify samples with fitted logistic-regression weights.

    Args:
        x_data: Design matrix of shape (m, n).
        ws: Weight column of shape (n, 1).

    Returns:
        A list of m ints: 1 where the predicted probability is >= 0.5, else 0.
    """
    if scale:
        # NOTE(review): this standardizes with the statistics of x_data itself,
        # not those of the training data -- confirm this is intended.
        x_data = preprocessing.scale(x_data)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.x.
    xMat = np.asmatrix(x_data)
    ws = np.asmatrix(ws)
    # Flatten the (m, 1) probability column so each element is a plain scalar.
    probabilities = np.asarray(sigmoid(xMat * ws)).ravel()
    return [1 if p >= 0.5 else 0 for p in probabilities]
# Classify the same samples the model was trained on and print sklearn's
# classification report comparing them with the true labels.
predictions = predict(X_data, ws)
print(classification_report(y_true=y_data, y_pred=predictions))