參考了同學的博客:
https://blog.csdn.net/Willen_/article/details/89261545?from=singlemessage
心得感悟:
萬事開頭難,但咬咬牙熬一熬,Python就顯露出它傳說中簡單易用的特點了。(用慣了MATLAB,用不慣Python,一開始寫的時候很艱難)
以下是實現代碼:
'''
Date 2019/4/15
'''
import numpy as np
import h5py
def load_data(file_name):
    """Load a tab-separated training file into feature and label arrays.

    Every non-blank line must contain at least three tab-separated fields:
    two numeric features followed by an integer label. Fields after the
    third are ignored. Blank lines (for example a trailing empty line) are
    skipped.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A ``(feature_mat, label_mat)`` tuple. ``feature_mat`` is a float
        array with one ``[x1, x2]`` row per sample; ``label_mat`` is an int
        array with one ``[label]`` row per sample.

    Raises:
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    feature_data = []
    label_data = []
    # The context manager closes the file even when a line fails to parse.
    with open(file_name) as fr:
        for line in fr:
            if not line.strip():
                continue
            fields = line.split('\t')
            feature_data.append([float(fields[0]), float(fields[1])])
            # int() tolerates the trailing newline left on the last field.
            label_data.append([int(fields[2])])
    feature_mat = np.array(feature_data, dtype=float)
    label_mat = np.array(label_data, dtype=int)
    return feature_mat, label_mat
def error_rate(h, label_data):
    """Compute the mean binary cross-entropy loss of the predictions.

    Args:
        h: Array of predicted probabilities, same shape as ``label_data``.
        label_data: Array of 0/1 labels; its first dimension is the sample
            count used for averaging.

    Returns:
        The loss as a scalar: ``-1/n * sum(y*log(h) + (1-y)*log(1-h))``.
    """
    n = label_data.shape[0]
    # Keep predictions strictly inside (0, 1): a saturated sigmoid output of
    # exactly 0 or 1 would otherwise produce log(0) and a nan/inf loss.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1.0 - eps)
    log_likelihood = label_data * np.log(h) + (1 - label_data) * np.log(1 - h)
    error = -1.0 / n * np.sum(log_likelihood)
    return np.squeeze(error)
def sig(x):
    """Evaluate the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The computation is arranged so that ``exp`` is only applied to
    non-positive values, which avoids the overflow warning the direct
    formula raises for large negative inputs.

    Args:
        x: Scalar or array of real values (in this file, ``X.dot(w) + b``).

    Returns:
        Sigmoid values with the same shape as ``x``; a scalar input yields a
        scalar result.
    """
    x = np.asarray(x, dtype=float)
    e = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook formula; for x < 0 the algebraically
    # equal form exp(x) / (1 + exp(x)) is used instead.
    z = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return z[()]
def lr_train_bgd(feature_data, label_data, maxCycle, alpha):
    """Train a logistic-regression model by batch gradient descent.

    Weights are initialised from a standard normal distribution and the bias
    from a uniform [0, 1) draw. The current loss is printed every 50
    iterations.

    Args:
        feature_data: 2-D array of shape (n_samples, n_features).
        label_data: 0/1 labels, one per sample; reshaped to a column.
        maxCycle: Number of gradient-descent iterations to run.
        alpha: Learning rate.

    Returns:
        Array of shape (n_features + 1, 1): the learned weights followed by
        the bias term in the last row.
    """
    X = feature_data
    # Force labels into a column so that ``H - Y`` stays (n, 1); a 1-D label
    # vector would otherwise broadcast to an (n, n) matrix.
    Y = np.asarray(label_data).reshape(-1, 1)
    n = X.shape[0]
    w = np.random.randn(X.shape[1], 1)
    b = np.random.random()
    for i in range(maxCycle):
        H = sig(np.dot(X, w) + b)
        # The loss is only needed for progress output, so compute it only on
        # the iterations that actually print it.
        if i % 50 == 0:
            print(error_rate(H, Y))
        residual = H - Y
        dw = 1.0 / n * np.dot(X.T, residual)
        db = 1.0 / n * np.sum(residual)
        w = w - alpha * dw
        b = b - alpha * db
    # Append the bias as the final row so the model is one array.
    return np.vstack((w, b))
def save_model(file_name, w):
    """Write the weight array to an HDF5 file as the dataset ``"w"``.

    The file is opened in ``"w"`` mode, so an existing file of the same name
    is overwritten.

    Args:
        file_name: Path of the HDF5 file to create.
        w: Weight array to store.
    """
    # The context manager closes the file even if create_dataset raises.
    with h5py.File(file_name, "w") as f:
        f.create_dataset("w", data=w)
測試代碼:
if __name__ == "__main__":
    # Script entry point: load the training set, fit the model, save weights.
    print("-----1. load data-----")
    feature_data, label_data = load_data("train_data.txt")
    print("-----2. training-----")
    # 10000 iterations with a learning rate of 0.0001.
    w = lr_train_bgd(feature_data, label_data, 10000, 0.0001)
    print("-----3. save model-----")
    save_model("weights", w)
數據集:https://download.csdn.net/download/thisismykungfu/11136541