機器學習 - 研究生課程 - Python代碼實現與筆記——Logistic Regression

參考的同學的博客:

https://blog.csdn.net/Willen_/article/details/89261545?from=singlemessage

心得感悟:

萬事開頭難,但咬咬牙熬一熬,Python就顯露出它傳說中簡單易用的特點了。(用慣了MATLAB,用不慣Python,一開始寫的時候很艱難)

以下是實現代碼:

'''
Date 2019/4/15
'''
import numpy as np
import h5py

def load_data(file_name):
    '''Load a tab-separated training file.

    Each line is expected to hold: feature1 \t feature2 \t label.
    input: file_name(string) path to the data file
    output: feature_mat(mat) shape (n, 2), float features
            label_mat(mat) shape (n, 1), int 0/1 labels
    '''
    feature_data = []
    label_data = []
    # `with` guarantees the file handle is closed even if a line is malformed
    with open(file_name) as fr:
        for line in fr:
            lineArr = line.split('\t')
            # columns 0-1 are the two features, column 2 is the label
            # (int() tolerates the trailing newline on the last column)
            feature_data.append([float(lineArr[0]), float(lineArr[1])])
            label_data.append([int(lineArr[2])])
    feature_mat = np.array(feature_data, dtype=float)
    label_mat = np.array(label_data, dtype=int)
    return feature_mat, label_mat

def error_rate(h, label_data):
    '''Mean cross-entropy (negative log-likelihood) of the predictions.

    input: h(mat) predicted probabilities, each entry in (0, 1)
           label_data(mat) ground-truth 0/1 labels, same shape as h
    output: error(float) averaged cross-entropy loss
    '''
    m = label_data.shape[0]
    # per-sample log-likelihood: y*log(h) + (1-y)*log(1-h)
    log_likelihood = label_data * np.log(h) + (1 - label_data) * np.log(1 - h)
    error = -1.0 / m * np.sum(log_likelihood)
    return np.squeeze(error)

def sig(x):
    '''Element-wise sigmoid function.

    input: x(mat) linear scores, feature_data * w + b
    output: (mat) 1 / (1 + exp(-x)), each entry in (0, 1)
    '''
    return 1.0 / (1 + np.exp(-x))

def lr_train_bgd(feature_data, label_data, maxCycle, alpha):
    '''Train a logistic-regression model by batch gradient descent.

    input: feature_data(mat) shape (n, d) training features
           label_data(mat) shape (n, 1) 0/1 labels
           maxCycle(int) number of gradient-descent iterations
           alpha(float) learning rate
    output: w(mat) shape (d + 1, 1) learned weights, with the scalar
            bias b appended as the last row
    '''
    X = feature_data
    Y = label_data
    # random initialisation; the randn-then-random call order is kept so
    # results stay reproducible under a fixed numpy seed
    w = np.random.randn(feature_data.shape[1], 1)
    b = np.random.random()
    n = feature_data.shape[0]
    for i in range(0, maxCycle):
        H = sig(np.dot(X, w) + b)
        # the loss is monitoring-only, so compute it only when printed
        # (every 50th iteration) instead of on every pass
        if i % 50 == 0:
            print(error_rate(H, Y))
        # batch gradients of the mean cross-entropy loss
        dw = 1.0 / n * np.dot(X.T, (H - Y))
        db = 1.0 / n * np.sum(H - Y)
        w = w - alpha * dw
        b = b - alpha * db
    # pack the bias below the weights so callers get a single array
    w = np.vstack((w, b))
    return w

def save_model(file_name, w):
    '''Persist the learned weights to an HDF5 file.

    input: file_name(string) target file path
           w(mat) weight matrix, stored under the dataset key "w"
    '''
    # context manager closes the HDF5 handle even on write failure
    with h5py.File(file_name, "w") as f:
        f.create_dataset("w", data = w)

測試代碼:

def _main():
    '''Run the full pipeline: load data, train the model, save weights.'''
    print("-----1. load data-----")
    feature_data, label_data = load_data("train_data.txt")

    print("-----2. training-----")
    w = lr_train_bgd(feature_data, label_data, 10000, 0.0001)

    print("-----3. save model-----")
    save_model("weights", w)


if __name__ == "__main__":
    _main()

數據集:https://download.csdn.net/download/thisismykungfu/11136541

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章