Task03：邏輯迴歸

原創

R_TRIG

2020-02-20 15:29

理論部分

邏輯迴歸與線性迴歸的聯繫與區別
模型建立：邏輯迴歸原理、邏輯迴歸模型
學習策略：邏輯迴歸損失函數、推導及優化
算法求解：批量梯度下降
正則化與模型評估指標
邏輯迴歸的優缺點
樣本不均衡問題
sklearn參數詳解

案例:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline


# Load the features and the labels. Fields are separated by runs of spaces,
# so a regex separator (python parsing engine) is used. The patterns are raw
# strings: backslash-space is not a valid escape sequence in a normal string
# literal and triggers an invalid-escape warning on current Python versions.
df_X = pd.read_csv('./logistic_x.txt', sep=r'\ +', header=None, engine='python')  # feature columns
ys = pd.read_csv('./logistic_y.txt', sep=r'\ +', header=None, engine='python')  # label column
ys = ys.astype(int)
df_X['label'] = ys[0].values  # attach each row's label to its feature row

ax = plt.axes()
# Scatter the samples in the plane of the two features to inspect their
# distribution: label 0 in blue, label 1 in red.
df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

# Build the training arrays: a leading column of ones (intercept term)
# followed by the two feature columns, plus the label vector.
Xs = df_X[[0, 1]].values
Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs])
ys = df_X['label'].values


from __future__ import print_function
import numpy as np
from sklearn.linear_model import LogisticRegression

# Xs already carries a column of ones for the intercept, so the estimator
# is told not to add an intercept of its own.
lr = LogisticRegression(fit_intercept=False)
lr.fit(Xs, ys)

# Evaluate on the training data itself and report the fitted coefficients.
score = lr.score(Xs, ys)
print("Coefficient: %s" % (lr.coef_,))
print("Score: %s" % (score,))


# Redraw the labelled samples: label 0 in blue, label 1 in red.
ax = plt.axes()
df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

# The separating line is where w0 + w1*x1 + w2*x2 = 0, i.e.
# x2 = -(w0 + w1*x1) / w2. Two points (at the smallest and largest x1 in the
# data) are enough to draw it.
_xs = np.array([Xs[:, 1].min(), Xs[:, 1].max()])
_ys = -(lr.coef_[0][0] + lr.coef_[0][1] * _xs) / lr.coef_[0][2]
plt.plot(_xs, _ys, lw=1)


class LGR_GD():
    """Binary logistic regression fitted by batch gradient descent.

    After ``fit``:
        w        -- weight row vector of shape (1, d)
        n_iters  -- number of gradient steps that were taken
    """

    def __init__(self):
        self.w = None        # fitted weights, shape (1, d); None until fit() runs
        self.n_iters = None  # iteration count of the last fit() call

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of z, element-wise."""
        # The tanh form equals 1 / (1 + exp(-z)) algebraically but cannot
        # overflow for large |z|.
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    def fit(self, X, y, alpha=0.03, loss=1e-10, max_iter=None):
        """Fit the weights by maximising the mean log-likelihood.

        Args:
            X: design matrix of shape (m, d). No intercept is added here, so
                include a column of ones if one is wanted.
            y: m binary labels (0 or 1), any shape that flattens to m values.
            alpha: step size (default 0.03).
            loss: convergence threshold (default 1e-10); iteration stops once
                the largest absolute weight change in a step is <= loss.
            max_iter: optional cap on the number of steps; None means no cap.

        Raises:
            ValueError: if X has no rows.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector for matrix algebra
        m, d = X.shape
        if m == 0:
            raise ValueError("X must contain at least one sample")

        self.w = np.zeros((1, d))  # start from all-zero weights
        tol = 1e5
        self.n_iters = 0
        while tol > loss:
            if max_iter is not None and self.n_iters >= max_iter:
                break
            # Gradient of the mean log-likelihood: X^T (y - sigmoid(X w)) / m.
            residual = y - self._sigmoid(X.dot(self.w.T))   # shape (m, 1)
            step = alpha * residual.T.dot(X) / m            # shape (1, d)
            # All coordinates are updated together from the same residual.
            self.w = self.w + step
            tol = np.max(np.abs(step))
            self.n_iters += 1

    def predict(self, X):
        """Return P(y = 1 | x) for every row of X as an (m, 1) array."""
        X = np.asarray(X, dtype=float)
        return self._sigmoid(X.dot(self.w.T))


if __name__ == "__main__":
    # Train the gradient-descent model on the prepared arrays.
    lr_gd = LGR_GD()
    lr_gd.fit(Xs, ys)

    # Redraw the labelled samples: label 0 in blue, label 1 in red.
    ax = plt.axes()
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')

    # Separating line w0 + w1*x1 + w2*x2 = 0, solved for x2 and drawn
    # between the smallest and largest x1 in the data.
    _xs = np.array([Xs[:, 1].min(), Xs[:, 1].max()])
    _ys = -(lr_gd.w[0][0] + lr_gd.w[0][1] * _xs) / lr_gd.w[0][2]
    plt.plot(_xs, _ys, lw=1)


class LGR_NT():
    """Binary logistic regression fitted by Newton's method.

    After ``fit``:
        w        -- weight row vector of shape (1, d)
        n_iters  -- number of Newton steps that were taken
    """

    def __init__(self):
        self.w = None        # fitted weights, shape (1, d); None until fit() runs
        self.n_iters = None  # iteration count of the last fit() call

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of z, element-wise."""
        # The tanh form equals 1 / (1 + exp(-z)) algebraically but cannot
        # overflow for large |z|.
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    def fit(self, X, y, loss=1e-10, max_iter=None):
        """Fit the weights by Newton iterations on the mean negative log-likelihood.

        Args:
            X: design matrix of shape (m, d). No intercept is added here, so
                include a column of ones if one is wanted.
            y: m binary labels (0 or 1), any shape that flattens to m values.
            loss: convergence threshold (default 1e-10); iteration stops once
                the largest absolute weight change in a step is <= loss.
            max_iter: optional cap on the number of steps; None means no cap.

        Raises:
            ValueError: if X has no rows.
            numpy.linalg.LinAlgError: if the Hessian is singular.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector for matrix algebra
        m, d = X.shape
        if m == 0:
            raise ValueError("X must contain at least one sample")

        theta = np.zeros((1, d))  # start from all-zero weights
        tol = 1e5
        n_iters = 0
        while tol > loss:
            if max_iter is not None and n_iters >= max_iter:
                break
            p = self._sigmoid(X.dot(theta.T))              # shape (m, 1)
            gradient = X.T.dot(p - y) / m                  # shape (d, 1)
            # Hessian = X^T diag(p * (1 - p)) X / m; the row scaling avoids
            # building the m-by-m diagonal matrix.
            Hessian = (X * (p * (1.0 - p))).T.dot(X) / m   # shape (d, d)
            # Solve H * step = gradient instead of inverting H explicitly.
            step = np.linalg.solve(Hessian, gradient)      # shape (d, 1)
            theta = theta - step.T
            tol = np.max(np.abs(step))
            n_iters += 1

        self.w = theta
        self.n_iters = n_iters

    def predict(self, X):
        """Return P(y = 1 | x) for every row of X as an (m, 1) array."""
        X = np.asarray(X, dtype=float)
        return self._sigmoid(X.dot(self.w.T))


if __name__ == "__main__":
    # Train the Newton's-method model on the same prepared arrays.
    lgr_nt = LGR_NT()
    lgr_nt.fit(Xs, ys)


# Report the fitted weights and iteration counts of both solvers.
# The gradient-descent model is bound to the name lr_gd (not lgr_gd), so
# that is the name that must be read here.
print("梯度下降法結果參數：%s;梯度下降法迭代次數：%s" %(lr_gd.w,lr_gd.n_iters))
print("牛頓法結果參數：%s;牛頓法迭代次數：%s" %(lgr_nt.w,lgr_nt.n_iters))