Task03:邏輯迴歸

理論部分

  • 邏輯迴歸與線性迴歸的聯繫與區別
  • 模型建立:邏輯迴歸原理、邏輯迴歸模型
  • 學習策略:邏輯迴歸損失函數、推導及優化
  • 算法求解:批量梯度下降
  • 正則化與模型評估指標
  • 邏輯迴歸的優缺點
  • 樣本不均衡問題
  • sklearn參數詳解
  1. 案例:
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    %matplotlib inline
    
    
    # Read the feature table and the label table. Fields are separated by one or
    # more spaces; the separator is a regex, so it is written as a raw string to
    # avoid the invalid "\ " escape sequence in a normal string literal.
    df_X = pd.read_csv('./logistic_x.txt', sep=r'\ +', header=None, engine='python')
    ys = pd.read_csv('./logistic_y.txt', sep=r'\ +', header=None, engine='python')
    ys = ys.astype(int)
    # Attach the first column of the label table to the features, row by row.
    df_X['label'] = ys[0].values
    
    ax = plt.axes()
    # Scatter the points in the plane of columns 0 and 1, coloured by label,
    # to inspect how the two classes are distributed.
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
    # Build the training arrays: prepend a column of ones so that the first
    # weight acts as the intercept term.
    Xs = df_X[[0, 1]].values
    Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs])
    ys = df_X['label'].values
    
    
    from __future__ import print_function
    import numpy as np
    from sklearn.linear_model import LogisticRegression
    
    # Xs already carries a leading column of ones, so the estimator must not add
    # an intercept of its own. fit() returns the estimator, which allows chaining.
    lr = LogisticRegression(fit_intercept=False).fit(Xs, ys)
    # Evaluate the fitted model on the same data it was trained on.
    score = lr.score(Xs, ys)
    print("Coefficient: %s" % (lr.coef_,))
    print("Score: %s" % (score,))
    
    
    ax = plt.axes()
    
    # Draw both classes on the same axes.
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
    # The boundary is a straight line, so its two end points over the range of
    # the first feature (column 1 of Xs) are enough to draw it.
    _first_feature = Xs[:, 1]
    _xs = np.array([_first_feature.min(), _first_feature.max()])
    
    # Boundary: c0 + c1 * x1 + c2 * x2 = 0, solved for x2.
    _coef = lr.coef_[0]
    _ys = (_coef[0] + _coef[1] * _xs) / (-_coef[2])
    plt.plot(_xs, _ys, lw=1)
    
    
    class LGR_GD():
        """Binary logistic regression fitted by batch gradient ascent.
    
        The model is P(y=1 | x) = sigmoid(x . w). No intercept is added, so the
        caller supplies a column of ones in X if one is wanted.
        """
    
        def __init__(self):
            self.w = None  # fitted weights, shape (1, d); None before fit()
            self.n_iters = None  # number of updates done by the last fit()
    
        @staticmethod
        def _sigmoid(z):
            """Return 1 / (1 + exp(-z)), computed without overflow for large |z|."""
            return np.exp(-np.logaddexp(0, -z))
    
        def fit(self, X, y, alpha=0.03, loss=1e-10, max_iter=None):
            """Estimate the weights from X (m, d) and 0/1 labels y (m,).
    
            Args:
                X: feature matrix with one row per sample.
                y: labels, any shape that flattens to m values of 0 or 1.
                alpha: step size of each update (default 0.03).
                loss: stop once the L1 change of the weights between two
                    consecutive updates is not larger than this value.
                max_iter: optional cap on the number of updates; None means
                    iterate until the ``loss`` criterion is met.
            """
            X = np.asarray(X, dtype=float)
            y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector
            d = X.shape[1]
            self.w = np.zeros((1, d))  # start from all-zero weights
            self.n_iters = 0
            tol = 1e5  # larger than any sensible ``loss`` so the loop starts
            while tol > loss and (max_iter is None or self.n_iters < max_iter):
                prob = self._sigmoid(X.dot(self.w.T))  # (m, 1) predicted P(y=1)
                # Gradient of the mean log-likelihood with respect to w.
                grad = np.mean(X * (y - prob), axis=0)
                theta = self.w + alpha * grad
                tol = np.sum(np.abs(theta - self.w))
                self.w = theta
                self.n_iters += 1
    
        def predict(self, X):
            """Return the linear scores X . w as an (m, 1) array.
    
            A positive score means the model puts more than 0.5 probability on
            class 1; pass the scores through a sigmoid to get probabilities.
            """
            y_pred = np.asarray(X, dtype=float).dot(self.w.T)
            return y_pred
    
    
    if __name__ == "__main__":
        # Fit the gradient-descent model on the data prepared above.
        lr_gd = LGR_GD()
        lr_gd.fit(Xs, ys)
    
        ax = plt.axes()
    
        # Draw both classes on the same axes.
        df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
        df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
        # Two end points over the range of the first feature are enough to draw
        # the straight boundary w0 + w1 * x1 + w2 * x2 = 0, solved for x2.
        _first_feature = Xs[:, 1]
        _xs = np.array([_first_feature.min(), _first_feature.max()])
        _w = lr_gd.w[0]
        _ys = (_w[0] + _w[1] * _xs) / (-_w[2])
        plt.plot(_xs, _ys, lw=1)
    
    
    class LGR_NT():
        """Binary logistic regression fitted by Newton's method.
    
        The model is P(y=1 | x) = sigmoid(x . w). No intercept is added, so the
        caller supplies a column of ones in X if one is wanted.
        """
    
        def __init__(self):
            self.w = None  # fitted weights, shape (1, d); None before fit()
            self.n_iters = None  # number of updates done by the last fit()
    
        @staticmethod
        def _sigmoid(z):
            """Return 1 / (1 + exp(-z)), computed without overflow for large |z|."""
            return np.exp(-np.logaddexp(0, -z))
    
        def fit(self, X, y, loss=1e-10, max_iter=None):
            """Estimate the weights from X (m, d) and 0/1 labels y (m,).
    
            Args:
                X: feature matrix with one row per sample.
                y: labels, any shape that flattens to m values of 0 or 1.
                loss: stop once the L1 change of the weights between two
                    consecutive updates is not larger than this value.
                max_iter: optional cap on the number of updates; None means
                    iterate until the ``loss`` criterion is met.
    
            Raises:
                numpy.linalg.LinAlgError: if the curvature matrix is singular.
            """
            X = np.asarray(X, dtype=float)
            y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector
            m, d = X.shape
            theta = np.zeros((1, d))  # start from all-zero weights
            tol = 1e5  # larger than any sensible ``loss`` so the loop starts
            n_iters = 0
            while tol > loss and (max_iter is None or n_iters < max_iter):
                prob = self._sigmoid(X.dot(theta.T))  # (m, 1) predicted P(y=1)
                # Gradient of the mean log-likelihood with respect to w.
                grad = np.mean(X * (y - prob), axis=0)
                # Negated Hessian of the mean log-likelihood: X^T diag(p(1-p)) X / m.
                curvature = (X * (prob * (1.0 - prob))).T.dot(X) / m
                # Newton step; solve() avoids forming the explicit inverse.
                updated = theta + np.linalg.solve(curvature, grad)
                tol = np.sum(np.abs(updated - theta))
                theta = updated
                n_iters += 1
            self.w = theta
            self.n_iters = n_iters
    
        def predict(self, X):
            """Return the linear scores X . w as an (m, 1) array.
    
            A positive score means the model puts more than 0.5 probability on
            class 1; pass the scores through a sigmoid to get probabilities.
            """
            y_pred = np.asarray(X, dtype=float).dot(self.w.T)
            return y_pred
    
    
    if __name__ == "__main__":
        # Fit the Newton's-method model on the same data.
        lgr_nt = LGR_NT()
        lgr_nt.fit(Xs, ys)
    
        # Compare both solvers. The gradient-descent model was bound to the name
        # ``lr_gd`` when it was fitted, so that is the name reported here. The
        # report lives under the guard because both models only exist there.
        print("梯度下降法結果參數:%s;梯度下降法迭代次數:%s" % (lr_gd.w, lr_gd.n_iters))
        print("牛頓法結果參數:%s;牛頓法迭代次數:%s" % (lgr_nt.w, lgr_nt.n_iters))
    

     

發佈了33 篇原創文章 · 獲贊 4 · 訪問量 5萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章