理論部分
- 邏輯迴歸與線性迴歸的聯繫與區別
- 模型建立:邏輯迴歸原理、邏輯迴歸模型
- 學習策略:邏輯迴歸損失函數、推導及優化
- 算法求解:批量梯度下降
- 正則化與模型評估指標
- 邏輯迴歸的優缺點
- 樣本不均衡問題
- sklearn參數詳解
- 案例:
"""Logistic-regression case study: scikit-learn baseline vs. two hand-written solvers.

The module defines two small estimators, ``LGR_GD`` (batch gradient ascent on
the mean log-likelihood) and ``LGR_NT`` (Newton's method), plus a demo in
``main()`` that loads ``./logistic_x.txt`` / ``./logistic_y.txt``, fits all
three models and plots the resulting decision boundaries.

NOTE: this code was exported from a notebook.  The IPython magic
``%matplotlib inline`` and the mid-file ``from __future__ import
print_function`` were removed because both are syntax errors in a plain
``.py`` file; every ``print`` call below takes a single argument, so its
output is the same with or without the ``__future__`` import.
"""
import warnings

import numpy as np
import pandas as pd

# matplotlib and scikit-learn are only needed by the demo, so they are imported
# lazily inside the functions that use them.  This keeps LGR_GD / LGR_NT
# importable in environments that only have numpy and pandas.


def _sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)) element-wise.

    Written through ``tanh`` (an exact identity) so that large ``|z|`` cannot
    trigger overflow warnings from ``exp``.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * z))


def _prepare_xy(X, y):
    """Validate the training data and return ``(X, y)`` as float arrays.

    ``X`` is returned with shape (m, d) and ``y`` as a column of shape (m, 1).

    Raises:
        ValueError: if ``X`` is not 2-D, has no rows, or ``y`` does not have
            one entry per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)  # column vector for matrix algebra
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array, got %d dimension(s)" % X.ndim)
    if X.shape[0] == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != X.shape[0]:
        raise ValueError(
            "X and y disagree on the number of samples: %d vs %d"
            % (X.shape[0], y.shape[0])
        )
    return X, y


class _LogisticBase(object):
    """State and prediction helpers shared by the two solvers.

    Attributes:
        w: fitted weights with shape (1, d), or ``None`` before ``fit``.
        n_iters: number of iterations the last ``fit`` performed, or ``None``.
    """

    def __init__(self):
        self.w = None
        self.n_iters = None

    def _weights(self):
        """Return the fitted weights, raising if ``fit`` has not been called."""
        if self.w is None:
            raise RuntimeError("fit() must be called before predicting")
        return self.w

    def predict(self, X):
        """Return the linear decision score ``X . w^T`` with shape (m, 1).

        The score is the log-odds of class 1; a positive score means class 1
        is the more likely label.  Use ``predict_proba`` for probabilities.
        """
        return np.asarray(X, dtype=float).dot(self._weights().T)

    def predict_proba(self, X):
        """Return P(y = 1 | x) for every row of ``X`` with shape (m, 1)."""
        return _sigmoid(self.predict(X))


class LGR_GD(_LogisticBase):
    """Logistic regression fitted by batch gradient ascent."""

    def fit(self, X, y, alpha=0.03, loss=1e-10, max_iter=1000000):
        """Fit the weights by batch gradient ascent on the mean log-likelihood.

        Args:
            X: design matrix of shape (m, d).  No intercept is added here, so
                include a column of ones in ``X`` if one is wanted.
            y: m labels, expected to be 0/1.
            alpha: step size (default 0.03).
            loss: convergence threshold; iteration stops once the L1 norm of
                the weight update is <= ``loss``.
            max_iter: safety cap on the number of iterations.  Without it the
                loop never ends on linearly separable data, where the
                likelihood has no finite maximiser.

        Raises:
            ValueError: if ``X`` / ``y`` are empty or their shapes disagree.
        """
        X, y = _prepare_xy(X, y)
        m, d = X.shape
        w = np.zeros((1, d))  # start from the zero vector
        n_iters = 0
        converged = False
        while n_iters < max_iter:
            residual = y - _sigmoid(X.dot(w.T))   # (m, 1): y - P(y=1|x)
            step = alpha * residual.T.dot(X) / m  # (1, d): alpha * gradient
            w = w + step
            n_iters += 1
            if np.sum(np.abs(step)) <= loss:
                converged = True
                break
        self.w = w
        self.n_iters = n_iters
        if not converged:
            warnings.warn(
                "LGR_GD.fit stopped after max_iter=%d iterations without "
                "reaching the tolerance %g" % (max_iter, loss),
                RuntimeWarning,
            )


class LGR_NT(_LogisticBase):
    """Logistic regression fitted by Newton's method."""

    def fit(self, X, y, loss=1e-10, max_iter=100):
        """Fit the weights with Newton's method on the mean log-likelihood.

        Args:
            X: design matrix of shape (m, d).  No intercept is added here, so
                include a column of ones in ``X`` if one is wanted.
            y: m labels, expected to be 0/1.
            loss: convergence threshold; iteration stops once the L1 norm of
                the Newton step is <= ``loss``.
            max_iter: safety cap on the number of iterations.

        Raises:
            ValueError: if ``X`` / ``y`` are empty or their shapes disagree.
            numpy.linalg.LinAlgError: if the curvature matrix is singular
                (e.g. collinear columns in ``X``).
        """
        X, y = _prepare_xy(X, y)
        m, d = X.shape
        w = np.zeros((1, d))  # start from the zero vector
        n_iters = 0
        converged = False
        while n_iters < max_iter:
            p = _sigmoid(X.dot(w.T))                      # (m, 1)
            grad = (y - p).T.dot(X) / m                   # (1, d) gradient
            # Negated Hessian of the mean log-likelihood: X^T diag(p(1-p)) X / m.
            curvature = (X * (p * (1.0 - p))).T.dot(X) / m
            # Solve the linear system instead of forming the explicit inverse.
            step = np.linalg.solve(curvature, grad.T).T   # (1, d)
            w = w + step
            n_iters += 1
            if np.sum(np.abs(step)) <= loss:
                converged = True
                break
        self.w = w
        self.n_iters = n_iters
        if not converged:
            warnings.warn(
                "LGR_NT.fit stopped after max_iter=%d iterations without "
                "reaching the tolerance %g" % (max_iter, loss),
                RuntimeWarning,
            )


def load_data(x_path='./logistic_x.txt', y_path='./logistic_y.txt'):
    """Load the features and labels used by the demo.

    Both files are read as space-separated tables without a header row.

    Returns:
        ``(df_X, Xs, ys)`` where ``df_X`` is the feature frame with an added
        integer ``label`` column, ``Xs`` is the design matrix built from
        columns 0 and 1 with a leading column of ones (the intercept term),
        and ``ys`` is the 1-D label array.
    """
    # Raw string: same pattern as before, without the invalid "\ " escape.
    df_X = pd.read_csv(x_path, sep=r'\ +', header=None, engine='python')
    labels = pd.read_csv(y_path, sep=r'\ +', header=None, engine='python')
    labels = labels.astype(int)
    df_X['label'] = labels[0].values  # tag every sample with its label

    features = df_X[[0, 1]].values
    Xs = np.hstack([np.ones((features.shape[0], 1)), features])
    ys = df_X['label'].values
    return df_X, Xs, ys


def _plot_data(df_X, Xs, weights=None):
    """Scatter both classes on a new figure and optionally draw a boundary.

    ``weights`` is a length-3 sequence ``(w0, w1, w2)``; the plotted line is
    ``w0 + w1 * x + w2 * y = 0`` across the observed range of the first feature.
    """
    import matplotlib.pyplot as plt

    plt.figure()  # one figure per plot, as each notebook cell produced
    ax = plt.axes()
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    if weights is not None:
        _xs = np.array([np.min(Xs[:, 1]), np.max(Xs[:, 1])])
        _ys = (weights[0] + weights[1] * _xs) / (- weights[2])
        plt.plot(_xs, _ys, lw=1)


def main():
    """Run the case study: sklearn baseline, gradient ascent, Newton's method."""
    import matplotlib.pyplot as plt
    from sklearn.linear_model import LogisticRegression

    df_X, Xs, ys = load_data()
    _plot_data(df_X, Xs)  # first look at how the points are distributed

    # The intercept is already a column of Xs, so sklearn must not add one.
    # NOTE(review): sklearn applies L2 regularisation by default, so its
    # coefficients need not match the unregularised solvers below exactly.
    lr = LogisticRegression(fit_intercept=False)
    lr.fit(Xs, ys)
    score = lr.score(Xs, ys)
    print("Coefficient: %s" % lr.coef_)
    print("Score: %s" % score)
    _plot_data(df_X, Xs, lr.coef_[0])

    lr_gd = LGR_GD()
    lr_gd.fit(Xs, ys)
    _plot_data(df_X, Xs, lr_gd.w[0])

    lgr_nt = LGR_NT()
    lgr_nt.fit(Xs, ys)

    print("梯度下降法結果參數:%s;梯度下降法迭代次數:%s" %(lr_gd.w,lr_gd.n_iters))
    print("牛頓法結果參數:%s;牛頓法迭代次數:%s" %(lgr_nt.w,lgr_nt.n_iters))

    plt.show()


if __name__ == "__main__":
    main()