簡單線性迴歸——梯度下降代碼實現
[Python] 純文本查看 複製代碼
import numpy as np

# 3. Model hyperparameters: step size (learning rate), starting point and
#    number of iterations.
learning_rate = 0.0001
initial_b = 0
initial_m = 0
num_iteration = 10


def _split_xy(points):
    """Return the x and y columns of *points* as two float arrays.

    Raises:
        ValueError: if *points* is not a non-empty 2-D array with at least
            two columns.
    """
    data = np.asarray(points, dtype=float)
    if data.ndim != 2 or data.shape[1] < 2:
        raise ValueError("points must be a 2-D array with x and y columns")
    if data.shape[0] == 0:
        raise ValueError("points must contain at least one sample")
    return data[:, 0], data[:, 1]


# 2. Loss function for the linear model y = m * x + b.
def compute_cost(points, b, m):
    """Return the mean squared error of the line ``y = m * x + b``.

    Args:
        points: 2-D array-like; column 0 holds x, column 1 holds y.
        b: intercept of the line.
        m: slope of the line.

    Returns:
        The average of ``(y - m * x - b) ** 2`` over all samples.
    """
    x, y = _split_xy(points)
    return np.mean((y - m * x - b) ** 2)


# 4. Core algorithm: gradient descent.
def gradient_descent(points, initial_b, initial_m, learning_rate,
                     num_iteration):
    """Fit ``y = m * x + b`` by running a fixed number of descent steps.

    Args:
        points: 2-D array-like; column 0 holds x, column 1 holds y.
        initial_b: starting intercept.
        initial_m: starting slope.
        learning_rate: step size applied to each gradient.
        num_iteration: number of descent steps to take.

    Returns:
        ``[b, m, cost_list]`` where ``cost_list[i]`` is the loss measured
        just before step ``i`` (so it has ``num_iteration`` entries).
    """
    data = np.asarray(points, dtype=float)  # convert once, not per step
    b, m = initial_b, initial_m
    cost_list = []
    for _ in range(num_iteration):
        cost_list.append(compute_cost(data, b, m))
        b, m = step_grad_desc(b, m, data, learning_rate)
    return [b, m, cost_list]


def step_grad_desc(current_b, current_m, points, learning_rate):
    """Perform one gradient-descent step and return ``(b, m)``.

    The gradients of the mean squared error are
    ``dL/dm = 2/N * sum((m*x + b - y) * x)`` and
    ``dL/db = 2/N * sum(m*x + b - y)``.
    """
    x, y = _split_xy(points)
    n = x.shape[0]
    # Both partial derivatives are built from the same residual; the
    # intercept gradient must include the "- y" term as well.
    residual = current_m * x + current_b - y
    m_grad = (2 / n) * np.sum(residual * x)
    b_grad = (2 / n) * np.sum(residual)
    m_update = current_m - learning_rate * m_grad
    b_update = current_b - learning_rate * b_grad
    return b_update, m_update


def linear_regression_demo():
    """Load ``data.csv``, fit the line and plot the results."""
    # Imported here so the functions above only depend on NumPy.
    import matplotlib.pyplot as plt

    # 1. Load the data (a ready-made data set, data.csv).
    points = np.genfromtxt("data.csv", delimiter=",")
    x = points[:, 0]  # first column of every row
    y = points[:, 1]  # second column of every row

    # Scatter plot of the raw data.
    plt.scatter(x, y)
    plt.show()

    # 5. Run gradient descent to estimate the best m and b.
    b, m, cost_list = gradient_descent(points, initial_b, initial_m,
                                       learning_rate, num_iteration)
    print("final m is: ", m)
    print("final b is: ", b)
    print(cost_list)

    # 6. Evaluate the loss at the fitted parameters.
    print("final cost: ", compute_cost(points, b, m))

    # 7. Plot how the loss falls as the iterations proceed.
    plt.plot(cost_list)
    plt.show()

    # 8. Plot the fitted line on top of the data.
    plt.scatter(x, y)
    y_pred = m * x + b
    plt.plot(x, y_pred, c='r')
    plt.show()


if __name__ == "__main__":
    linear_regression_demo()
K 近鄰(KNN)算法(找自己最近的鄰居)
最簡單最初級的分類器,就是將全部的訓練數據所對應的類別都記錄下來
當測試對象的屬性和某個訓練對象的屬性完全匹配時,便可以對其進行分類
KNN 是一種基本分類方法,通過測量不同特徵值之間的距離進行分類
如果一個樣本在特徵空間中的 k 個最相似的樣本中的大多數屬於某一個類別,則該樣本也屬於這個類別
其中 k 通常是不大於 20 的整數
KNN 算法中,所選擇的鄰居都是已經正確分類的對象
KNN 算法的結果很大程度取決於K的選擇
K 一般選擇奇數
[Python] 純文本查看 複製代碼
# Dependencies
import numpy as np  # numerical computing
import pandas as pd  # tabular view of the sample data


# The actual algorithm.
# Distance functions
def l1_dis(a, b):
    """Return the L1 (Manhattan) distance from each row of *a* to *b*.

    ``axis=1`` sums across the columns, giving one distance per row.
    """
    return np.sum(np.abs(a - b), axis=1)


def l2_dis(a, b):
    """Return the L2 (Euclidean) distance from each row of *a* to *b*."""
    return np.sqrt(np.sum((a - b) ** 2, axis=1))


# Classifier
class kNN:
    """Minimal k-nearest-neighbours classifier."""

    def __init__(self, k_neighbors=1, disc_func=l1_dis):
        """Store the neighbour count and the distance function.

        Args:
            k_neighbors: number of nearest training samples that vote.
            disc_func: callable ``(train_matrix, sample) -> distances``
                returning one distance per training row.
        """
        self.k_neighbors = k_neighbors
        self.disc_func = disc_func

    def fit(self, x, y):
        """Remember the training samples *x* and their labels *y*."""
        self.x_train = np.asarray(x)
        self.y_train = np.asarray(y)

    def predict(self, test):
        """Predict a label for every row of *test*.

        Returns:
            Array of shape ``(len(test), 1)`` with the dtype of the
            training labels.
        """
        test = np.asarray(test)
        # Prediction array initialised to zeros.
        y_pred = np.zeros((test.shape[0], 1), dtype=self.y_train.dtype)
        labels = self.y_train.ravel()
        for i, sample in enumerate(test):
            # Distance from this sample to every training row.
            distances = self.disc_func(self.x_train, sample)
            # Indices of the k closest training rows.
            nearest = np.argsort(distances)[:self.k_neighbors]
            # Majority vote. np.unique returns sorted labels, so ties go
            # to the smallest label; unlike np.bincount it also accepts
            # negative or non-integer labels.
            values, counts = np.unique(labels[nearest], return_counts=True)
            y_pred[i] = values[np.argmax(counts)]
        return y_pred


def knn_demo():
    """Train and evaluate the classifier on the iris data set."""
    # Imported here so the classifier above does not require scikit-learn.
    from sklearn.datasets import load_iris  # bundled sample data
    from sklearn.metrics import accuracy_score  # prediction accuracy
    from sklearn.model_selection import train_test_split  # train/test split

    # Load and preprocess the data.
    iris = load_iris()
    df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
    # Add a "class" column holding the species name.
    df['class'] = iris.target
    df['class'] = df['class'].map({0: iris.target_names[0],
                                   1: iris.target_names[1],
                                   2: iris.target_names[2]})
    # head() returns the first 5 rows by default.
    print(df.head())
    # Summary statistics.
    print(df.describe())

    # Feature matrix.
    x = iris.data
    # Labels reshaped into a column vector.
    y = iris.target.reshape(-1, 1)
    print(x.shape, y.shape)  # (150, 4) (150, 1)

    # Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=42, stratify=y)
    print("x_train.shape, y_train.shape")
    print(x_train.shape, y_train.shape)
    print("x_test.shape, y_test.shape")
    print(x_test.shape, y_test.shape)

    # Test
    knn = kNN(k_neighbors=5)
    knn.fit(x_train, y_train)
    y_pred = knn.predict(x_test)
    print("分類準確率:{}%".format(accuracy_score(y_test, y_pred) * 100))


if __name__ == '__main__':
    knn_demo()
文章來源於公眾號「黑馬程序員廣州中心」(itheimagz),更多資源請關注