kNN Algorithm Implementation

kNN Algorithm

import numpy as np
import matplotlib.pyplot as plt
raw_data_X = [[3.4, 2.3],
              [3.1, 1.8],
              [1.3, 3.4],
              [3.6, 4.7],
              [2.3, 2.9],
              [7.4, 4.7],
              [5.7, 3.5],
              [9.2, 2.5],
              [7.8, 3.4],
              [7.9, 0.8],
             ]
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
X_train = np.array(raw_data_X)
y_train = np.array(raw_data_y)
X_train
array([[3.4, 2.3],
       [3.1, 1.8],
       [1.3, 3.4],
       [3.6, 4.7],
       [2.3, 2.9],
       [7.4, 4.7],
       [5.7, 3.5],
       [9.2, 2.5],
       [7.8, 3.4],
       [7.9, 0.8]])
y_train
array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
# the point to predict
x = np.array([8.1, 3.4])
plt.scatter(X_train[y_train == 0, 0], X_train[y_train == 0, 1], color='g')
plt.scatter(X_train[y_train == 1, 0], X_train[y_train == 1, 1], color='r')
plt.scatter(x[0], x[1], color='b')
<matplotlib.collections.PathCollection at 0x20ba3c947b8>

[Figure: scatter plot of the training samples (green = class 0, red = class 1) and the point to predict (blue)]

1. The kNN Implementation Process

  • Euclidean distance
    Dist = \sqrt{\sum_{i=1}^{n}\left(X_i^{(a)} - X_i^{(b)}\right)^2}
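For example, the distance between the training point (7.8, 3.4) and the point to predict (8.1, 3.4) is \sqrt{(7.8 - 8.1)^2 + (3.4 - 3.4)^2} = 0.3, which turns out to be the smallest entry in the distances list computed below.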
from math import sqrt

distances = []
for x_train in X_train:
    # compute the Euclidean distance
    d = sqrt(np.sum((x_train - x) ** 2))
    distances.append(d)

# the same distances can be computed with a list comprehension:
# distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
distances
[4.827007354458868,
 5.2497618993626745,
 6.8,
 4.684015371452148,
 5.821511831131154,
 1.4764823060233399,
 2.4020824298928622,
 1.4212670403551892,
 0.2999999999999998,
 2.607680962081059]
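For reference (this vectorized form is my addition, not part of the original notebook), NumPy can compute all ten distances in a single broadcasted expression:

distances = np.sqrt(np.sum((X_train - x) ** 2, axis=1))
# or equivalently: np.linalg.norm(X_train - x, axis=1)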
nearest = np.argsort(distances)
nearest
array([8, 7, 5, 6, 9, 3, 0, 1, 4, 2], dtype=int64)
# find the k nearest points
k = 6
topK_y = [y_train[i] for i in nearest[:k]]
topK_y
[1, 1, 1, 1, 1, 0]
from collections import Counter
# count the class labels among the k nearest neighbors
Counter(topK_y)
Counter({1: 5, 0: 1})
votes = Counter(topK_y)
# return the most common labels; here we only ask for the top 1
votes.most_common(1)
[(1, 5)]
# the final prediction
predict_y = votes.most_common(1)[0][0]
predict_y
1
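The steps above can be collected into a single helper function. This is only a consolidation sketch of the code in this section (the name kNN_classify is my own, and it reuses the np, sqrt and Counter imports from above):

def kNN_classify(k, X_train, y_train, x):
    """Hand-written kNN: predict the label of a single sample x."""
    distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
    nearest = np.argsort(distances)
    topK_y = [y_train[i] for i in nearest[:k]]
    votes = Counter(topK_y)
    return votes.most_common(1)[0][0]

kNN_classify(6, X_train, y_train, x)   # -> 1 for the point (8.1, 3.4)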

2. Using kNN from scikit-learn

from sklearn.neighbors import KNeighborsClassifier
KNN_classifier = KNeighborsClassifier(n_neighbors=6)
KNN_classifier.fit(X_train, y_train)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=6, p=2,
           weights='uniform')
# in sklearn, predict expects a 2D array of samples
X_predict = x.reshape(1, -1)
X_predict
array([[8.1, 3.4]])
KNN_classifier.predict(X_predict)
array([1])
y_predict = KNN_classifier.predict(X_predict)
y_predict[0]
1
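On a real dataset the classifier would normally be evaluated on held-out data. Below is a minimal sketch using scikit-learn's train_test_split and the estimator's score method; the Iris dataset is used only for illustration and is not part of the original notebook:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)

clf = KNeighborsClassifier(n_neighbors=6)
clf.fit(X_tr, y_tr)
clf.score(X_te, y_te)   # mean accuracy on the held-out test set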

3. Reorganizing Our kNN Code

from kNN.kNN import KNNClassifier
knn_clf = KNNClassifier(k=6)
knn_clf.fit(X_train, y_train)
KNN(k=6)
y_predict = knn_clf.predict(X_predict)
y_predict
array([1])

KNNClassifier.py

import numpy as np
from math import sqrt
from collections import Counter
from metrics import accuracy_score

class KNNClassifier:
    def __init__(self, k):
        assert k >= 1, "k must be valid"
        self.k = k
        self._X_train = None
        self._y_train = None

    def fit(self, X_train, y_train):
        """Train the kNN classifier on the training data X_train and y_train"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."

        self._X_train = X_train
        self._y_train = y_train
        return self

    def predict(self, X_predict):
        """Given a data matrix X_predict, return the predicted labels for all of its samples"""
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"

        y_predict = [self._predict(x_predict) for x_predict in X_predict]
        return np.array(y_predict)

    def _predict(self, x_predict):
        """Given a single sample x_predict, return its predicted label"""
        assert x_predict.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"

        dist = [sqrt(np.sum((x_train - x_predict) ** 2)) for x_train in self._X_train]
        nearest = np.argsort(dist)
        top_K = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(top_K)
        return votes.most_common(1)[0][0]

    def score(self, X_test, y_test):
        """Compute the classifier's accuracy on the test set X_test, y_test"""
        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)

    def __repr__(self):
        return "KNN(k=%d)" % self.k

metrics.py

import numpy as np

def accuracy_score(y_true, y_predict):
    """Compute the accuracy between y_true and y_predict"""
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"

    return sum(y_true == y_predict) / len(y_true)
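
As a quick usage note (my addition, not part of the original post), accuracy_score simply compares the two label arrays element-wise; knn_clf.score(X_test, y_test) follows the same pattern once a held-out test set is available:

import numpy as np
from metrics import accuracy_score

y_true = np.array([0, 1, 1, 0])
y_predict = np.array([0, 1, 0, 0])
accuracy_score(y_true, y_predict)   # 0.75, since three of the four labels match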

Project source code
