# -*- coding: utf-8 -*-
"""
Created on Fri Aug 3 14:05:33 2018
@author: Administrator
"""
'''
基於KNN的有監督分類
'''
import numpy as np
import sklearn.neighbors as sn
import matplotlib.pyplot as plt
# Load the training set from 'knn.txt': one sample per line, written as
# comma-separated numbers. The last field of a line is the class label and
# the fields before it are the features.
train_x, train_y = [], []
with open('knn.txt', 'r') as f:
    for line in f:
        # strip() drops the line terminator without chopping a real character
        # off a final line that has no trailing newline (which the previous
        # line[:-1] slicing did).
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. an empty last line) instead of failing
            # on float('').
            continue
        data = [float(substr) for substr in line.split(',')]
        train_x.append(data[:-1])
        train_y.append(data[-1])
train_x = np.array(train_x)
# Labels are parsed as floats above; store them as integers.
train_y = np.array(train_y, dtype=int)
# KNN classifier: 10 neighbours per query, with weights='distance' so that
# closer neighbours have more influence than farther ones.
#
# Note from the original author: KNN is a lazy learner. fit() only stores the
# training data; the neighbour distances are computed at prediction time, so
# "training" and prediction effectively both happen inside predict().
knn_params = {'n_neighbors': 10, 'weights': 'distance'}
model = sn.KNeighborsClassifier(**knn_params)
model.fit(train_x, train_y)
# Sample the plane of the first two features on a regular grid that extends
# one unit beyond the training data on every side, then classify every grid
# point so the decision regions can be drawn as a background image.
h = v = 0.005  # grid spacing along the first / second feature
l, r = train_x[:, 0].min() - 1, train_x[:, 0].max() + 1  # first-feature range
b, t = train_x[:, 1].min() - 1, train_x[:, 1].max() + 1  # second-feature range
grid_x = np.meshgrid(np.arange(l, r, h), np.arange(b, t, v))
# The model takes one sample per row, so flatten both coordinate grids and
# pair them up as two columns.
flat_x = np.column_stack((grid_x[0].ravel(), grid_x[1].ravel()))
flat_y = model.predict(flat_x)
# Fold the predicted labels back into the 2-D shape of the grid.
grid_y = flat_y.reshape(grid_x[0].shape)
# Test set: three query points.
test_x = np.array([
    [2.2, 6.2],
    [3.6, 1.8],
    [4.5, 3.6],
])
pred_test_y = model.predict(test_x)
# kneighbors() finds the nearest training samples of each query point.
# Mind the order of the returned pair: the distances come first, then the
# indices of those neighbours in the training set.
nn_distance, nn_indices = model.kneighbors(test_x, return_distance=True)
print('nn_distance:', nn_distance, 'nn_indices:', nn_indices, sep='\n')
# Output recorded in the original script:
#nn_distance:
#[[0.4936598 0.64776539 0.66370174 0.73824115 0.73979727 0.74094534
# 0.74953319 0.79195959 0.80305666 0.82292162]
# [0.43324358 0.57384667 0.57801384 0.65 0.76321688 0.83677954
# 0.8832327 0.9060905 0.94894678 1.00404183]
# [0.55542776 0.75213031 0.7823682 0.93150416 0.99322706 1.11305885
# 1.11682586 1.12361025 1.1461239 1.18680243]]
#nn_indices:
#[[139 25 91 118 124 40 82 142 52 22]
# [132 111 47 102 95 87 42 138 39 140]
# [ 92 79 119 64 128 131 44 77 23 83]]
# Create the figure, label it, and paint the classified grid as background.
fig_title = 'KNN Nearest Neighbors'
label_size = 14
plt.figure(fig_title, facecolor='lightgray')
plt.title(fig_title, fontsize=label_size)
plt.xlabel('x', fontsize=label_size)
plt.ylabel('y', fontsize=label_size)
plt.tick_params(labelsize=10)
# Pseudo-colour plot of the predicted class at every grid point
# ('gray_r' would be the reversed colour band).
plt.pcolormesh(grid_x[0], grid_x[1], grid_y, cmap='gray')
# np.unique returns the sorted, de-duplicated class labels (e.g. [0 1 2]),
# so no separate classes.sort() is needed.
classes = np.unique(train_y)
# Build one colour per class. plt.get_cmap(name, lut) gives a colormap with
# `lut` entries in its lookup table; calling that colormap with range(lut)
# yields a numpy array holding one colour row per entry.
cs = plt.get_cmap('brg', len(classes))(range(len(classes)))
print(type(cs))  # <class 'numpy.ndarray'>
# Translate label values into row positions of `cs`. Indexing `cs` with the
# raw labels is only valid when the labels are exactly 0..k-1; looking each
# label up in the sorted `classes` array gives the same rows in that case and
# stays correct for any other label values (e.g. 1..k or a skipped label).
train_color_idx = np.searchsorted(classes, train_y)
test_color_idx = np.searchsorted(classes, pred_test_y)
# Scatter the training samples and the test points ('+'), each coloured by
# its (true or predicted) class.
plt.scatter(train_x[:, 0], train_x[:, 1], c=cs[train_color_idx], s=30)
plt.scatter(test_x[:, 0], test_x[:, 1], c=cs[test_color_idx], s=30,
            marker='+')
# Outline the nearest training neighbours of every test point with a hollow
# diamond drawn in the colour of that test point's predicted class.
for nn_indice, y, color_idx in zip(nn_indices, pred_test_y, test_color_idx):
    plt.scatter(train_x[nn_indice, 0],  # x coordinates of the neighbours
                train_x[nn_indice, 1],  # y coordinates of the neighbours
                # One identical colour row per neighbour, so every neighbour
                # shares the test point's colour.
                edgecolor=cs[np.full_like(nn_indice, color_idx)],
                facecolor='none',  # hollow marker
                marker='D', s=70)
    print(nn_indice)
    print(y, np.ones_like(nn_indice) * y)
    # Output recorded in the original script:
    # [139 25 91 118 124 40 82 142 52 22] -- nn_indice
    # 1 [1 1 1 1 1 1 1 1 1 1]
    # [132 111 47 102 95 87 42 138 39 140] -- nn_indice
    # 0 [0 0 0 0 0 0 0 0 0 0]
    # [ 92 79 119 64 128 131 44 77 23 83] -- nn_indice
    # 2 [2 2 2 2 2 2 2 2 2 2]
plt.show()
'''
備註:
def kneighbors(self, X=None, n_neighbors=None, return_distance=True):
"""Finds the K-neighbors of a point.
Returns indices of and distances to the neighbors of each point.
Parameters
----------
X : array-like, shape (n_query, n_features), \
or (n_query, n_indexed) if metric == 'precomputed'
The query point or points.
If not provided, neighbors of each indexed point are returned.
In this case, the query point is not considered its own neighbor.
n_neighbors : int
Number of neighbors to get (default is the value
passed to the constructor).
return_distance : boolean, optional. Defaults to True.
If False, distances will not be returned
Returns
-------
dist : array
Array representing the lengths to points, only present if
return_distance=True
ind : array
Indices of the nearest points in the population matrix.
model = sn.KNeighborsClassifier
Parameters
----------
n_neighbors : int, optional (default = 5)
Number of neighbors to use by default for :meth:`kneighbors` queries.
weights : str or callable, optional (default = 'uniform')
weight function used in prediction. Possible values:
- 'uniform' : uniform weights. All points in each neighborhood
are weighted equally.
- 'distance' : weight points by the inverse of their distance.
in this case, closer neighbors of a query point will have a
greater influence than neighbors which are further away.
- [callable] : a user-defined function which accepts an
array of distances, and returns an array of the same shape
containing the weights.
algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
Algorithm used to compute the nearest neighbors:
- 'ball_tree' will use :class:`BallTree`
- 'kd_tree' will use :class:`KDTree`
- 'brute' will use a brute-force search.
- 'auto' will attempt to decide the most appropriate algorithm
based on the values passed to :meth:`fit` method.
Note: fitting on sparse input will override the setting of
this parameter, using brute force.
def get_cmap(name=None, lut=None):
"""
Get a colormap instance, defaulting to rc values if *name* is None.
Colormaps added with :func:`register_cmap` take precedence over
built-in colormaps.
If *name* is a :class:`matplotlib.colors.Colormap` instance, it will be
returned.
If *lut* is not None it must be an integer giving the number of
entries desired in the lookup table, and *name* must be a standard
mpl colormap name.
'''
# 機器學習sklearn基礎(2):基於KNN分類器的有監督分類模型 (數組去重方法np.unique()以及色級構建說明plt.get_cmap(..)(..))
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.