python:使用knn訓練mnist訓練集

首先在 MNIST 數據集網站上下載 4 組訓練/測試數據(mnist測試數據):
mnist測試數據集
將四組數據放入knn訓練項目下,如圖:
knn訓練數據
在MNIST_data中放入這四個文件
mnist訓練數據
然後在knn項目下新建一個knn_mnist.py文件,寫入以下代碼:

# @Time : 2019/10/24
# @File : knn_mnist.py
# @Author : Snipe
# @Contact : [email protected]
# @Software : Window10 + Python3.6 + PyCharm

import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data


# Overall plan of this script:
# 1. Load the MNIST data from a directory, with one-hot labels
#    (a one-hot label has a single 1 and 0 everywhere else).
# 2. Compute the distance between every test image and every training image:
#    5 test images * 500 training images = 2500 distances, each taken over
#    784 values (image width * height).
# 3. Using those distances, find for each test image the k nearest training
#    images (here: the 4 closest out of the 500 sampled training images).
# 4. Look up the labels of those k nearest images.
# 5. Turn the labels into a concrete digit.
# 6. Compute the recognition accuracy.

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Settings.
# The population sizes are read from the loaded arrays instead of being
# hard-coded (previously 55000 / 10000), so the random sampling below always
# stays within the data that was actually loaded.
trainNum = mnist.train.images.shape[0]
testNum = mnist.test.images.shape[0]
trainSize = 500    # number of training images sampled as kNN references
testSize = 5       # number of test images to classify
k = 4              # number of nearest neighbours
# Draw trainSize distinct indices from [0, trainNum); replace=False means
# no index is picked twice.
trainIndex = np.random.choice(trainNum, trainSize, replace=False)
print(trainIndex)
testIndex = np.random.choice(testNum, testSize, replace=False)
print(testIndex)
trainData = mnist.train.images[trainIndex]    # sampled training images
trainLabel = mnist.train.labels[trainIndex]    # sampled training labels
testData = mnist.test.images[testIndex]    # sampled test images
testLabels = mnist.test.labels[testIndex]    # sampled test labels

print('trainData.shape = ', trainData.shape)    # (500, 784): (image count, 28*28 pixels)
print('trainLabel.shape = ', trainLabel.shape)    # (500, 10)
print('testData.shape = ', testData.shape)    # (5, 784)
print('testLabel.shape', testLabels.shape)    # (5, 10): 5 rows, 10 columns
print('testLabel = ', testLabels)
# Graph inputs. The leading None dimension leaves the number of rows open.
trainDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
trainLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
testDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
testLabelsInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
# kNN distance
# Insert an axis so the test batch broadcasts against the training set:
# (5, 784) -> (5, 1, 784).
f1 = tf.expand_dims(testDataInput, 1)
# Element-wise difference between every training image and every test image.
f2 = tf.subtract(trainDataInput, f1)
# Sum of absolute differences over the 784 pixel values.
# f3 is the distance matrix, 5*500 here, indexed as
# [test image index, training image index].
f3 = tf.reduce_sum(tf.abs(f2), axis=2)
# Negate the distances so that the smallest distance becomes the largest value.
f4 = tf.negative(f3)
# Pick the k largest entries of f4 per test image, i.e. the k nearest
# neighbours. Uses the module-level setting `k` rather than a literal 4 so the
# neighbour count is configured in one place.
# f5: the (negated) k smallest distances
# f6: their indices into the training set
f5, f6 = tf.nn.top_k(f4, k=k)
# Use the neighbour indices to fetch the corresponding training labels.
f7 = tf.gather(trainLabelInput, f6)
# Add up the labels of the k neighbours of each test image, giving one
# vote count per class.
f8 = tf.reduce_sum(f7, axis=1)
# Index of the class with the most votes: one predicted digit per test image
# (5 predictions in this example).
f9 = tf.argmax(f8, axis=1)

with tf.Session() as sess:
    # One feed dictionary shared by every tensor we fetch.
    feed = {trainDataInput: trainData,
            testDataInput: testData[0:testSize],
            trainLabelInput: trainLabel}
    # Fetch all intermediate tensors in a single run so the graph is
    # evaluated once instead of being recomputed for every tensor.
    p1, p2, p3, p4, p5, p6, p7, p8, p9 = sess.run(
        [f1, f2, f3, f4, f5, f6, f7, f8, f9], feed_dict=feed)

    print('p1 = ', p1.shape)  # p1 = (5, 1, 784)
    print('p2 = ', p2.shape)   # p2 = (5, 500, 784)
    print('p3 = ', p3.shape)
    print('p3[0,0]', p3[0, 0])    # distance between test image 0 and training image 0

    print('p4 = ', p4.shape)
    print('p4[0,0]', p4[0, 0])

    # p5 = (5, 4)
    # p6 = (5, 4)
    print('p5 = ', p5.shape)
    print('p6 = ', p6.shape)
    print('p5[0,0]', p5[0, 0])
    print('p6[0,0]', p6[0, 0])   # p6 holds training-set indices

    print('p7 = ', p7.shape)   # p7 = (5, 4, 10)
    print('p7[ ] = ', p7)

    print('p8 = ', p8.shape)
    print('p8[ ] = ', p8)

    print('p9 = ', p9.shape)
    print('p9[ ] = ', p9)

    # Ground-truth digits: position of the 1 in each one-hot test label.
    p10 = np.argmax(testLabels[0:testSize], axis=1)
    print('p10[] = ', p10)

# Accuracy: percentage of test images whose predicted digit (p9) matches the
# ground truth (p10). int() keeps the count a plain Python integer so the
# printed value has the same form as a manual counter would give.
correct = int(np.sum(p10 == p9))
print('ac = ', correct*100/testSize, '%')

運行結果如下:
識別結果

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章