圖像處理之KNN+CNN數字識別

OpenCV-TensorFlow 入門人工智能圖像處理

樣本地址: http://yann.lecun.com/exdb/mnist/

文件 內容
train-images-idx3-ubyte.gz 訓練集圖片 - 55000張 訓練圖片,5000張 驗證圖片
train-labels-idx1-ubyte.gz 訓練集圖片對應的數字標籤
t10k-images-idx3-ubyte.gz 測試集圖片 - 10000張 圖片
t10k-labels-idx1-ubyte.gz 測試集圖片對應的數字標籤
  • 下載的4個文件放在一個文件夾,命名爲MNIST_data ,並同代碼放在一個文件夾。

1、KNN數字識別

1.1、load Data

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # keep TensorFlow 1.x graph/session behaviour
import numpy as np
import random
# NOTE(review): the tensorflow.examples.tutorials package presumably only ships
# with TensorFlow 1.x installs — confirm against the installed version.
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the local 'MNIST_data' folder; one_hot=True requests labels
# as one-hot vectors (10 entries per label).
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

1.2、knn test train distance 5*500=2500距離

# Experiment settings: the index ranges to sample from, how many rows to
# sample from each split, and the neighbour count k.
trainNum, testNum = 55000, 10000
trainSize, testSize = 500, 5
k = 4
# Draw distinct random row indices (replace=False): first for the training
# subset, then for the test subset.
trainIndex = np.random.choice(trainNum, size=trainSize, replace=False)
testIndex = np.random.choice(testNum, size=testSize, replace=False)
# Pick the sampled rows. Each image row holds 28*28 = 784 values and each
# label row holds 10 one-hot entries.
trainData, trainlabel = mnist.train.images[trainIndex], mnist.train.labels[trainIndex]
testData, testLabel = mnist.test.images[testIndex], mnist.test.labels[testIndex]
# Report the shapes of the four sampled arrays.
for _tag, _arr in (
    ("trainData=", trainData),
    ("trainlabel=", trainlabel),
    ("testData=", testData),
    ("testLabel=", testLabel),
):
    print(_tag, _arr.shape)

1.3、knn:5張測試圖片分別和500張訓練圖片做差,爲每張測試圖片找到k=4張距離最近的訓練圖片

# Graph inputs. The leading dimension is None so any number of rows can be
# fed; images are flattened to 784 values and labels are 10-wide one-hot rows.
trainDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
trainLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
testDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
testLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)  # not consumed by f1..f9

# kNN with L1 distance. Test rows (T, 784) are expanded to (T, 1, 784) so they
# broadcast against the training rows (N, 784), giving T*N pairwise differences.
f1 = tf.expand_dims(testDataInput, 1)   # insert a middle axis: (T, 1, 784)
f2 = tf.subtract(trainDataInput, f1)    # pairwise differences: (T, N, 784)
f3 = tf.reduce_sum(tf.abs(f2), axis=2)  # L1 distance per pair: (T, N)
f4 = tf.negative(f3)                    # negate so the nearest rows score highest
# Use the configured neighbour count `k` rather than a repeated literal.
f5, f6 = tf.nn.top_k(f4, k=k)           # f5: k largest scores, f6: their training-row indices
f7 = tf.gather(trainLabelInput, f6)     # one-hot labels of the k neighbours: (T, k, 10)
f8 = tf.reduce_sum(f7, axis=1)          # per-class vote counts: (T, 10)
f9 = tf.argmax(f8, axis=1)              # predicted digit = class with the most votes

1.4、k個最近的圖片 ————> parse content label

with tf.Session() as sess:
    # Evaluate every intermediate tensor in ONE run with ONE feed dict.
    # Fetching them separately recomputes the whole distance tensor each time.
    feed = {
        trainDataInput: trainData,
        trainLabelInput: trainlabel,
        testDataInput: testData[0:5],
    }
    p1, p2, p3, p4, p5, p6, p7, p8, p9 = sess.run(
        [f1, f2, f3, f4, f5, f6, f7, f8, f9], feed_dict=feed)
    print ("p1 = ",p1.shape) # (5, 1, 784): test rows with the extra axis
    print ("p2 = ",p2.shape) # (5, 500, 784): pairwise differences
    print ("p3 = ",p3.shape) # (5, 500): L1 distances
    print ("p3[0, 0] = ", p3[0, 0]) # value depends on the random sample
    print ("p4 = ", p4.shape) # (5, 500): negated distances
    print ("p4[0, 0] = ", p4[0, 0]) # equals -p3[0, 0]
    print ("p5 = ",p5.shape) # (5, 4): scores of the 4 nearest training images per test image
    print ("p6 = ",p6.shape) # (5, 4): indices of those training images
    print ("p5[0, 0] = ", p5[0, 0]) # value depends on the random sample
    print ("p6[0, 0] = ", p6[0, 0]) # a training-row index
    print ("p7 = ", p7.shape) # (5, 4, 10): one-hot labels of the neighbours
    print ("p8 = ", p8)
    print ("p8.shape = ", p8.shape) # (5, 10): vote counts per class
    print ("p9 = ", p9) # index of the largest vote count in each row of p8
    print ("p9.shape = ", p9.shape) # (5,)
    p10 = np.argmax(testLabel[0:5], axis=1) # true digits decoded from the one-hot test labels
    print ("p10 = ", p10) # compare with p9 to measure accuracy

1.5、統計測試數據的識別正確率

# Recognition accuracy: percentage of test images whose predicted digit (p9)
# matches the true digit (p10). The sample count is taken from the data
# instead of a hard-coded 5, so it stays correct if the test sample size changes.
j = sum(1 for predicted, actual in zip(p9, p10) if predicted == actual)
print ("本次識別正確率 =", j*100/len(p10))

1.6、源碼彙總

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # keep TensorFlow 1.x graph/session behaviour
import numpy as np
import random
from tensorflow.examples.tutorials.mnist import input_data
# Load MNIST from the local 'MNIST_data' folder with one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Settings: index ranges to sample from, sample sizes, neighbour count.
trainNum = 55000
testNum = 10000
trainSize = 500
testSize = 5
k = 4
# Draw distinct random row indices (replace=False) for each subset.
trainIndex = np.random.choice(trainNum, trainSize, replace=False)
testIndex = np.random.choice(testNum, testSize, replace=False)
trainData = mnist.train.images[trainIndex]   # (trainSize, 784); 784 = 28*28 pixels
trainlabel = mnist.train.labels[trainIndex]  # (trainSize, 10) one-hot
testData = mnist.test.images[testIndex]      # (testSize, 784)
testLabel = mnist.test.labels[testIndex]     # (testSize, 10) one-hot
print ("trainData=",trainData.shape)
print ("trainlabel=",trainlabel.shape)
print ("testData=",testData.shape)
print ("testLabel=",testLabel.shape)

# Graph inputs; the leading dimension is None so any number of rows can be fed.
trainDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
trainLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
testDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
testLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)  # not consumed by f1..f9

# kNN with L1 distance. Test rows (T, 784) become (T, 1, 784) so they broadcast
# against the training rows (N, 784), giving T*N pairwise differences.
f1 = tf.expand_dims(testDataInput, 1)   # insert a middle axis: (T, 1, 784)
f2 = tf.subtract(trainDataInput, f1)    # pairwise differences: (T, N, 784)
f3 = tf.reduce_sum(tf.abs(f2), axis=2)  # L1 distance per pair: (T, N)
f4 = tf.negative(f3)                    # negate so the nearest rows score highest
f5, f6 = tf.nn.top_k(f4, k=k)           # uses the configured k; f6 = training-row indices
f7 = tf.gather(trainLabelInput, f6)     # one-hot labels of the k neighbours: (T, k, 10)
f8 = tf.reduce_sum(f7, axis=1)          # per-class vote counts: (T, 10)
f9 = tf.argmax(f8, axis=1)              # predicted digit = class with the most votes

with tf.Session() as sess:
    # Evaluate every intermediate tensor in one run with one feed dict.
    # Separate runs would recompute the whole distance tensor each time.
    feed = {
        trainDataInput: trainData,
        trainLabelInput: trainlabel,
        testDataInput: testData,
    }
    p1, p2, p3, p4, p5, p6, p7, p8, p9 = sess.run(
        [f1, f2, f3, f4, f5, f6, f7, f8, f9], feed_dict=feed)
    print ("p1 = ",p1.shape) # (testSize, 1, 784)
    print ("p2 = ",p2.shape) # (testSize, trainSize, 784)
    print ("p3 = ",p3.shape) # (testSize, trainSize)
    print ("p3[0, 0] = ", p3[0, 0]) # value depends on the random sample
    print ("p4 = ", p4.shape) # (testSize, trainSize)
    print ("p4[0, 0] = ", p4[0, 0]) # equals -p3[0, 0]
    print ("p5 = ",p5.shape) # (testSize, k): scores of the k nearest training images
    print ("p6 = ",p6.shape) # (testSize, k): indices of those training images
    print ("p5[0, 0] = ", p5[0, 0]) # value depends on the random sample
    print ("p6[0, 0] = ", p6[0, 0]) # a training-row index
    print ("p7 = ", p7.shape) # (testSize, k, 10)
    print ("p8 = ", p8)
    print ("p8.shape = ", p8.shape) # (testSize, 10)
    print ("p9 = ", p9) # index of the largest vote count in each row of p8
    print ("p9.shape = ", p9.shape) # (testSize,)
    p10 = np.argmax(testLabel, axis=1) # true digits decoded from the one-hot test labels
    print ("p10 = ", p10) # compare with p9 to measure accuracy
# Recognition accuracy: share of test images whose prediction matches the label.
# The count comes from the data instead of a hard-coded 5.
j = sum(1 for predicted, actual in zip(p9, p10) if predicted == actual)
print ("本次識別正確率 =", j*100/len(p10))

運行結果:

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
trainData= (500, 784)
trainlabel= (500, 10)
testData= (5, 784)
testLabel= (5, 10)
p1 =  (5, 1, 784)
p2 =  (5, 500, 784)
p3 =  (5, 500)
p3[0, 0] =  194.5373
p4 =  (5, 500)
p4[0, 0] =  -194.5373
p5 =  (5, 4)
p6 =  (5, 4)
p5[0, 0] =  -64.77253
p6[0, 0] =  484
p7 =  (5, 4, 10)
p8 =  [[4. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 4. 0. 0. 0. 0. 0.]
 [0. 0. 0. 4. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 3. 0.]
 [0. 0. 0. 0. 0. 0. 0. 4. 0. 0.]]
p8.shape =  (5, 10)
p9 =  [0 4 3 8 7]
p9.shape =  (5,)
p10 =  [0 4 3 8 7]
本次識別正確率 = 100.0

2、CNN實現手寫數字識別

2.1、導入安裝包

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()#兼容1.0版本
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

2.2、加載數據

# Load MNIST from the local 'MNIST_data' folder; one_hot=True requests labels
# as one-hot vectors.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

2.3、定義tf.placeholder()

# Network inputs: flattened 28*28 = 784-value images and 10-wide label rows.
# The leading dimension is None so the batch size may vary.
imageInput = tf.placeholder(dtype=tf.float32, shape=[None, 784])
LabelInput = tf.placeholder(dtype=tf.float32, shape=[None, 10])

2.4、轉換數據形狀(reshape)

# Turn each flat 784-value row back into a 28x28 single-channel image:
# 2D (M, 784) -> 4D (M, 28, 28, 1); -1 lets the batch size M be inferred.
imageInputReshape = tf.reshape(imageInput, shape=[-1, 28, 28, 1])

2.5、卷積運算

# Convolution parameters: a 5x5 kernel mapping 1 input channel to 32 output
# channels, plus one bias per output channel.
w0 = tf.Variable(tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1))
b0 = tf.Variable(tf.constant(value=0.1, shape=[32]))

2.6、激勵函數+卷積運算

# Convolve the (M, 28, 28, 1) images with the (5, 5, 1, 32) kernel; stride 1
# with 'SAME' padding keeps the spatial size, giving (M, 28, 28, 32).
conv1 = tf.nn.conv2d(imageInputReshape, w0, strides=[1, 1, 1, 1], padding='SAME')
layer1 = tf.nn.relu(conv1 + b0)
# 4x4 max pooling with stride 4 shrinks the data: (M, 28, 28, 32) -> (M, 7, 7, 32).
layer1_pool = tf.nn.max_pool(layer1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

2.7、激勵函數+乘加運算

# Fully connected layer: flatten the pooled feature maps, then matmul + bias + ReLU.
w1 = tf.Variable(tf.truncated_normal([7*7*32, 1024], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_reshape = tf.reshape(layer1_pool, [-1, 7*7*32])  # 4D (M, 7, 7, 32) -> 2D (M, 1568)
h1 = tf.nn.relu(tf.matmul(h_reshape, w1) + b1)     # (M, 1568) x (1568, 1024) = (M, 1024)
# Output layer: matmul + bias followed by softmax gives class probabilities.
w2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))     # (M, 1024) x (1024, 10) = (M, 10)
pred = tf.nn.softmax(tf.matmul(h1, w2) + b2)
# Cross-entropy loss averaged over the WHOLE fed batch.
# A fixed 100-row Python loop would ignore every row past index 99 (training
# feeds 500 rows per step) and would fail for batches smaller than 100.
# Clipping keeps log() finite if a probability underflows to 0, avoiding NaN.
loss0 = LabelInput * tf.log(tf.clip_by_value(pred, 1e-10, 1.0))
loss1 = -tf.reduce_sum(loss0, axis=1)  # per-example cross-entropy: (M,)
loss = tf.reduce_mean(loss1)           # scalar: mean over the batch

2.8、定義訓練操作(梯度下降)

# Training op: one plain gradient-descent step (learning rate 0.01) on `loss`.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(loss)

2.9、運行

# Accuracy ops are built ONCE, before the session loop. Creating them inside
# the loop adds new nodes (and a copy of the test labels as a constant) to the
# graph on every iteration, so memory grows with each step.
acc = tf.equal(tf.argmax(pred, 1), tf.argmax(LabelInput, 1))
acc_float = tf.reduce_mean(tf.cast(acc, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Evaluation always uses the test images together with the TEST labels.
    test_feed = {imageInput: mnist.test.images, LabelInput: mnist.test.labels}
    for i in range(100):
        # One training step on the next 500 training examples.
        images, labels = mnist.train.next_batch(500)
        sess.run(train, feed_dict={imageInput: images, LabelInput: labels})
        # Test-set predictions and accuracy after this step, in a single run.
        pred_test, acc_result = sess.run([pred, acc_float], feed_dict=test_feed)
        print(acc_result)

2.10、源碼彙總

# CNN: convolutional network for MNIST digit recognition
# 1. imports
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # keep TensorFlow 1.x graph/session behaviour
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# 2. load data from the local 'MNIST_data' folder with one-hot labels
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# 3. inputs: flattened 28*28 = 784-value images and 10-wide label rows
imageInput = tf.placeholder(tf.float32, [None, 784])
LabelInput = tf.placeholder(tf.float32, [None, 10])
# 4. reshape: 2D (M, 784) -> 4D (M, 28, 28, 1), i.e. 28x28 images with 1 channel
imageInputReshape = tf.reshape(imageInput, [-1, 28, 28, 1])
# 5. convolution parameters: 5x5 kernel, 1 input channel, 32 output channels
w0 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))
b0 = tf.Variable(tf.constant(0.1, shape=[32]))

# 6. layer1: convolution + bias + ReLU, then pooling
# (M, 28, 28, 1) convolved with (5, 5, 1, 32) using 'SAME' padding -> (M, 28, 28, 32)
layer1 = tf.nn.relu(tf.nn.conv2d(imageInputReshape, w0, strides=[1, 1, 1, 1], padding='SAME') +b0 )
# 4x4 max pooling with stride 4 shrinks the data: (M, 28, 28, 32) -> (M, 7, 7, 32)
layer1_pool = tf.nn.max_pool(layer1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

# 7. layer2: fully connected (matmul + bias + ReLU), then the softmax output layer
w1 = tf.Variable(tf.truncated_normal([7*7*32, 1024], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_reshape = tf.reshape(layer1_pool, [-1, 7*7*32])  # 4D (M, 7, 7, 32) -> 2D (M, 1568)
h1 = tf.nn.relu(tf.matmul(h_reshape, w1) + b1)     # (M, 1568) x (1568, 1024) = (M, 1024)
# 7.1 softmax output
w2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))     # (M, 1024) x (1024, 10) = (M, 10)
pred = tf.nn.softmax(tf.matmul(h1, w2) + b2)
# 7.2 cross-entropy loss averaged over the WHOLE fed batch.
# A fixed 100-row Python loop would ignore every row past index 99 (training
# feeds 500 rows per step) and would fail for batches smaller than 100.
# Clipping keeps log() finite if a probability underflows to 0, avoiding NaN.
loss0 = LabelInput * tf.log(tf.clip_by_value(pred, 1e-10, 1.0))
loss1 = -tf.reduce_sum(loss0, axis=1)  # per-example cross-entropy: (M,)
loss = tf.reduce_mean(loss1)           # scalar: mean over the batch

# 8. training op: plain gradient descent with learning rate 0.01
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# Accuracy ops are built once, before the loop. Creating them per iteration
# keeps adding nodes and constants to the graph.
acc = tf.equal(tf.argmax(pred, 1), tf.argmax(LabelInput, 1))
acc_float = tf.reduce_mean(tf.cast(acc, tf.float32))

# 9. run
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Evaluation always uses the test images together with the TEST labels.
    test_feed = {imageInput: mnist.test.images, LabelInput: mnist.test.labels}
    for i in range(100):
        # One training step on the next 500 training examples.
        images,labels = mnist.train.next_batch(500)
        sess.run(train,feed_dict={imageInput:images,LabelInput:labels})
        # Test-set predictions and accuracy after this step, in a single run.
        pred_test, acc_result = sess.run([pred, acc_float], feed_dict=test_feed)
        print(acc_result)

運行結果:

Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
0.1581
0.1714
0.1771
0.1951
0.2065
0.2363
0.2596
0.267
0.3245
0.3308
0.3531
0.4143
0.44
0.4393
0.3842
0.4771
0.4509
0.4632
0.499
0.462
0.4652
0.5596
0.575
0.5983
0.5877
0.608
0.6139
......
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章