OpenCV-TensorFlow 入門人工智能圖像處理
樣本地址: http://yann.lecun.com/exdb/mnist/
文件 | 內容 |
---|---|
train-images-idx3-ubyte.gz | 訓練集圖片 - 55000張 訓練圖片,5000張 驗證圖片 |
train-labels-idx1-ubyte.gz | 訓練集圖片對應的數字標籤 |
t10k-images-idx3-ubyte.gz | 測試集圖片 - 10000張 圖片 |
t10k-labels-idx1-ubyte.gz | 測試集圖片對應的數字標籤 |
- 下載的4個文件放在同一個文件夾,命名爲 `MNIST_data`,並與代碼放在同一個目錄下。
1、KNN數字識別
1.1、load Data
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # compatibility with the TensorFlow 1.x API
import numpy as np
import random
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST files under 'MNIST_data'; one_hot=True requests one-hot labels.
# NOTE(review): tensorflow.examples.tutorials may not ship with newer
# TensorFlow releases - confirm it is available in the target environment.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
1.2、KNN:計算測試圖片與訓練圖片之間的距離(5*500=2500個距離)
# Sampling configuration: pool sizes to draw from, sample sizes, and k for KNN.
trainNum, testNum = 55000, 10000
trainSize, testSize = 500, 5
k = 4

# Draw distinct random row indices (replace=False) from 0..trainNum-1 and
# 0..testNum-1; the train draw happens first, then the test draw.
trainIndex = np.random.choice(trainNum, trainSize, replace=False)
testIndex = np.random.choice(testNum, testSize, replace=False)

# Pick the sampled rows. With the sizes above:
#   trainData (500, 784), trainlabel (500, 10), testData (5, 784),
#   testLabel (5, 10); 784 = 28 * 28 pixels per image.
trainData, trainlabel = mnist.train.images[trainIndex], mnist.train.labels[trainIndex]
testData, testLabel = mnist.test.images[testIndex], mnist.test.labels[testIndex]

# Report the shape of every sampled array.
for caption, sampled in (
    ("trainData=", trainData),
    ("trainlabel=", trainlabel),
    ("testData=", testData),
    ("testLabel=", testLabel),
):
    print(caption, sampled.shape)
1.3、KNN:將5張測試圖片分別與500張訓練圖片做差,爲每張測試圖片找到距離最近的 k=4 張訓練圖片
# Graph inputs. The first dimension is None so any number of rows can be fed.
trainDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
trainLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
testDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
testLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)

# KNN distance. The test batch goes from (T, 784) to (T, 1, 784) so that it
# broadcasts against the (N, 784) training batch, giving T*N pixel differences.
f1 = tf.expand_dims(testDataInput, 1)        # insert a length-1 axis
f2 = tf.subtract(trainDataInput, f1)         # (T, N, 784) pixel differences
# `axis` replaces the deprecated `reduction_indices` keyword.
f3 = tf.reduce_sum(tf.abs(f2), axis=2)       # (T, N) L1 distances
f4 = tf.negative(f3)                         # negate so the nearest is the largest
# Use the configured neighbour count `k` rather than a hard-coded 4.
f5, f6 = tf.nn.top_k(f4, k=k)                # values and indices of the k nearest
f7 = tf.gather(trainLabelInput, f6)          # (T, k, 10) labels of those neighbours
f8 = tf.reduce_sum(f7, axis=1)               # (T, 10) votes per digit
# `axis` replaces the deprecated `dimension` keyword.
f9 = tf.argmax(f8, axis=1)                   # digit with the most votes
1.4、k個最近的圖片 ————> 解析出對應的標籤內容(parse content label)
with tf.Session() as sess:
    # Feed the whole sampled test set instead of a hard-coded [0:5] slice, so
    # the run follows testSize. Every intermediate tensor is fetched in a
    # single run rather than re-evaluating the graph once per tensor.
    feed = {
        trainDataInput: trainData,
        trainLabelInput: trainlabel,
        testDataInput: testData,
    }
    p1, p2, p3, p4, p5, p6, p7, p8, p9 = sess.run(
        (f1, f2, f3, f4, f5, f6, f7, f8, f9), feed_dict=feed)

    print ("p1 = ",p1.shape)  # (testSize, 1, 784)
    print ("p2 = ",p2.shape)  # (testSize, trainSize, 784)
    print ("p3 = ",p3.shape)  # (testSize, trainSize)
    print ("p3[0, 0] = ", p3[0, 0])  # L1 distance: test image 0 vs train image 0
    print ("p4 = ", p4.shape)  # (testSize, trainSize)
    print ("p4[0, 0] = ", p4[0, 0])  # the same distance, negated
    print ("p5 = ",p5.shape)  # (testSize, k): the k nearest train images per test image
    print ("p6 = ",p6.shape)  # (testSize, k)
    # Not a random number: the negated distance of test image 0's nearest neighbour.
    print ("p5[0, 0] = ", p5[0, 0])
    print ("p6[0, 0] = ", p6[0, 0])  # index of that neighbour within trainData
    print ("p7 = ", p7.shape)  # (testSize, k, 10)
    print ("p8 = ", p8)
    print ("p8.shape = ", p8.shape)  # (testSize, 10)
    print ("p9 = ", p9)  # predicted digit = index of the largest vote count in p8
    print ("p9.shape = ", p9.shape)  # (testSize,)
    # Ground-truth digits decoded from the one-hot test labels; comparing
    # p9 with p10 gives the recognition accuracy.
    p10 = np.argmax(testLabel, axis=1)
    print ("p10 = ", p10)
1.5、統計檢測數據的識別正確率
# Recognition accuracy: the percentage of predictions (p9) that equal the
# ground-truth digits (p10). The sample count comes from the predictions
# themselves rather than a hard-coded 5.
total = len(p9)
j = sum(1 for predicted, actual in zip(p9, p10) if predicted == actual)
print ("本次識別正確率 =", j*100/total)
1.6、源碼彙總
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # compatibility with the TensorFlow 1.x API
import numpy as np
import random
from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST files under 'MNIST_data'; one_hot=True requests one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# Sampling configuration: pool sizes to draw from, sample sizes, and k for KNN.
trainNum = 55000
testNum = 10000
trainSize = 500
testSize = 5
k = 4

# Draw distinct random row indices (replace=False) from each pool.
trainIndex = np.random.choice(trainNum, trainSize, replace=False)
testIndex = np.random.choice(testNum, testSize, replace=False)
trainData = mnist.train.images[trainIndex]    # (trainSize, 784); 784 = 28 * 28
trainlabel = mnist.train.labels[trainIndex]   # (trainSize, 10)
testData = mnist.test.images[testIndex]       # (testSize, 784)
testLabel = mnist.test.labels[testIndex]      # (testSize, 10)
print ("trainData=",trainData.shape)
print ("trainlabel=",trainlabel.shape)
print ("testData=",testData.shape)
print ("testLabel=",testLabel.shape)

# Graph inputs. The first dimension is None so any number of rows can be fed.
trainDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
trainLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)
testDataInput = tf.placeholder(shape=[None, 784], dtype=tf.float32)
testLabelInput = tf.placeholder(shape=[None, 10], dtype=tf.float32)

# KNN distance. The test batch goes from (T, 784) to (T, 1, 784) so that it
# broadcasts against the (N, 784) training batch, giving T*N pixel differences.
f1 = tf.expand_dims(testDataInput, 1)        # insert a length-1 axis
f2 = tf.subtract(trainDataInput, f1)         # (T, N, 784) pixel differences
# `axis` replaces the deprecated `reduction_indices` / `dimension` keywords.
f3 = tf.reduce_sum(tf.abs(f2), axis=2)       # (T, N) L1 distances
f4 = tf.negative(f3)                         # negate so the nearest is the largest
# Use the configured neighbour count `k` rather than a hard-coded 4.
f5, f6 = tf.nn.top_k(f4, k=k)                # values and indices of the k nearest
f7 = tf.gather(trainLabelInput, f6)          # (T, k, 10) labels of those neighbours
f8 = tf.reduce_sum(f7, axis=1)               # (T, 10) votes per digit
f9 = tf.argmax(f8, axis=1)                   # digit with the most votes

with tf.Session() as sess:
    # Feed the whole sampled test set instead of a hard-coded [0:5] slice, and
    # fetch every intermediate tensor in one run instead of one run per tensor.
    feed = {
        trainDataInput: trainData,
        trainLabelInput: trainlabel,
        testDataInput: testData,
    }
    p1, p2, p3, p4, p5, p6, p7, p8, p9 = sess.run(
        (f1, f2, f3, f4, f5, f6, f7, f8, f9), feed_dict=feed)

    print ("p1 = ",p1.shape)  # (testSize, 1, 784)
    print ("p2 = ",p2.shape)  # (testSize, trainSize, 784)
    print ("p3 = ",p3.shape)  # (testSize, trainSize)
    print ("p3[0, 0] = ", p3[0, 0])  # L1 distance: test image 0 vs train image 0
    print ("p4 = ", p4.shape)  # (testSize, trainSize)
    print ("p4[0, 0] = ", p4[0, 0])  # the same distance, negated
    print ("p5 = ",p5.shape)  # (testSize, k): the k nearest train images per test image
    print ("p6 = ",p6.shape)  # (testSize, k)
    # Not a random number: the negated distance of test image 0's nearest neighbour.
    print ("p5[0, 0] = ", p5[0, 0])
    print ("p6[0, 0] = ", p6[0, 0])  # index of that neighbour within trainData
    print ("p7 = ", p7.shape)  # (testSize, k, 10)
    print ("p8 = ", p8)
    print ("p8.shape = ", p8.shape)  # (testSize, 10)
    print ("p9 = ", p9)  # predicted digit = index of the largest vote count in p8
    print ("p9.shape = ", p9.shape)  # (testSize,)
    # Ground-truth digits decoded from the one-hot test labels.
    p10 = np.argmax(testLabel, axis=1)
    print ("p10 = ", p10)

# Recognition accuracy: percentage of predictions that equal the ground truth.
# The sample count comes from the predictions rather than a hard-coded 5.
total = len(p9)
j = sum(1 for predicted, actual in zip(p9, p10) if predicted == actual)
print ("本次識別正確率 =", j*100/total)
運行結果:
Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
trainData= (500, 784)
trainlabel= (500, 10)
testData= (5, 784)
testLabel= (5, 10)
p1 = (5, 1, 784)
p2 = (5, 500, 784)
p3 = (5, 500)
p3[0, 0] = 194.5373
p4 = (5, 500)
p4[0, 0] = -194.5373
p5 = (5, 4)
p6 = (5, 4)
p5[0, 0] = -64.77253
p6[0, 0] = 484
p7 = (5, 4, 10)
p8 = [[4. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 4. 0. 0. 0. 0. 0.]
[0. 0. 0. 4. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 3. 0.]
[0. 0. 0. 0. 0. 0. 0. 4. 0. 0.]]
p8.shape = (5, 10)
p9 = [0 4 3 8 7]
p9.shape = (5,)
p10 = [0 4 3 8 7]
本次識別正確率 = 100.0
2、CNN實現手寫數字識別
2.1、導入安裝包
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # compatibility with the TensorFlow 1.x API
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
2.2、加載數據
# Read the MNIST files under 'MNIST_data'; one_hot=True requests one-hot labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
2.3、定義tf.placeholder()
# Network inputs; the batch dimension is left open (None).
# Each image is a flattened 28 * 28 = 784 vector, each label a 10-way vector.
imageInput = tf.placeholder(dtype=tf.float32, shape=[None, 784])
LabelInput = tf.placeholder(dtype=tf.float32, shape=[None, 10])
2.4、轉換數據類型
# Turn the 2-D [None, 784] batch into a 4-D M*28*28*1 tensor:
# 28 x 28 pixels (width, height) with a single channel.
image_shape_4d = [-1, 28, 28, 1]
imageInputReshape = tf.reshape(imageInput, image_shape_4d)
2.5、卷積運算
# Convolution parameters: a 5x5 kernel mapping 1 input channel to 32 output
# channels, plus one bias per output channel.
w0 = tf.Variable(tf.truncated_normal(shape=[5, 5, 1, 32], stddev=0.1))
b0 = tf.Variable(tf.constant(value=0.1, shape=[32]))
2.6、激勵函數+卷積運算
# Layer 1: convolution (stride 1, SAME padding) plus bias, followed by ReLU.
# Input M*28*28*1 with kernel [5, 5, 1, 32] gives M*28*28*32.
conv1 = tf.nn.conv2d(imageInputReshape, w0, strides=[1, 1, 1, 1], padding='SAME')
layer1 = tf.nn.relu(conv1 + b0)
# 4x4 max pooling with stride 4 cuts the data volume: M*28*28*32 => M*7*7*32.
pool_window = [1, 4, 4, 1]
layer1_pool = tf.nn.max_pool(layer1, ksize=pool_window, strides=pool_window, padding='SAME')
2.7、激勵函數+乘加運算
# Layer 2 (fully connected): activation over a multiply-add.
w1 = tf.Variable(tf.truncated_normal([7*7*32, 1024], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_reshape = tf.reshape(layer1_pool, [-1, 7*7*32])  # M*7*7*32 (4-D) -> N*1568 (2-D)
# [N, 7*7*32] x [7*7*32, 1024] = [N, 1024]
h1 = tf.nn.relu(tf.matmul(h_reshape, w1) + b1)

# 7.1 Softmax output layer: [N, 1024] x [1024, 10] = [N, 10]
w2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))
pred = tf.nn.softmax(tf.matmul(h1, w2) + b2)

# 7.2 Cross-entropy loss averaged over the whole batch.
# The original unrolled Python loop summed only rows 0..99 even though training
# feeds 500 rows per batch, and it added 1000 slice ops to the graph. Reducing
# over the tensor covers every fed row regardless of batch size.
# Clipping keeps log() finite when softmax saturates to exactly 0.
loss0 = LabelInput * tf.log(tf.clip_by_value(pred, 1e-10, 1.0))
loss1 = -tf.reduce_sum(loss0, axis=1)   # per-sample cross-entropy
loss = tf.reduce_mean(loss1)            # mean over the batch
2.8、訓練集
# Training op: plain gradient descent on `loss` with learning rate 0.01.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train = optimizer.minimize(loss)
2.9、運行
# Ground-truth test digits never change, so decode the one-hot labels once.
test_truth = np.argmax(mnist.test.labels, axis=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        # One gradient step on a batch of 500 training images.
        images,labels = mnist.train.next_batch(500)
        sess.run(train,feed_dict={imageInput:images,LabelInput:labels})
        # Evaluate on the full test set. `pred` depends only on imageInput, so
        # the training-batch labels are no longer fed alongside the test images.
        pred_test = sess.run(pred,feed_dict={imageInput:mnist.test.images})
        # Accuracy is computed with NumPy. Building tf.equal / tf.arg_max /
        # tf.reduce_mean ops here would add new nodes to the graph on every
        # iteration, and tf.arg_max is deprecated.
        acc = np.argmax(pred_test, axis=1) == test_truth
        acc_result = np.mean(acc.astype(np.float32))
        print(acc_result)
2.10、源碼彙總
# CNN: convolutional neural network
# 1. imports
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # compatibility with the TensorFlow 1.x API
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# 2. load data: read the MNIST files under 'MNIST_data' with one-hot labels
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# 3. Inputs; the batch dimension is left open (None).
imageInput = tf.placeholder(tf.float32, [None, 784])  # 28 * 28 = 784
LabelInput = tf.placeholder(tf.float32, [None, 10])

# 4. Reshape the 2-D [None, 784] batch into 4-D M*28*28*1
#    (28 x 28 pixels, 1 channel).
imageInputReshape = tf.reshape(imageInput, [-1, 28, 28, 1])

# 5. Convolution parameters: 5x5 kernel, 1 input channel, 32 output channels.
w0 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1))
b0 = tf.Variable(tf.constant(0.1, shape=[32]))

# 6. Layer 1: activation over the convolution.
#    Input M*28*28*1 with kernel [5, 5, 1, 32] gives M*28*28*32.
layer1 = tf.nn.relu(tf.nn.conv2d(imageInputReshape, w0, strides=[1, 1, 1, 1], padding='SAME') + b0)
#    4x4 max pooling with stride 4 cuts the data volume: M*28*28*32 => M*7*7*32.
layer1_pool = tf.nn.max_pool(layer1, ksize=[1, 4, 4, 1], strides=[1, 4, 4, 1], padding='SAME')

# 7. Layer 2 (fully connected): activation over a multiply-add.
w1 = tf.Variable(tf.truncated_normal([7*7*32, 1024], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[1024]))
h_reshape = tf.reshape(layer1_pool, [-1, 7*7*32])  # M*7*7*32 (4-D) -> N*1568 (2-D)
# [N, 7*7*32] x [7*7*32, 1024] = [N, 1024]
h1 = tf.nn.relu(tf.matmul(h_reshape, w1) + b1)

# 7.1 Softmax output layer: [N, 1024] x [1024, 10] = [N, 10]
w2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))
pred = tf.nn.softmax(tf.matmul(h1, w2) + b2)

# 7.2 Cross-entropy loss averaged over the whole batch.
# The original unrolled Python loop summed only rows 0..99 even though training
# feeds 500 rows per batch, and it added 1000 slice ops to the graph. Reducing
# over the tensor covers every fed row regardless of batch size.
# Clipping keeps log() finite when softmax saturates to exactly 0.
loss0 = LabelInput * tf.log(tf.clip_by_value(pred, 1e-10, 1.0))
loss1 = -tf.reduce_sum(loss0, axis=1)   # per-sample cross-entropy
loss = tf.reduce_mean(loss1)            # mean over the batch

# 8. Training op: gradient descent with learning rate 0.01.
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

# 9. Run.
# Ground-truth test digits never change, so decode the one-hot labels once.
test_truth = np.argmax(mnist.test.labels, axis=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(100):
        # One gradient step on a batch of 500 training images.
        images,labels = mnist.train.next_batch(500)
        sess.run(train,feed_dict={imageInput:images,LabelInput:labels})
        # Evaluate on the full test set. `pred` depends only on imageInput, so
        # the training-batch labels are no longer fed alongside the test images.
        pred_test = sess.run(pred,feed_dict={imageInput:mnist.test.images})
        # Accuracy is computed with NumPy. Building tf.equal / tf.arg_max /
        # tf.reduce_mean ops here would add new nodes to the graph on every
        # iteration, and tf.arg_max is deprecated.
        acc = np.argmax(pred_test, axis=1) == test_truth
        acc_result = np.mean(acc.astype(np.float32))
        print(acc_result)
運行結果:
Extracting MNIST_data\train-images-idx3-ubyte.gz
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
0.1581
0.1714
0.1771
0.1951
0.2065
0.2363
0.2596
0.267
0.3245
0.3308
0.3531
0.4143
0.44
0.4393
0.3842
0.4771
0.4509
0.4632
0.499
0.462
0.4652
0.5596
0.575
0.5983
0.5877
0.608
0.6139
......