Simple Vehicle Detection with TensorFlow

Dataset:

UIUC Image Database for Car Detection. After downloading and extracting the archive, the directory layout is as shown below; all we need here are the TrainImages and TestImages folders.
(Figure: directory structure of the extracted dataset)
The download package contains the following:

  1. 1050 training images (550 car and 500 non-car images)
  2. 170 single-scale test images, containing 200 cars at roughly the same scale as in the training images
  3. 108 multi-scale test images, containing 139 cars at various scales
  4. Evaluation files
  5. README file

The images are all grey-scale and are available in raw PGM format.
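Before training, it is worth checking one file to confirm the images are what the network expects. The following is a small sketch (it assumes the archive was extracted to ./datas/CarData, matching the directory layout used later); every training image should come back as a 40*100 grayscale array:

import os
import cv2

# Assumed path: dataset extracted to ./datas/CarData (see the directory layout further below)
train_dir = './datas/CarData/TrainImages/'
sample_file = sorted(os.listdir(train_dir))[0]
img = cv2.imread(train_dir + sample_file, 0)        # 0 -> load as grayscale
print(sample_file, img.shape)                       # expected shape: (40, 100)
print('car' if sample_file.startswith('pos') else 'non-car')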

Model Architecture

A CNN is used; the per-layer parameters are listed below. The input is a 40*100*1 grayscale image, and after several convolution and pooling layers the output is a 1*1*1 result.

input : [None, 40, 100, 1]
conv-pool1 :
    f : 5*5
    strides : 1
    nc : 6
    padding : VALID
    maxpool : 2

conv-pool2:
    f : 5*5
    strides : 1
    nc : 16
    padding : VALID
    maxpool : 2

conv3:
    f : 5*5
    strides : 1
    nc : 32
    padding : VALID

conv4:
    f : 3*18
    strides : 1
    nc : 64
    padding : VALID

conv5:
    f : 1*1
    strides : 1
    nc : 1
    padding : VALID

output : [None, 1, 1, 1]
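A quick way to verify that this stack really shrinks a 40*100 input down to 1*1 is to trace the spatial size layer by layer: a VALID convolution with an f*f kernel reduces each side by f-1, and each 2*2 max pooling halves it. A small sketch of that arithmetic:

# Trace the spatial size of a 40*100 input through the layer stack above.
# VALID convolution: out = in - f + 1 ; 2*2 max pooling: out = in // 2
def valid_conv(h, w, fh, fw):
    return h - fh + 1, w - fw + 1

def pool2(h, w):
    return h // 2, w // 2

h, w = 40, 100
h, w = pool2(*valid_conv(h, w, 5, 5))   # conv-pool1 -> 18, 48
h, w = pool2(*valid_conv(h, w, 5, 5))   # conv-pool2 -> 7, 22
h, w = valid_conv(h, w, 5, 5)           # conv3      -> 3, 18
h, w = valid_conv(h, w, 3, 18)          # conv4      -> 1, 1
h, w = valid_conv(h, w, 1, 1)           # conv5      -> 1, 1
print(h, w)                             # prints: 1 1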

Code

My project directory layout:
    -CarDetect
          --CarDetect.py
          --datas
              ---CarData
          --models
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Define the data-loading function; note where the training data is stored
def load_carDats():
    import cv2
    import os
    file_path = './datas/CarData/TrainImages/'
    files = os.listdir(file_path)
    samples = []
    for file_name in files:
        data = cv2.imread(file_path + file_name, 0).reshape(-1) / 255
        label = 0 if file_name.split('-')[0] == 'neg' else 1
        samples.append((data, label))
    return samples
# Load the data
datas = load_carDats()
# Shuffle the data randomly
np.random.shuffle(datas)
# Split the data: xs/ys are used to train the network, x_test/y_test to evaluate it
xs = [i[0] for i in datas[:1000]]
ys = np.reshape([i[1] for i in datas[:1000]], newshape=(-1,1))
x_test = [i[0] for i in datas[1000:]]
y_test = np.reshape([i[1] for i in datas[1000:]], newshape=(-1,1))

#---------------- Helper functions used repeatedly in the network -----------------#
# Weight variable
def weight_variables(shape):
    weights = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
    return tf.Variable(weights)

# Bias variable
def biase_variables(shape):
    biases = tf.constant(value=1.0, shape=shape)
    return tf.Variable(biases)

# Convolution
def conv2d(x, W):
    '''Compute a convolution. x is the input layer (shape=[-1,width,height,channel]),
    W is the f*f shared weight matrix (shape=[f,f,in_layers_num,out_layers_num]);
    horizontal and vertical strides are both 1.'''
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding="VALID")

# Max pooling
def max_pooling(x):
    '''Max pooling. x is the input layer (usually a convolution result), shape=[-1,width,height,channels];
    the pooling kernel (ksize) is 2*2 and horizontal and vertical strides are both 2.'''
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding="VALID")

#--------------------- Forward pass of the network ---------------------#
def deepnn(x, keep_prop):
    '''Deep convolutional network: two conv-pooling layers followed by three conv layers'''
    # step1: reshape the original 1-D data to 2-D; the first dim is the sample count,
    # the next two are rows/columns, the last is the channel count
#     x = tf.reshape(x, shape=[-1, 40, 100, 1])
    # step2: first conv-pooling layer
    with tf.name_scope("conv-pooling1"):
        W_conv1 = weight_variables([5,5,1,6])
        b_conv1 = biase_variables([6])
        ret_conv1 = tf.nn.relu(conv2d(x,W_conv1) + b_conv1)  # convolution followed by a ReLU unit
        ret_pooling1 = max_pooling(ret_conv1)  # pooling

    # step3: second conv-pooling layer
    with tf.name_scope("conv-pooling2"):
        W_conv2 = weight_variables([5,5,6,16])
        b_conv2 = biase_variables([16])
        ret_conv2 = tf.nn.relu(conv2d(ret_pooling1, W_conv2) + b_conv2)
        ret_pooling2 = max_pooling(ret_conv2)

    # step4: third conv layer
    with tf.name_scope("conv-pooling3"):
        W_conv3 = weight_variables([5,5,16,32])
        b_conv3 = biase_variables([32])
        ret_conv3 = tf.nn.relu(conv2d(ret_pooling2, W_conv3) + b_conv3)

    # step5: fourth conv layer
    with tf.name_scope("conv4"):
        W_conv4 = weight_variables([3,18,32,64])
        b_conv4 = biase_variables([64])
        ret_conv4 = tf.nn.relu(conv2d(ret_conv3, W_conv4) + b_conv4)

    # step6: fifth conv layer
    with tf.name_scope("conv5"):
        W_conv5 = weight_variables([1,1,64,1])
        b_conv5 = biase_variables([1])
        ret_conv5 = conv2d(ret_conv4, W_conv5) + b_conv5

    return ret_conv5

#--------------------- Preparation before training ---------------------#
# Declare placeholders for the input data and the labels
x = tf.placeholder(dtype=tf.float32, shape=[None,None, None, 1], name="x-input")
labels = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y-output")

# Declare the dropout keep-probability placeholder (fed below, though not actually applied inside deepnn)
keep_prop = tf.placeholder(dtype=tf.float32, name="kprob")

# Build the classification model
ret = deepnn(x, keep_prop)
# The returned tensor has shape [-1,1,1,1]; reshape it to [-1,1] for easier computation
y = tf.reshape(ret, shape=[-1,1])

# Define the loss function
with tf.name_scope("loss_function"):
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)
cost = tf.reduce_mean(loss)
# Define the training (optimization) op
with tf.name_scope("optimizor"):
    train = tf.train.AdamOptimizer(0.0005).minimize(cost)

# Define how the model's accuracy is measured
with tf.name_scope("accuracy"):
    y_hat = tf.nn.sigmoid(y)
    accuracy_rate = tf.abs(y_hat - labels) < 0.5
    accuracy_rate = tf.cast(accuracy_rate, dtype=tf.float32)
accuracy = tf.reduce_mean(accuracy_rate)

#-------------- Train the network and save the results to checkpoint files --------------#
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize variables

for i in range(10):
    skip = 10
    for k in range(0,1000,skip):
        x_train = np.reshape(xs[k:k+skip], newshape=(-1, 40, 100, 1))
        sess.run(train, feed_dict={x:x_train, labels:ys[k:k+skip], keep_prop:0.5}) # train the model
    # if (i+1) % 10 == 0:
    train_accuracy = sess.run(accuracy, feed_dict = {x: np.reshape(xs, (-1,40,100,1)), labels: ys, keep_prop:1.0})
    print('step %d, train accuracy %g' % (i, train_accuracy))
    saver.save(sess, "./models/carDetect_model.ckpt", global_step=i)
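Note that the x_test / y_test split held out earlier is never used in this loop. A held-out accuracy check could be added after training, for example (a sketch reusing the same accuracy op):

# Optional sketch: evaluate on the 50 samples held out as x_test / y_test above
test_accuracy = sess.run(accuracy, feed_dict={x: np.reshape(x_test, (-1, 40, 100, 1)),
                                              labels: y_test, keep_prop: 1.0})
print('held-out accuracy %g' % test_accuracy)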

Here are my training results:
step 0, train accuracy 0.859
step 1, train accuracy 0.934
step 2, train accuracy 0.965
step 3, train accuracy 0.971
step 4, train accuracy 0.985
step 5, train accuracy 0.991
step 6, train accuracy 0.995
step 7, train accuracy 0.994
step 8, train accuracy 0.995
step 9, train accuracy 0.997
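The detection code below keeps using the sess that was just trained. If detection is run later in a fresh process, the saved checkpoint can be restored instead (a minimal sketch; it assumes the graph-building code above has already been executed and the latest checkpoint sits in ./models/):

# Minimal sketch: restore the most recent checkpoint saved during training
saver = tf.train.Saver()
sess = tf.Session()
ckpt = tf.train.latest_checkpoint("./models/")   # e.g. ./models/carDetect_model.ckpt-9
saver.restore(sess, ckpt)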

#-------------------------- Run detection on a new image --------------------------#
import cv2
# Load the test image (path relative to CarDetect.py, matching the directory layout above)
pic = cv2.imread("./datas/CarData/TestImages/test-100.pgm", 0)
size = pic.shape

img  = np.reshape(pic, (-1,size[0], size[1], 1))
# Use the network trained above to run detection on the new image
result = sess.run(ret, feed_dict={x:img})

# Display the detection result: the two 2*2 poolings downsample by a factor of 4,
# so the argmax position in the output map is scaled by 4 to get the window's top-left corner
pt1 = np.array([result.argmax()//result.shape[2], result.argmax()%result.shape[2]]) * 4
pt2 = pt1 + np.array([40, 100])

pic_2 = cv2.rectangle(pic, (pt1[1], pt1[0]), (pt2[1], pt2[0]), 0, 2)

plt.imshow(pic_2, "gray")
plt.show()

Detection result:
(Figure: test image with the detected car marked by a rectangle)
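The code above only marks the single highest-scoring window. Because the network is fully convolutional, the same output map can also be thresholded to mark every position the detector is confident about. A rough sketch (the 0.9 threshold is an arbitrary choice, overlapping boxes are not merged, and pic already carries the rectangle drawn above):

# Rough sketch: draw a box at every output position whose sigmoid score exceeds a threshold
scores = 1.0 / (1.0 + np.exp(-result[0, :, :, 0]))      # sigmoid of the logit map computed above
pic_3 = pic.copy()                                       # note: pic already has the argmax rectangle on it
for r, c in zip(*np.where(scores > 0.9)):                # 0.9 is an arbitrary confidence threshold
    top_left = (int(c) * 4, int(r) * 4)                  # factor 4: two 2*2 poolings
    bottom_right = (int(c) * 4 + 100, int(r) * 4 + 40)   # 40*100 detection window
    cv2.rectangle(pic_3, top_left, bottom_right, 0, 2)
plt.imshow(pic_3, "gray")
plt.show()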

Limitations:
Because this example is deliberately simple, it has several shortcomings:

  1. The detection window has a fixed size, so performance is poor on vehicles at different scales.
  2. The network is shallow and narrow, so detection accuracy is limited.
  3. The training set is small, so the network is under-trained and does not generalize well.