

UIUC Image Database for Car Detection。下載解壓之後文件目錄如圖所示,這裏我們所需的是TrainImages這個文件夾和TestImages文件夾。
The download package contains the following:

  1. 1050 training images (550 car and 500 non-car images)
  2. 170 single-scale test images, containing 200 cars at roughly the same scale as in the training images
  3. 108 multi-scale test images, containing 139 cars at various scales
  4. Evaluation files
  5. README file

The images are all grey-scale and are available in raw PGM format.



input : [None, 40, 100, 1]
conv-pool1 :
    f : 5*5
    strides : 1
    nc : 6
    padding : VALID
    maxpool : 2

conv-pool2 :
    f : 5*5
    strides : 1
    nc : 16
    padding : VALID
    maxpool : 2

conv3 :
    f : 5*5
    strides : 1
    nc : 32
    padding : VALID

conv4 :
    f : 3*18
    strides : 1
    nc : 64
    padding : VALID

conv5 :
    f : 1*1
    strides : 1
    nc : 1
    padding : VALID

output : [None, 1, 1, 1]


import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def load_carDats(file_path='./datas/CarData/TrainImages/'):
    """Load the car-detection training images as flattened, labelled samples.

    Args:
        file_path: Directory that holds the training images. A file whose
            name starts with ``neg-`` is labelled 0; any other file is
            labelled 1.

    Returns:
        A list of ``(data, label)`` tuples. ``data`` is the grey-scale image
        flattened to one dimension and divided by 255; ``label`` is 0 or 1.
        Entries that OpenCV cannot decode are skipped.
    """
    import cv2
    import os
    samples = []
    for file_name in os.listdir(file_path):
        # Flag 0 asks OpenCV for a single-channel grey-scale image.
        image = cv2.imread(os.path.join(file_path, file_name), 0)
        if image is None:
            # cv2.imread signals an unreadable / non-image entry with None
            # instead of raising; skip it rather than crash on .reshape.
            continue
        data = image.reshape(-1) / 255
        label = 0 if file_name.split('-')[0] == 'neg' else 1
        samples.append((data, label))
    return samples
datas = load_carDats()
# Shuffle once (fixed seed) before splitting. Without this, the held-out set
# and the composition of every mini-batch depend on the order in which the
# directory listing happened to return the files, which can leave whole
# batches (or the whole test set) containing a single class.
np.random.RandomState(0).shuffle(datas)

# Split the data: xs / ys train the network, x_test / y_test are held out to
# check how well the trained network performs.
train_count = 1000
xs = [sample[0] for sample in datas[:train_count]]
ys = np.reshape([sample[1] for sample in datas[:train_count]], (-1, 1))
x_test = [sample[0] for sample in datas[train_count:]]
y_test = np.reshape([sample[1] for sample in datas[train_count:]], (-1, 1))

def weight_variables(shape):
    """Return a ``tf.Variable`` of the given shape for use as layer weights.

    The initial value is sampled from a truncated normal distribution with
    a standard deviation of 0.1 (float32).
    """
    return tf.Variable(
        tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
    )

def biase_variables(shape):
    """Return a ``tf.Variable`` of the given shape with every entry set to 1.0."""
    initial_value = tf.constant(1.0, shape=shape)
    return tf.Variable(initial_value)

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using stride 1 and VALID padding.

    Args:
        x: Input tensor passed straight to ``tf.nn.conv2d``.
        W: The f*f shared weight matrix, with
            shape=[f, f, in_layers_num, out_layers_num].

    Returns:
        The result of ``tf.nn.conv2d``.
    """
    return tf.nn.conv2d(x, W, strides=[1,1,1,1], padding="VALID")

def max_pooling(x):
    """Apply 2x2 max pooling with stride 2 and VALID padding to ``x``."""
    # The same [batch, height, width, channel] spec serves as both the
    # pooling window and the stride.
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="VALID")

def deepnn(x, keep_prop):
    """Build the fully-convolutional car detector and return its raw output.

    Five VALID convolutions are stacked; the first two are each followed by
    2x2 max pooling, and every layer except the last is passed through ReLU.

    Args:
        x: 4-D input tensor; the first filter expects 1 input channel.
        keep_prop: Accepted for interface compatibility; it is not used
            anywhere in this function (no dropout is applied).

    Returns:
        The output of the final 1x1 convolution, with no activation applied.
    """
    # (name scope, filter shape [f_h, f_w, in, out], apply ReLU?, apply pooling?)
    layer_specs = (
        ("conv-pooling1", [5, 5, 1, 6], True, True),
        ("conv-pooling2", [5, 5, 6, 16], True, True),
        ("conv-pooling3", [5, 5, 16, 32], True, False),
        ("conv4", [3, 18, 32, 64], True, False),
        ("conv5", [1, 1, 64, 1], False, False),
    )

    activation = x
    for scope_name, filter_shape, use_relu, use_pool in layer_specs:
        with tf.name_scope(scope_name):
            kernel = weight_variables(filter_shape)
            # One bias per output channel.
            bias = biase_variables([filter_shape[-1]])
            activation = conv2d(activation, kernel) + bias
            if use_relu:
                activation = tf.nn.relu(activation)
            if use_pool:
                activation = max_pooling(activation)

    return activation

# Graph inputs: x takes single-channel images whose batch size, height and
# width are all left unspecified; labels holds one value per sample.
x = tf.placeholder(dtype=tf.float32, shape=[None,None, None, 1], name="x-input")
labels = tf.placeholder(dtype=tf.float32, shape=[None, 1], name="y-output")

# Fed by the training / evaluation code below and passed on to deepnn.
keep_prop = tf.placeholder(dtype=tf.float32, name="kprob")

ret = deepnn(x, keep_prop)
# At this point the returned value is -1*1*1*1; reshape it into a form that
# is convenient for the loss and accuracy computations.
y = tf.reshape(ret, shape=[-1,1])

# y is treated as logits: the sigmoid is applied inside the loss op.
with tf.name_scope("loss_function"):
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)
cost = tf.reduce_mean(loss)
with tf.name_scope("optimizor"):
    train = tf.train.AdamOptimizer(0.0005).minimize(cost)

# A prediction counts as correct when the sigmoid output is within 0.5 of
# the label; accuracy is the mean of those 0/1 indicators.
with tf.name_scope("accuracy"):
    y_hat = tf.nn.sigmoid(y)
    accuracy_rate = tf.abs(y_hat - labels) < 0.5
    accuracy_rate = tf.cast(accuracy_rate, dtype=tf.float32)
accuracy = tf.reduce_mean(accuracy_rate)

saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialise the variables

import os

# tf.train.Saver.save does not create a missing parent directory; make sure
# the checkpoint directory exists before the first save.
os.makedirs("./models", exist_ok=True)

skip = 10  # mini-batch size
# The full training set in image form, reused for the per-epoch accuracy check.
x_train_all = np.reshape(xs, (-1, 40, 100, 1))

for i in range(10):
    # One pass over the training data in consecutive mini-batches.
    for k in range(0, len(xs), skip):
        x_train = np.reshape(xs[k:k+skip], (-1, 40, 100, 1))
        sess.run(train, feed_dict={x: x_train, labels: ys[k:k+skip], keep_prop: 0.5})  # train the model
    train_accuracy = sess.run(accuracy, feed_dict={x: x_train_all, labels: ys, keep_prop: 1.0})
    print('step %d, train accuracy %g' % (i, train_accuracy))
    # Write one checkpoint per epoch, suffixed with the epoch index.
    saver.save(sess, "./models/carDetect_model.ckpt", global_step=i)

step 0, train accuracy 0.859
step 1, train accuracy 0.934
step 2, train accuracy 0.965
step 3, train accuracy 0.971
step 4, train accuracy 0.985
step 5, train accuracy 0.991
step 6, train accuracy 0.995
step 7, train accuracy 0.994
step 8, train accuracy 0.995
step 9, train accuracy 0.997

import cv2

# NOTE(review): the training images are loaded from './datas/CarData/...'
# while this path starts with '../../' - confirm which base directory is right.
pic_path = "../../datas/CarData/TestImages/test-100.pgm"
pic = cv2.imread(pic_path, 0)
if pic is None:
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    raise IOError("cannot read image: %s" % pic_path)
size = pic.shape

# Scale pixel values by 1/255, exactly as load_carDats does for the training
# data; feeding raw 0-255 values would not match what the network was trained on.
img = np.reshape(pic, (-1, size[0], size[1], 1)) / 255
result = sess.run(ret, feed_dict={x: img})

# result holds one score per window position for a single image and a single
# output channel, so the flat argmax splits into (row, col) using the width.
best_row, best_col = divmod(int(result.argmax()), result.shape[2])
# The two 2x2 poolings give the score map a stride of 4 input pixels.
pt1 = np.array([best_row, best_col]) * 4
# The detection window is 40 rows by 100 columns (the training image size).
pt2 = pt1 + np.array([40, 100])

# OpenCV points are (x, y) = (col, row); pass plain Python ints.
pic_2 = cv2.rectangle(
    pic,
    (int(pt1[1]), int(pt1[0])),
    (int(pt2[1]), int(pt2[0])),
    0,
    2,
)

plt.imshow(pic_2, "gray")



  1. 檢測窗口是固定大小的,檢測大小不一致的車輛時效果很差。
  2. 網絡使用的層數和深度都比較小,所以檢測的精度較低
  3. 訓練的數據很少,使得網絡的訓練度不夠,不具有很好的泛化能力
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.