模型結構與代碼實現
第一層:32個feature map 5x5卷積、步長爲1、最大值池化 局部響應歸一化處理(LRN) 第二層:64個feature map 3x3卷積、步長爲2、沒有池化 第三層:128個feature map 3x3卷積、步長爲1、最大值池化 局部響應歸一化處理(LRN) 扁平層操作12x12x128個神經元 輸出層操作2個神經元輸出、sigmoid激活函數 卷積層採用relu作爲激活函數。
模型解釋
卷積層深度不斷加深,用以補償分辨率下降帶來的信息損失、 LRN提升神經元競爭能力,增強最終模型的泛化能力。
通過上述簡單的卷積神經網絡,對25000張的貓狗圖像進行訓練,對卷積層1、3後面使用局部響應歸一化處理(LRN), 最終輸出二分類圖像。從測試集選擇測試圖像進行分類預測,計算準確率。
網絡模型代碼實現
def inference(input_tensor):
    """Build the inference graph for the two-class (cat/dog) CNN.

    Architecture: conv(5x5, 32) -> maxpool + LRN -> conv(3x3, 64, stride 2)
    -> conv(3x3, 128) -> maxpool + LRN -> flatten -> dense(2, sigmoid).

    Args:
        input_tensor: float32 NHWC image batch (the training script feeds
            100x100x3 RGB images, giving a 12x12x128 map before flattening).

    Returns:
        A [batch, 2] tensor of sigmoid activations, one unit per class.
    """
    weight_init = tf.truncated_normal_initializer(stddev=0.1)
    bias_init = tf.constant_initializer(0.0)

    # Convolution block 1: 32 feature maps, 5x5 kernels over the 3 RGB channels.
    with tf.variable_scope('layer1-conv1'):
        kernel = tf.get_variable("weight", [5, 5, 3, 32], initializer=weight_init)
        offset = tf.get_variable("bias", [32], initializer=bias_init)
        conv = tf.nn.conv2d(input_tensor, kernel, strides=[1, 1, 1, 1], padding='SAME')
        act1 = tf.nn.relu(tf.nn.bias_add(conv, offset))
        print(act1)

    # 2x2 max pooling (halves H and W) followed by local response normalization.
    with tf.name_scope("layer2-pool1"):
        pooled = tf.nn.max_pool(act1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding="VALID")
        lrn1 = tf.nn.lrn(pooled, depth_radius=5, bias=2.0, alpha=1e-3,
                         beta=0.75, name='norm1')

    # Convolution block 2: 64 feature maps; stride 2 stands in for pooling.
    with tf.variable_scope("layer3-conv2"):
        kernel = tf.get_variable("weight", [3, 3, 32, 64], initializer=weight_init)
        offset = tf.get_variable("bias", [64], initializer=bias_init)
        conv = tf.nn.conv2d(lrn1, kernel, strides=[1, 2, 2, 1], padding='SAME')
        act2 = tf.nn.relu(tf.nn.bias_add(conv, offset))
        print(act2)

    # Convolution block 3: 128 feature maps, stride 1, no pooling of its own.
    with tf.variable_scope("layer4-conv3"):
        kernel = tf.get_variable("weight", [3, 3, 64, 128], initializer=weight_init)
        offset = tf.get_variable("bias", [128], initializer=bias_init)
        conv = tf.nn.conv2d(act2, kernel, strides=[1, 1, 1, 1], padding='SAME')
        act3 = tf.nn.relu(tf.nn.bias_add(conv, offset))
        print(act3)

    # Final 2x2 max pooling plus LRN (parameters as in the AlexNet paper).
    with tf.name_scope("layer5-pool2"):
        pooled = tf.nn.max_pool(act3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                                padding='VALID')
        print(pooled)
        lrn2 = tf.nn.lrn(pooled, depth_radius=5, bias=2.0, alpha=1e-3,
                         beta=0.75, name='norm2')

    # Classifier head: flatten then a 2-unit sigmoid layer.
    with tf.variable_scope("fc1"):
        flat = tf.layers.flatten(lrn2)
        return tf.layers.dense(flat, 2, activation=tf.nn.sigmoid)
數據加載與訓練
對下載的訓練數據集根據名稱排序,分爲兩個目錄
- 文件夾0,所有貓的圖像
- 文件夾1,所有狗的圖像
使用one-hot編碼標籤 [0, 1] 表示貓 [1, 0] 表示狗
加載所有圖像數據與標籤的代碼如下:
def get_filelist():
    """Collect all training image paths with their one-hot labels.

    Folder 0 holds cats (label [0, 1]); folder 1 holds dogs (label [1, 0]).

    Returns:
        A pair (images, labels): an ndarray of file-path strings and an
        int32 ndarray of one-hot rows, index-aligned with each other.
    """
    images = []
    labels = []

    def _collect(folder, one_hot):
        # os.walk may descend into subdirectories; join with the actual
        # `root` instead of a hard-coded prefix (the original concatenated
        # the top folder, which broke for any nested files).
        for root, _dirs, files in os.walk(folder):
            for name in files:
                images.append(os.path.join(root, name))
                labels.append(one_hot)

    _collect('D:/images/train_data/train_img/0', [0, 1])  # cats
    _collect('D:/images/train_data/train_img/1', [1, 0])  # dogs
    return np.asarray(images), np.asarray(labels, np.int32)


def get_data(file_list, index, batch_size, label_list):
    """Load batch number `index`: images resized to 100x100 plus labels.

    Indices wrap around the dataset (modulo its length), so any number of
    training iterations is valid regardless of dataset size.

    Returns:
        (images, labels): float32 image batch and int32 one-hot labels.
    """
    images = []
    labels = []
    for i in range(index * batch_size, (index + 1) * batch_size):
        i = i % len(file_list)
        # skimage's resize also converts to float in [0, 1].
        img = transform.resize(io.imread(file_list[i]), (100, 100))
        images.append(img)
        labels.append(label_list[i])
    return np.asarray(images, np.float32), np.asarray(labels, np.int32)
每個batch=64張圖像進行訓練,輸入圖像大小resize爲100x100x3, RGB三通道彩色圖像 訓練時候輸入圖像與標籤定義代碼如下:
# 兩個佔位符 x = tf.placeholder(tf.float32, shape=[None, 100, 100, 3], name='x') y_ = tf.placeholder(tf.float32, shape=[None, 2], name='y_')
計算損失採用交叉熵損失,使用Adam優化器進行優化,代碼實現如下:
logits = inference(x)
# NOTE(review): inference() ends with a sigmoid-activated dense layer, so
# `logits` are already squashed probabilities, not raw logits; feeding them
# into softmax_cross_entropy_with_logits applies a second squashing. It
# still trains, but dropping the sigmoid in inference() would be the
# conventional setup — confirm before changing either side.
cross_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_)
# Mean cross-entropy over the batch.
loss = tf.reduce_mean(cross_loss)
# Stashed in a collection; nothing visible in this file reads it back.
tf.add_to_collection('losses', loss)
# Adam with a fixed learning rate of 0.001; var_list is just the default
# (all trainable variables), listed explicitly here.
train_vars = tf.trainable_variables()
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss, var_list=train_vars)
在1050ti GPU上運行10000次迭代,會保存最後的檢查點文件、訓練與保存檢查點代碼如下:
# NOTE(review): the original executed `tf.device('/gpu:0')` as a bare
# statement, which is a no-op — tf.device only affects ops created inside a
# `with tf.device(...):` block, and the graph is already built above. With a
# GPU build, TensorFlow places supported ops on the GPU by default anyway,
# so the call is simply dropped here.
print("training start")
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epoch):
        # Each step feeds one batch; get_data wraps indices around the
        # dataset, so n_epoch here is really an iteration count.
        feed_img, feed_label = get_data(x_train, epoch, batch_size, y_train)
        _, err, ac = sess.run([train_op, loss, acc],
                              feed_dict={x: feed_img, y_: feed_label})
        if epoch % 100 == 0:
            print("epoch %d, loss: %.2f, ac : %.2f" % (epoch, err, ac))
    # Save only the final checkpoint, as the surrounding text describes;
    # global_step fixes the filename suffix (dog_and_cat.model-10000).
    saver.save(sess, "./dog_and_cat.model", global_step=10000)
使用模型進行預測
定義預測結果代碼如下:
# Predicted class index = position of the larger activation:
# 0 -> dog (one-hot [1, 0]), 1 -> cat (one-hot [0, 1]).
# Cast to float32; the test loop below compares it against 0/1 and prints
# it with %d, both of which work on the float value.
prediction = tf.cast(tf.argmax(logits, 1), tf.float32)
對保存好的檢查點進行恢復,加載隨機測試圖像數據,調用模型進行測試,代碼如下:
def _classify_folder(sess, folder, expected, caption):
    """Run the model over every image file in `folder`, showing each one.

    Each image gets the same preprocessing as training (resize to 100x100,
    float32), is classified, displayed with `caption` overlaid, and counted
    as correct when the predicted class index equals `expected`.

    Returns:
        The number of correctly classified images in the folder.
    """
    correct = 0
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        if not os.path.isfile(path):
            continue
        image = io.imread(path)
        display = np.copy(image)
        # resize -> float32 -> add batch dimension, matching get_data().
        tensor = np.expand_dims(np.float32(transform.resize(image, (100, 100))), 0)
        digit = sess.run(prediction, feed_dict={x: tensor})
        print("predict digit : %d., actual digit : %s" % (digit[0], expected))
        if digit[0] == expected:
            correct += 1
        cv.putText(display, caption, (20, 50), cv.FONT_HERSHEY_SCRIPT_SIMPLEX,
                   1.0, (0, 0, 255), 2, 8)
        cv.imshow("Image Classification", display)
        cv.waitKey(0)
    return correct


with tf.Session() as sess:
    # Restore the checkpoint saved by the training script.
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    cat_path = "D:/images/train_data/test_img/0/"
    dog_path = "D:/images/train_data/test_img/1/"
    dogs = os.listdir(dog_path)
    cats = os.listdir(cat_path)
    # Dogs are class 0 (one-hot [1, 0]); cats are class 1 (one-hot [0, 1]).
    count = _classify_folder(sess, dog_path, 0, "dog")
    count += _classify_folder(sess, cat_path, 1, "cat")
    print("correct precent: %f" % (count / (len(cats) + len(dogs))))
測試運行截圖如下: