TensorFlow-YOLO_V1測試代碼梳理

TensorFlow-YOLO_V1測試代碼梳理，相信看過後對yolo-v1的整個流程都一目瞭然了。

硬件：NVIDIA-GTX1080

軟件：Windows7、python3.6.5、tensorflow-gpu-1.4.0

一、基礎知識

一圖看懂：https://download.csdn.net/download/samylee/10879852

二、代碼展示（做了詳細註釋）

1、./test.py

import cv2
import numpy as np
import tensorflow as tf
import yolo.config as cfg
from yolo.yolo_net import YOLONet

class Detector(object):
    """Run a restored YOLO v1 network on images and decode its detections.

    Every detection is a list
    ``[class_name, x_center, y_center, width, height, score]``.
    Non-maximum suppression is class-agnostic: a box is dropped when it
    overlaps any higher-scoring kept box, whatever class either one has.
    """

    def __init__(self, net, weight_file):
        """Read the decoding settings from ``cfg`` and restore the weights.

        Args:
            net: network object exposing ``images`` (the input placeholder)
                and ``logits`` (the flat prediction tensor).
            weight_file: checkpoint path passed to ``tf.train.Saver.restore``.
        """
        self.net = net
        self.weights_file = weight_file

        # Class names and how many there are.
        self.classes = cfg.CLASSES
        self.num_class = len(self.classes)
        # Side length of the square network input.
        self.image_size = cfg.IMAGE_SIZE
        # Number of grid cells along each side.
        self.cell_size = cfg.CELL_SIZE
        # Number of bounding boxes predicted per cell.
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        # Minimum class score for a detection to be kept.
        self.threshold = cfg.THRESHOLD
        # IoU above which a lower-scoring box is suppressed by NMS.
        self.iou_threshold = cfg.IOU_THRESHOLD

        # Layout of the flat output vector:
        #   [:boundary1]           conditional class probabilities
        #   [boundary1:boundary2]  per-box confidences
        #   [boundary2:]           per-box coordinates
        self.boundary1 = self.cell_size * self.cell_size * self.num_class
        self.boundary2 = self.boundary1 +\
            self.cell_size * self.cell_size * self.boxes_per_cell

        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        print('Restoring weights from: ' + self.weights_file)
        self.saver = tf.train.Saver()
        self.saver.restore(self.sess, self.weights_file)

    def draw_result(self, img, result):
        """Draw every detection in ``result`` onto ``img`` in place.

        Boxes are stored as centre and size, so the corners are
            xmin = xmid - width / 2,   ymin = ymid - height / 2
            xmax = xmid + width / 2,   ymax = ymid + height / 2

        Args:
            img: image array handed to the OpenCV drawing calls.
            result: detections as returned by ``detect``.
        """
        # Feature check instead of comparing version strings lexically.
        line_type = cv2.LINE_AA if hasattr(cv2, 'LINE_AA') else cv2.CV_AA
        for detection in result:
            x = int(detection[1])
            y = int(detection[2])
            w = int(detection[3] / 2)
            h = int(detection[4] / 2)
            # Green outline around the object.
            cv2.rectangle(img, (x - w, y - h), (x + w, y + h), (0, 255, 0), 2)
            # Filled grey strip above the box that holds the label text.
            cv2.rectangle(img, (x - w, y - h - 20),
                          (x + w, y - h), (125, 125, 125), -1)
            cv2.putText(
                img, detection[0] + ' : %.2f' % detection[5],
                (x - w + 5, y - h - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (0, 0, 0), 1, line_type)

    def detect(self, img):
        """Detect objects in one BGR image.

        Args:
            img: image array of shape (height, width, channels).

        Returns:
            Detections with coordinates expressed in the pixels of ``img``.
        """
        img_h, img_w, _ = img.shape
        inputs = cv2.resize(img, (self.image_size, self.image_size))
        inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
        # Map pixel values from [0, 255] to [-1, 1].
        inputs = (inputs / 255.0) * 2.0 - 1.0
        inputs = np.reshape(inputs, (1, self.image_size, self.image_size, 3))

        result = self.detect_from_cvmat(inputs)[0]

        # Undo the resize so the boxes refer to the original image.
        x_scale = 1.0 * img_w / self.image_size
        y_scale = 1.0 * img_h / self.image_size
        for detection in result:
            detection[1] *= x_scale
            detection[2] *= y_scale
            detection[3] *= x_scale
            detection[4] *= y_scale

        return result

    def detect_from_cvmat(self, inputs):
        """Run the network on a preprocessed batch.

        Args:
            inputs: batch fed to the ``net.images`` placeholder.

        Returns:
            One list of detections per batch element.
        """
        net_output = self.sess.run(self.net.logits,
                                   feed_dict={self.net.images: inputs})
        return [self.interpret_output(net_output[i])
                for i in range(net_output.shape[0])]

    def interpret_output(self, output):
        """Decode one flat prediction vector into a list of detections.

        Args:
            output: 1-D float array laid out as described by ``boundary1``
                and ``boundary2``. It is not modified.

        Returns:
            Detections sorted by descending score, with coordinates in
            network-input pixels.
        """
        cells = self.cell_size
        # Conditional class probabilities: (cells, cells, num_class).
        class_probs = np.reshape(
            output[0:self.boundary1], (cells, cells, self.num_class))
        # Per-box confidences: (cells, cells, boxes_per_cell).
        scales = np.reshape(
            output[self.boundary1:self.boundary2],
            (cells, cells, self.boxes_per_cell))
        # Per-box coordinates: (cells, cells, boxes_per_cell, 4).
        # np.array copies, so the in-place decoding below cannot overwrite
        # the caller's array through a reshape view.
        boxes = np.array(np.reshape(
            output[self.boundary2:],
            (cells, cells, self.boxes_per_cell, 4)))

        # Axis 0 of the grid is the row (y), axis 1 the column (x).
        col_index = np.arange(cells).reshape(1, cells, 1)
        row_index = np.arange(cells).reshape(cells, 1, 1)

        # x_real = (x_out + x_offset) * image_size / cell_size
        # y_real = (y_out + y_offset) * image_size / cell_size
        boxes[:, :, :, 0] += col_index
        boxes[:, :, :, 1] += row_index
        boxes[:, :, :, :2] *= 1.0 * self.image_size / cells

        # w_real = square(w_out) * image_size
        # h_real = square(h_out) * image_size
        boxes[:, :, :, 2:] = np.square(boxes[:, :, :, 2:]) * self.image_size

        # Class score = conditional class probability * box confidence,
        # shape (cells, cells, boxes_per_cell, num_class).
        probs = (class_probs[:, :, np.newaxis, :] *
                 scales[:, :, :, np.newaxis]).astype(np.float64)

        # One entry per (row, col, box, class) whose score reaches the
        # threshold. The class index must come from the nonzero() result:
        # argmax over the boolean mask would give the first class above the
        # threshold and mislabel boxes with several candidate classes.
        keep = probs >= self.threshold
        rows, cols, box_ids, class_ids = np.nonzero(keep)
        probs_filtered = probs[keep]
        boxes_filtered = boxes[rows, cols, box_ids]
        classes_filtered = class_ids

        # Sort everything by descending score.
        order = np.argsort(probs_filtered)[::-1]
        probs_filtered = probs_filtered[order]
        boxes_filtered = boxes_filtered[order]
        classes_filtered = classes_filtered[order]

        # Class-agnostic NMS: zero the score of any box that overlaps an
        # earlier (higher-scoring) surviving box too much.
        for i in range(len(boxes_filtered)):
            if probs_filtered[i] == 0:
                continue
            for j in range(i + 1, len(boxes_filtered)):
                if self.iou(boxes_filtered[i], boxes_filtered[j]) > self.iou_threshold:
                    probs_filtered[j] = 0.0

        # Keep the survivors only.
        survivors = probs_filtered > 0.0
        boxes_filtered = boxes_filtered[survivors]
        probs_filtered = probs_filtered[survivors]
        classes_filtered = classes_filtered[survivors]

        return [
            [self.classes[classes_filtered[i]],
             boxes_filtered[i][0],
             boxes_filtered[i][1],
             boxes_filtered[i][2],
             boxes_filtered[i][3],
             probs_filtered[i]]
            for i in range(len(boxes_filtered))
        ]

    def iou(self, box1, box2):
        """Return the intersection-over-union of two boxes.

        Args:
            box1: sequence ``(x_center, y_center, width, height)``.
            box2: sequence in the same format.

        Returns:
            The IoU, or 0.0 when the union has no area.
        """
        overlap_w = min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2]) - \
            max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2])
        overlap_h = min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3]) - \
            max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3])
        if overlap_w < 0 or overlap_h < 0:
            inter = 0
        else:
            inter = overlap_w * overlap_h
        union = box1[2] * box1[3] + box2[2] * box2[3] - inter
        # Two zero-area boxes would otherwise divide by zero.
        if union <= 0:
            return 0.0
        return inter / union

    def image_detector(self, imname, wait=0):
        """Detect objects in an image file and show the annotated image.

        Args:
            imname: path of the image to read.
            wait: delay passed to ``cv2.waitKey``.

        Raises:
            IOError: if the image cannot be read.
        """
        image = cv2.imread(imname)
        # cv2.imread reports failure by returning None instead of raising.
        if image is None:
            raise IOError('Cannot read image: %s' % imname)

        result = self.detect(image)

        self.draw_result(image, result)
        cv2.imshow('Image', image)
        cv2.waitKey(wait)


def main():
    """Build the inference network, restore its weights and show one image.

    The network is created with ``is_training=False``; the checkpoint and
    image paths are relative to the current working directory.
    """
    network = YOLONet(is_training=False)
    detector = Detector(network, "model/YOLO_small.ckpt")
    detector.image_detector('person.jpg')


if __name__ == '__main__':
    main()

2、./yolo/config.py

import os

# Class names; a class index is an index into this list.
CLASSES = [
    'aeroplane', 'bicycle', 'bird', 'boat',
    'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'diningtable', 'dog',
    'horse', 'motorbike', 'person', 'pottedplant',
    'sheep', 'sofa', 'train', 'tvmonitor',
]

# Side length, in pixels, of the square network input.
IMAGE_SIZE = 448

# Number of grid cells along each side of the image.
CELL_SIZE = 7

# Number of bounding boxes predicted per grid cell.
BOXES_PER_CELL = 2

# Leaky ReLU coefficient.
ALPHA = 0.1

# Minimum class score for a detection to be kept.
THRESHOLD = 0.2

# IoU above which non-maximum suppression drops the lower-scoring box.
IOU_THRESHOLD = 0.5

3、./yolo/yolo_net.py

import numpy as np
import tensorflow as tf
import yolo.config as cfg

slim = tf.contrib.slim


class YOLONet(object):
    """YOLO v1 network graph built with TF-Slim.

    Constructing an instance creates the ``images`` placeholder and the
    ``logits`` tensor returned by ``build_network``.
    """

    def __init__(self, is_training=True):
        """Read the model settings from ``cfg`` and build the graph.

        Args:
            is_training: forwarded to ``build_network``, which passes it on
                to ``slim.dropout``.
        """
        # Class names
        self.classes = cfg.CLASSES
        # Number of classes
        self.num_class = len(self.classes)
        # Side length of the square input image
        self.image_size = cfg.IMAGE_SIZE
        # Number of grid cells along each side
        self.cell_size = cfg.CELL_SIZE
        # Number of bounding boxes predicted per cell
        self.boxes_per_cell = cfg.BOXES_PER_CELL
        # Length of the network output: for every cell, num_class values
        # plus 5 values per box
        self.output_size = (self.cell_size * self.cell_size) *\
            (self.num_class + self.boxes_per_cell * 5)
        # Leaky ReLU coefficient (tf.maximum(alpha*x, x))
        self.alpha = cfg.ALPHA

        # Column index of every cell, shape
        # (cell_size, cell_size, boxes_per_cell); used as the x offset of
        # box centres, which are predicted relative to their own cell's
        # top-left corner
        self.offset = np.transpose(np.reshape(np.array(
            [np.arange(self.cell_size)] * self.cell_size * self.boxes_per_cell),
            (self.boxes_per_cell, self.cell_size, self.cell_size)), (1, 2, 0))

        # Input image batch, used for both training and testing
        self.images = tf.placeholder(
            tf.float32, [None, self.image_size, self.image_size, 3],
            name='images')

        # Build the network
        self.logits = self.build_network(
            self.images, num_outputs=self.output_size, alpha=self.alpha,
            is_training=is_training)

    # conv2d(inputs, output_size, ksize, strides = [1, 1], padding = 'SAME')
    # max_pool2d(inputs, ksize, strides = [2, 2])
    def build_network(self,images,num_outputs,alpha,keep_prob=0.5,is_training=True,scope='yolo'):
        """Create the convolutional and fully connected layers.

        Args:
            images: input tensor; the layers are stacked on top of it.
            num_outputs: number of units in the final fully connected layer.
            alpha: leaky ReLU coefficient used by every conv2d and
                fully_connected layer except the last one.
            keep_prob: ``keep_prob`` passed to ``slim.dropout``.
            is_training: ``is_training`` passed to ``slim.dropout``.
            scope: variable scope that wraps all layers.

        Returns:
            The output of the final fully connected layer, which has no
            activation function.
        """
        with tf.variable_scope(scope):
            # Shared defaults for every slim.conv2d and slim.fully_connected
            # layer below
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_regularizer=slim.l2_regularizer(0.0005),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)
            ):
                # (batch, 448, 448, 3) -> (batch, 454, 454, 3)
                net = tf.pad(images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),name='pad_1')
                net = slim.conv2d(net, 64, 7, 2, padding='VALID', scope='conv_2')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
                net = slim.conv2d(net, 192, 3, scope='conv_4')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
                net = slim.conv2d(net, 128, 1, scope='conv_6')
                net = slim.conv2d(net, 256, 3, scope='conv_7')
                net = slim.conv2d(net, 256, 1, scope='conv_8')
                net = slim.conv2d(net, 512, 3, scope='conv_9')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
                net = slim.conv2d(net, 256, 1, scope='conv_11')
                net = slim.conv2d(net, 512, 3, scope='conv_12')
                net = slim.conv2d(net, 256, 1, scope='conv_13')
                net = slim.conv2d(net, 512, 3, scope='conv_14')
                net = slim.conv2d(net, 256, 1, scope='conv_15')
                net = slim.conv2d(net, 512, 3, scope='conv_16')
                net = slim.conv2d(net, 256, 1, scope='conv_17')
                net = slim.conv2d(net, 512, 3, scope='conv_18')
                net = slim.conv2d(net, 512, 1, scope='conv_19')
                net = slim.conv2d(net, 1024, 3, scope='conv_20')
                net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
                net = slim.conv2d(net, 512, 1, scope='conv_22')
                net = slim.conv2d(net, 1024, 3, scope='conv_23')
                net = slim.conv2d(net, 512, 1, scope='conv_24')
                net = slim.conv2d(net, 1024, 3, scope='conv_25')
                net = slim.conv2d(net, 1024, 3, scope='conv_26')
                net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),name='pad_27')
                net = slim.conv2d(net, 1024, 3, 2, padding='VALID', scope='conv_28')
                net = slim.conv2d(net, 1024, 3, scope='conv_29')
                net = slim.conv2d(net, 1024, 3, scope='conv_30')

                # n h w c -> n c h w
                net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
                # Flatten to (batch, features)
                net = slim.flatten(net, scope='flat_32')

                net = slim.fully_connected(net, 512, scope='fc_33')
                net = slim.fully_connected(net, 4096, scope='fc_34')
                # Dropout: is_training defaults to True; when it is False
                # the effective keep_prob is 1
                net = slim.dropout(net, keep_prob=keep_prob, is_training=is_training,scope='dropout_35')
                # The output layer needs no activation function;
                # (batch, 1470) with the configuration shown in config.py
                net = slim.fully_connected(net, num_outputs, activation_fn=None, scope='fc_36')
        return net

def leaky_relu(alpha):
    """Return a one-argument leaky ReLU activation with a fixed slope.

    Args:
        alpha: value passed as ``alpha`` to ``tf.nn.leaky_relu``.

    Returns:
        A callable that applies ``tf.nn.leaky_relu`` to its argument.
    """
    def _activation(tensor):
        # alpha is captured from the enclosing call.
        return tf.nn.leaky_relu(tensor, alpha=alpha, name='leaky_relu')

    return _activation

三、模型及測試圖像下載

鏈接：https://pan.baidu.com/s/1517r5zxY01vQgrEJUqGEfw
提取碼：xh45

下載後模型保存至./model/目錄下，圖像保存至當前路徑下

四、結果展示

五、參考

https://github.com/hizhangp/yolo_tensorflow

任何問題請加唯一QQ2258205918（名稱samylee）！

TensorFlow-YOLO_V1測試代碼梳理

Spring Cloud 部署時如何使用 Kubernetes 作爲註冊中心和配置中心

通用中文OCR-離線

Amazing行人檢測（CPU Real-time）

TensorFlow-YOLO_V1測試代碼梳理

單線程、SSE、AVX運行效率對比——最大值/最小值運算

LCNN_SSD（Open Source）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結