前言

本次利用Tensorflow-Keras实现YOLO-V3模型，目的是自己动手实现YOLO-V3的重要结构，这样才能更为深入了解模型以及提升编程能力；略去了不太重要的结构，如tiny版本就没有继续实现；
重点研究了模型结构，在leviopku作图的基础上增加了各层的参数设置，以期将模型展示得更加清晰明了；
实现了较为细节但同样重要的部分：非极大值抑制（NMS）、loss函数、如何生成true label等；
另外做了些测试：图像目标检测、视频实时目标检测测试；
详细代码步骤见：https://github.com/Teslaxhub/YOLO_V3_Tensorflow_Keras_from_scratch 含测试样例；
转图请留言并注明出处，谢谢；
如有错误，欢迎指正。

部分代码

import tensorflow as tf
import tensorflow.keras.backend as K
import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers  import Flatten, Concatenate
from tensorflow.keras import Model, Input
from tensorflow.keras.losses import binary_crossentropy

import numpy as np
import pandas as pd

import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import os
from time import time

def preprocesing_image(file_path):
    '''
    Load an image file and turn it into a model-ready batch of one.

    Pipeline: open --> force 3-channel RGB --> resize to the 416x416 model
    input size --> scale pixel values to [0, 1] --> add a leading batch axis.

    Parameters
    ----------
    file_path: path (or file object) accepted by ``PIL.Image.open``.

    Returns
    -------
    img_resized_array: float array, shape(1,416,416,3)
    '''
    # The context manager releases the underlying file handle as soon as the
    # pixel data has been copied out by convert().
    with Image.open(file_path) as img:
        # Grayscale / palette / RGBA inputs would otherwise produce an array
        # with 1, 2 or 4 channels and break the documented (1,416,416,3) shape.
        img_resized = img.convert('RGB').resize((416, 416))

    img_resized_array = np.asarray(img_resized) / 255.
    img_resized_array = np.expand_dims(img_resized_array, 0)

    return img_resized_array


def load_anchors(file_path):
    '''
    Read anchor values from a text file.

    Only the first line of the file is consumed. It must contain
    comma-separated integers; surrounding whitespace around each value is
    ignored. The flat list is grouped into rows of two values.

    Returns
    -------
    anchors: integer array, shape(k,2) -- (9,2) for a file holding 18 values
    '''
    with open(file_path) as anchor_file:
        first_line = anchor_file.readline()

    flat_values = list(map(int, map(str.strip, first_line.split(','))))

    return np.array(flat_values).reshape([-1, 2])




def load_class_name(file_path):
    '''
    Read class names from a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped
    from every line; blank lines are kept as empty strings.

    Returns
    -------
    class_name: list of str, in file order
    '''
    with open(file_path) as names_file:
        return [line.strip() for line in names_file]



def sigmoid(x):
    '''
    Logistic function 1 / (1 + exp(-x)), applied element-wise by NumPy.

    Accepts a scalar or an array; the result has the same shape as ``x``.
    '''
    denominator = 1 + np.exp(-x)
    return 1 / denominator





def iou(box1, box2):
    '''
    Intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2: indexable as (xmin, ymin, xmax, ymax, ...). Only the first
        four entries are read, so rows carrying a trailing score are accepted.

    Returns
    -------
    iou_score: scalar in [0, 1]; 0 when the boxes do not overlap or when the
        union has no area.
    '''
    intersect_x_min = np.maximum(box1[0], box2[0])
    intersect_y_min = np.maximum(box1[1], box2[1])
    intersect_x_max = np.minimum(box1[2], box2[2])
    intersect_y_max = np.minimum(box1[3], box2[3])

    # Clamp each side at 0: for disjoint boxes the raw differences are
    # negative, and multiplying two negatives would fabricate a positive
    # "intersection" (or a negative IoU when only one axis is disjoint).
    intersect_w = np.maximum(0, intersect_x_max - intersect_x_min)
    intersect_h = np.maximum(0, intersect_y_max - intersect_y_min)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersect_area

    # Degenerate (zero-area) boxes: avoid dividing by zero.
    if union_area <= 0:
        return 0.0

    return intersect_area / union_area





def iou_batch_array(box1, box2):
    '''
    Element-wise intersection-over-union of two box tensors.

    Parameters
    ----------
    box1, box2: tensors whose last axis is (xmin, ymin, xmax, ymax);
        documented shape is (batch,n,n,3,4). Assumed to be floating point,
        since the clamp below compares against the float literal 0.

    Returns
    --------
    iou_score: the input shape without the last axis, i.e. (batch,n,n,3)
        for the documented input. Positions where both boxes have zero area
        still divide by zero.
    '''
    intersect_x_min = K.maximum(box1[..., 0], box2[..., 0])
    intersect_y_min = K.maximum(box1[..., 1], box2[..., 1])
    intersect_x_max = K.minimum(box1[..., 2], box2[..., 2])
    intersect_y_max = K.minimum(box1[..., 3], box2[..., 3])

    # Clamp each side at 0 so that disjoint boxes yield an empty intersection
    # instead of a spurious positive area (negative * negative) or a
    # negative IoU (one axis disjoint).
    intersect_w = K.maximum(intersect_x_max - intersect_x_min, 0.)
    intersect_h = K.maximum(intersect_y_max - intersect_y_min, 0.)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    box2_area = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])

    iou_score = intersect_area / (box1_area + box2_area - intersect_area)

    return iou_score




def _iou_one_to_many(box, others):
    '''Return the IoU of one (xmin,ymin,xmax,ymax,...) row against every row of ``others``.'''
    # Clamp at 0 so disjoint boxes contribute an empty intersection.
    inter_w = np.maximum(0.0, np.minimum(box[2], others[:, 2]) - np.maximum(box[0], others[:, 0]))
    inter_h = np.maximum(0.0, np.minimum(box[3], others[:, 3]) - np.maximum(box[1], others[:, 1]))
    inter_area = inter_w * inter_h

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    other_areas = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union_area = box_area + other_areas - inter_area

    # Zero-area unions get IoU 0 instead of a division by zero.
    scores = np.zeros_like(inter_area, dtype=float)
    np.divide(inter_area, union_area, out=scores, where=union_area > 0)
    return scores


def non_max_suspension(scores, boxes, iou_threshold, max_box):
    '''
    Greedy non-maximum suppression for the boxes of a single class.

    The highest-scoring remaining box is kept, every other box whose IoU
    with it is >= ``iou_threshold`` is discarded, and the process repeats
    on what is left.

    Parameters
    ----------
    scores: scores for one class, shape(n,1) or (n,)
    boxes: boxes for one class as (xmin,ymin,xmax,ymax), shape(n,4)
    iou_threshold: boxes overlapping a kept box by at least this IoU are removed
    max_box: TODO -- intended cap on the number of boxes kept per class;
        accepted for interface compatibility but not enforced.

    Returns
    -------
    box_collect: list of kept rows in descending score order,
        [(xmin,ymin,xmax,ymax,score),...]; empty list for empty input.
    '''
    scores = np.asarray(scores).reshape((-1, 1))
    if scores.shape[0] == 0:
        # Nothing to suppress; previously this path raised IndexError.
        return []

    candidates = np.concatenate([np.asarray(boxes), scores], axis=-1)
    # Sorting once is enough: filtering below preserves row order.
    candidates = candidates[np.argsort(-candidates[:, -1], kind="stable")]

    box_collect = []
    while candidates.shape[0] > 0:
        best, rest = candidates[0], candidates[1:]
        box_collect.append(best)
        overlaps = _iou_one_to_many(best, rest)
        candidates = rest[overlaps < iou_threshold]

    return box_collect

测试部分

见github

参考及致谢：

https://blog.csdn.net/leviopku/article/details/82660381

https://github.com/qqwweee/keras-yolo3

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

从头实现YOLO V3：利用Tensorflow-Keras，含YOLO模型结构图（详细）

前言

部分代码

测试部分

深度可分離卷積（Depthwise Seperable Convolution）與Mobilenet

從頭實現YOLO V3：利用Tensorflow-Keras，含YOLO模型結構圖（詳細）

經典網絡學習-看過的blog彙總

numpy 快速

編譯可訓練的tesseract時遇到的問題，tesseract-OCR，training-tool

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結