前言

本次利用Tensorflow-Keras實現YOLO-V3模型，目的是自己動手實現YOLO-V3的重要結構，這樣才能更爲深入瞭解模型以及提升編程能力；略去了不太重要的結構，如tiny版本就沒有繼續實現；
重點研究了模型結構，在leviopku作圖的基礎上增加了各層的參數設置，以便將模型展示得更加清晰明瞭；
實現了較爲細節也很重要的非最大值抑制、loss函數、如何生成true label等；
另外做了些測試：圖像目標檢測、視頻實時目標檢測；
詳細代碼步驟見：https://github.com/Teslaxhub/YOLO_V3_Tensorflow_Keras_from_scratch 含測試樣例；
轉圖請留言並註明出處，謝謝；
如有錯誤，歡迎指正。

部分代碼

import tensorflow as tf
import tensorflow.keras.backend as K
import keras
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import ZeroPadding2D
from tensorflow.keras.layers import Add
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D
from tensorflow.keras.layers  import Flatten, Concatenate
from tensorflow.keras import Model, Input
from tensorflow.keras.losses import binary_crossentropy

import numpy as np
import pandas as pd

import cv2 as cv
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont

import os
from time import time

def preprocesing_image(file_path):
    '''
    Load an image file and turn it into a normalized model-input batch.

    Pipeline: read --> force 3-channel RGB --> resize to 416x416
    --> scale pixel values to [0, 1] --> add a leading batch axis.

    Parameters
    ----------
    file_path: path (or file object) handed to PIL's Image.open

    Returns:
    --------
    img_resized_array: shape(1,416,416,3)
    '''
    # The context manager releases the underlying file handle once the
    # pixel data has been copied out by convert().
    with Image.open(file_path) as img:
        # Grayscale, palette and RGBA images would otherwise produce an
        # array without exactly 3 channels, breaking the documented shape.
        img_resized = img.convert('RGB').resize((416, 416))

    img_resized_array = np.asarray(img_resized) / 255.
    img_resized_array = np.expand_dims(img_resized_array, 0)

    return img_resized_array


def load_anchors(file_path):
    '''
    Read anchor sizes from the first line of a text file.

    The first line is expected to be a comma-separated list of integers;
    consecutive values are paired up as one anchor each. Any further
    lines in the file are ignored.

    Returns:
    -------
    anchors: integer array, shape(k,2) (shape(9,2) for an 18-value line)
    '''
    with open(file_path) as f:
        first_line = f.readline()

    values = []
    for token in first_line.split(','):
        values.append(int(token.strip()))

    return np.array(values).reshape([-1, 2])




def load_class_name(file_path):
    '''
    Read class names from a text file, one name per line.

    Leading and trailing whitespace (including the newline) is stripped
    from every line; blank lines are kept as empty strings.

    Returns:
    -------
    class_name: list of str, in file order
    '''
    with open(file_path) as f:
        return list(map(str.strip, f))



def sigmoid(x):
    '''
    Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise via NumPy.

    x: scalar or array accepted by np.exp
    '''
    denominator = 1 + np.exp(-x)
    return 1 / denominator





def iou(box1, box2):
    '''
    Intersection-over-union of two axis-aligned boxes.

    Parameters
    ----------
    box1, box2: indexable as xmin,ymin,xmax,ymax in positions 0-3;
                any extra trailing entries (e.g. a score) are ignored.

    Returns
    -------
    iou_score: scalar float. 0 when the boxes do not overlap or when the
               union area is not positive (degenerate boxes).
    '''
    # Clamp each extent at 0: for disjoint boxes the raw differences are
    # negative, and the product of two negatives would otherwise look
    # like a positive intersection area.
    intersect_w = np.maximum(
        0.0, np.minimum(box1[2], box2[2]) - np.maximum(box1[0], box2[0]))
    intersect_h = np.maximum(
        0.0, np.minimum(box1[3], box2[3]) - np.maximum(box1[1], box2[1]))
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - intersect_area

    # Guard against zero-sized boxes so we never divide by zero.
    if union_area <= 0:
        return 0.0

    return intersect_area / union_area





def iou_batch_array(box1, box2):
    '''
    Element-wise intersection-over-union of two box tensors.

    Parameters
    ----------
    box1, box2: tensors whose last axis holds xmin,ymin,xmax,ymax at
                indices 0-3, e.g. shape(batch,n,n,3,4). Assumed to be
                floating point -- TODO confirm against the caller.

    Returns
    --------
    iou_score: the input shape without the last axis, e.g.
               shape(batch,n,n,3). Non-overlapping pairs give 0.
    '''
    intersect_x_min = K.maximum(box1[..., 0], box2[..., 0])
    intersect_y_min = K.maximum(box1[..., 1], box2[..., 1])
    intersect_x_max = K.minimum(box1[..., 2], box2[..., 2])
    intersect_y_max = K.minimum(box1[..., 3], box2[..., 3])

    # Clamp at 0: for disjoint boxes the raw extents are negative, and two
    # negative extents would multiply into a bogus positive area.
    intersect_w = K.maximum(intersect_x_max - intersect_x_min, 0.)
    intersect_h = K.maximum(intersect_y_max - intersect_y_min, 0.)
    intersect_area = intersect_w * intersect_h

    box1_area = (box1[..., 2] - box1[..., 0]) * (box1[..., 3] - box1[..., 1])
    box2_area = (box2[..., 2] - box2[..., 0]) * (box2[..., 3] - box2[..., 1])
    union_area = box1_area + box2_area - intersect_area

    # The epsilon keeps all-zero (degenerate) boxes from producing 0/0 = NaN.
    iou_score = intersect_area / (union_area + K.epsilon())

    return iou_score




def _iou_one_to_many(box, others):
    '''IoU between one box and every row of `others`; 0 where there is no overlap.'''
    # Clamp extents at 0 so disjoint boxes contribute no intersection.
    inter_w = np.maximum(
        0.0, np.minimum(box[2], others[:, 2]) - np.maximum(box[0], others[:, 0]))
    inter_h = np.maximum(
        0.0, np.minimum(box[3], others[:, 3]) - np.maximum(box[1], others[:, 1]))
    inter_area = inter_w * inter_h

    box_area = (box[2] - box[0]) * (box[3] - box[1])
    others_area = (others[:, 2] - others[:, 0]) * (others[:, 3] - others[:, 1])
    union_area = box_area + others_area - inter_area

    # Leave the score at 0 wherever the union is not positive (degenerate
    # boxes) instead of dividing by zero.
    scores = np.zeros(inter_area.shape, dtype=float)
    np.divide(inter_area, union_area, out=scores, where=union_area > 0)
    return scores


def non_max_suspension(scores, boxes, iou_threshold, max_box):
    '''
    Greedy non-maximum suppression for the boxes of a single class.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is >= iou_threshold.

    Parameters
    ----------
    scores: scores for one class, shape(n,) or shape(n,1)
    boxes: boxes for one class as xmin,ymin,xmax,ymax, shape(n,4)
    iou_threshold: boxes overlapping a kept box by at least this IoU are removed
    max_box: TODO, intended cap on the number of boxes kept for the class.
             Accepted for interface compatibility; currently not applied.

    Returns
    -------
    box_collect: list of arrays (xmin,ymin,xmax,ymax,score), highest score
                 first; an empty list when there are no input boxes.
    '''
    score_column = np.asarray(scores).reshape((-1, 1))
    box_rows = np.asarray(boxes).reshape((-1, 4))
    candidates = np.concatenate([box_rows, score_column], axis=-1)

    # Sort once by descending score; filtering below preserves the order,
    # so the best remaining candidate is always row 0.
    candidates = candidates[np.argsort(-candidates[:, -1], kind='stable')]

    # Collects the boxes that survive suppression.
    box_collect = []
    while len(candidates) > 0:
        best = candidates[0]
        box_collect.append(best)

        remaining = candidates[1:]
        overlaps = _iou_one_to_many(best, remaining)
        candidates = remaining[overlaps < iou_threshold]

    return box_collect

測試部分

見github

參考及致謝：

https://blog.csdn.net/leviopku/article/details/82660381

https://github.com/qqwweee/keras-yolo3

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

從頭實現YOLO V3：利用Tensorflow-Keras，含YOLO模型結構圖（詳細）

前言

部分代碼

測試部分

Spring Cloud 部署時如何使用 Kubernetes 作爲註冊中心和配置中心

KubeKey 部署 K8s v1.28.8 實戰

深度可分離卷積（Depthwise Seperable Convolution）與Mobilenet

從頭實現YOLO V3：利用Tensorflow-Keras，含YOLO模型結構圖（詳細）

經典網絡學習-看過的blog彙總

numpy 快速

編譯可訓練的tesseract時遇到的問題，tesseract-OCR，training-tool

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結