# Script 1: convert Darknet cfg/weights and generate yolo.h5
#! /usr/bin/env python
"""
Reads Darknet53 config and weights and creates Keras model with TF backend.
Currently only supports layers in Darknet53 config.
yad2k.py的作用主要是使用yolo的網絡結構配置文件和權重文件轉換成keras的.h5文件
"""
import argparse#argparse的作用就是爲py文件封裝好可以選擇的參數,使他們更加靈活,
import configparser#該模塊在python中用來讀取配置文件,配置文件的格式可以包含一個或多個節,每個節可以有多個參數
import io
import os
from collections import defaultdict
import numpy as np
from keras import backend as K
from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Reshape,
ZeroPadding2D, UpSampling2D, Activation, Lambda, MaxPooling2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.merge import concatenate, add
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from keras.utils.vis_utils import plot_model as plot
# Command-line interface.
# argparse.ArgumentParser() creates the parser object; every add_argument()
# call registers one positional argument or option, and parse_args() (called
# from the __main__ guard) performs the actual parsing.
parser = argparse.ArgumentParser(
    description='Yet Another Darknet To Keras Converter.')

# Required positional paths.
parser.add_argument('config_path', help='Path to Darknet cfg file.')
parser.add_argument('weights_path', help='Path to Darknet weights file.')
parser.add_argument('output_path', help='Path to output Keras model file.')

# Optional boolean switches (False unless given on the command line).
parser.add_argument(
    '-p', '--plot_model',
    action='store_true',
    help='Plot generated Keras model and save as image.')
parser.add_argument(
    '-flcl', '--fully_convolutional',
    action='store_true',
    help='Model is fully convolutional so set input shape to (None, None, 3). '
         'WARNING: This experimental option does not work properly for YOLO_v2.')
def unique_config_sections(config_file):
    """Rewrite a config file so that every section header is unique.

    Each header ``[name]`` becomes ``[name_<k>]`` where ``<k>`` counts the
    earlier occurrences of ``name`` (starting at 0), so that configparser
    accepts files that repeat a section name.

    Only the text between the leading ``[`` and the first ``]`` is treated
    as the section name; whatever follows the closing bracket (for example a
    trailing comment) is copied through unchanged. Lines that start with
    ``[`` but contain no ``]`` are copied through untouched.

    # Argument:
        config_file: path of the config file to read.

    # Returns
        io.StringIO holding the rewritten text, positioned at the start.
    """
    # defaultdict(int) yields 0 for a section name seen for the first time.
    section_counters = defaultdict(int)
    # In-memory text stream that receives the rewritten config.
    output_stream = io.StringIO()
    with open(config_file) as fin:
        for line in fin:
            if line.startswith('['):
                closing = line.find(']')
                if closing != -1:
                    section = line[1:closing].strip()
                    unique_name = '{}_{}'.format(
                        section, section_counters[section])
                    section_counters[section] += 1
                    # Rebuild only the header part instead of using
                    # str.replace on the whole line, which would also touch
                    # any later occurrence of the name on the same line.
                    line = '[' + unique_name + ']' + line[closing + 1:]
            output_stream.write(line)
    # Rewind so the caller can read the stream from the beginning.
    output_stream.seek(0)
    return output_stream
def _main(args):
    """Convert a Darknet cfg/weights pair into a Keras model saved as .h5.

    # Argument:
        args: parsed command-line namespace providing config_path,
            weights_path, output_path, fully_convolutional and plot_model.

    # Raises
        AssertionError: if a path does not carry the expected extension
            (.cfg, .weights, .h5).
        ValueError: if the config contains an unsupported section or
            activation (raised while building the model).
    """
    # expanduser replaces a leading "~" with the user's home directory and
    # returns the path unchanged otherwise.
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    output_path = os.path.expanduser(args.output_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config. The context manager guarantees the weights
    # file is closed even when model construction fails part-way through.
    print('Loading weights.')
    with open(weights_path, 'rb') as weights_file:
        # The first 20 bytes are interpreted as five int32 header values.
        weights_header = np.ndarray(
            shape=(5, ), dtype='int32', buffer=weights_file.read(20))
        print('Weights Header: ', weights_header)
        # TODO: Check transpose flag when implementing fully connected layers.
        # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000)

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        model, count = _build_model(cfg_parser, weights_file,
                                    args.fully_convolutional)

        # Create and save model. summary() prints by itself; wrapping it in
        # print() would only add a stray "None" line.
        model.summary()
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

        # Check to see if all weights have been read (4 bytes per value).
        remaining_weights = len(weights_file.read()) // 4

    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    # Plot once, and only when requested with -p/--plot_model.
    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))


def _build_model(cfg_parser, weights_file, fully_convolutional):
    """Build the Keras model described by cfg_parser, loading its weights.

    # Argument:
        cfg_parser: ConfigParser whose section names carry unique suffixes
            (e.g. net_0, convolutional_3).
        weights_file: binary file object positioned just after the weights
            header; convolutional weights are read from it sequentially.
        fully_convolutional: if true, the input height and width are None
            instead of the values from the net_0 section.

    # Returns
        (model, count): the Keras Model whose outputs are the yolo layers,
            and the number of float32 values consumed from weights_file.

    # Raises
        ValueError: on an unsupported section type, an unsupported
            activation, or an avgpool section that has parameters.
    """
    if fully_convolutional:
        image_height, image_width = None, None
    else:
        image_height = int(cfg_parser['net_0']['height'])
        image_width = int(cfg_parser['net_0']['width'])

    prev_layer = Input(shape=(image_height, image_width, 3))
    # all_layers[0] is the input, so Darknet layer k lives at index k + 1.
    all_layers = [prev_layer]
    # Indices into all_layers of the layers used as model outputs.
    outputs = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0

    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))

        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            # TODO: This assumes channel last dim_ordering.
            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            # np.prod replaces the deprecated np.product alias.
            weights_size = int(np.prod(weights_shape))

            print('conv2d', 'bn'
                  if batch_normalize else ' ', activation, weights_shape)

            conv_bias = np.ndarray(
                shape=(filters, ),
                dtype='float32',
                buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(
                    shape=(3, filters),
                    dtype='float32',
                    buffer=weights_file.read(filters * 12))
                count += 3 * filters

                # TODO: Keras BatchNormalization mistakenly refers to var
                # as std.
                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,  # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]  # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            # TODO: Add check for Theano dim ordering.
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            # With batch norm the bias is used as the BN beta instead.
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            padding = 'same' if pad == 1 and stride == 1 else 'valid'
            # Adjust padding model for darknet.
            if stride == 2:
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

            # Create Conv2D layer
            conv_layer = (Conv2D(
                filters, (size, size),
                strides=(stride, stride),
                kernel_regularizer=l2(weight_decay),
                use_bias=not batch_normalize,
                weights=conv_weights,
                activation=act_fn,
                padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(prev_layer)

        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(
                    padding='same',
                    pool_size=(size, size),
                    strides=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('avgpool'):
            if cfg_parser.items(section) != []:
                raise ValueError('{} with params unsupported.'.format(section))
            all_layers.append(GlobalAveragePooling2D()(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            if len(ids) == 2:
                # Shift every id other than -1 by one to account for the
                # input layer stored at all_layers[0].
                for i, item in enumerate(ids):
                    if item != -1:
                        ids[i] = item + 1

            layers = [all_layers[i] for i in ids]

            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = concatenate(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        elif section.startswith('shortcut'):
            # Only the first comma-separated entry of `from` is used.
            index = [int(i) for i in cfg_parser[section]['from'].split(',')][0]
            activation = cfg_parser[section]['activation']
            shortcut = add([all_layers[index], prev_layer])
            if activation == 'linear':
                shortcut = Activation('linear')(shortcut)
            all_layers.append(shortcut)
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                UpSampling2D(
                    size=(stride, stride))(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            classes = int(cfg_parser[section]['classes'])
            # num = int(cfg_parser[section]['num'])
            # mask = int(cfg_parser[section]['mask'])
            n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2])
            n3 = 3
            n4 = (4 + 1 + classes)
            # Reshape to (n1, n2, 3, 4 + 1 + classes) and mark as an output.
            yolo = Reshape((n1, n2, n3, n4))(prev_layer)
            all_layers.append(yolo)
            prev_layer = all_layers[-1]
            outputs.append(len(all_layers) - 1)

        elif section.startswith('net'):
            pass  # Configs not currently handled during model definition.

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    model = Model(inputs=all_layers[0],
                  outputs=[all_layers[i] for i in outputs])
    return model, count
# Script entry point: parse the command line, then run the conversion.
if __name__ == '__main__':
    cli_args = parser.parse_args()
    _main(cli_args)
# Script 2: demo
"""Demo for use yolo v3
"""
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO
def process_image(img):
    """Resize, rescale and add a batch axis to an image.

    # Argument:
        img: original image.

    # Returns
        image: float32 ndarray; the image resized to 416x416, divided by
            255, with a new axis of length 1 inserted at position 0.
    """
    # Resize to the fixed 416x416 size using bicubic interpolation.
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)

    # Convert to float32 and divide by 255 in place.
    scaled = np.array(resized, dtype='float32')
    scaled /= 255.

    # Insert a new leading axis.
    return np.expand_dims(scaled, axis=0)
def get_classes(file):
    """Read class names from a text file, one name per line.

    # Argument:
        file: path of the file listing the class names.

    # Returns
        class_names: List with one entry per line of the file, each with
            surrounding whitespace stripped.
    """
    with open(file) as f:
        return [entry.strip() for entry in f]
def draw(image, boxes, scores, classes, all_classes):
    """Draw labelled boxes on the image and print each detection.

    # Argument:
        image: original image; rectangles and text are drawn onto it.
        boxes: ndarray, boxes of objects, each unpacked as (x, y, w, h).
        scores: ndarray, scores of objects.
        classes: ndarray, classes of objects (indices into all_classes).
        all_classes: all classes name.
    """
    img_height, img_width = image.shape[0], image.shape[1]

    for box, score, cl in zip(boxes, scores, classes):
        x, y, w, h = box

        # Round to the nearest pixel and clamp the corners to the image.
        x_min = max(0, np.floor(x + 0.5).astype(int))
        y_min = max(0, np.floor(y + 0.5).astype(int))
        x_max = min(img_width, np.floor(x + w + 0.5).astype(int))
        y_max = min(img_height, np.floor(y + h + 0.5).astype(int))

        label = '{0} {1:.2f}'.format(all_classes[cl], score)

        # Rectangle from its two opposite corners, then the label 6 pixels
        # above the first corner.
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
        cv2.putText(image, label,
                    (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        print('class: {0}, score: {1:.2f}'.format(all_classes[cl], score))
        print('box coordinate x,y,w,h: {0}'.format(box))

    print()
def detect_image(image, yolo, all_classes):
    """Run the yolo model on one image and draw what it finds.

    # Argument:
        image: original image.
        yolo: YOLO, yolo model.
        all_classes: all classes name.

    # Returns:
        image: the same image object that was passed in, after draw() has
            been applied to it when boxes were returned.
    """
    network_input = process_image(image)

    started = time.time()
    # Core detection call; it yields the boxes, the class of each box and
    # the score of each box.
    # NOTE(review): the previous note here described boxes as
    # (top, left, bottom, right), but draw() unpacks each box as
    # (x, y, w, h) - confirm against YOLO.predict.
    boxes, classes, scores = yolo.predict(network_input, image.shape)
    elapsed = time.time() - started

    # Report how long the prediction took.
    print('time: {0:.2f}s'.format(elapsed))

    if boxes is None:
        return image

    draw(image, boxes, scores, classes, all_classes)
    return image
def detect_video(video, yolo, all_classes):
    """Use yolo v3 to detect video.

    Reads videos/test/<video> frame by frame, runs detect_image on every
    frame, shows the result in a window named "detection" and writes it to
    videos/res/<video>. Stops at the end of the video or when the key with
    code 27 (Esc) is pressed.

    # Argument:
        video: video file name (relative to videos/test and videos/res).
        yolo: YOLO, yolo model.
        all_classes: all classes name.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = cv2.VideoWriter()

    # try/finally so the capture and the writer are released even when
    # detection raises in the middle of the video.
    try:
        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # VideoWriter_fourcc turns four characters into the codec code.
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        # Grab frames from the video one at a time.
        while True:
            res, frame = camera.read()
            if not res:
                break

            image = detect_image(frame, yolo, all_classes)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            if cv2.waitKey(110) & 0xff == 27:
                break
    finally:
        vout.release()
        camera.release()
if __name__ == '__main__':
    # NOTE(review): per the original note, 0.6 and 0.5 are the object
    # threshold and the box threshold - confirm against
    # model.yolo_model.YOLO.
    yolo = YOLO(0.6, 0.5)

    file = 'data/coco_classes.txt'
    all_classes = get_classes(file)

    # Alternative mode (disabled): detect images in the test folder.
    # os.walk yields (root, dirs, files): root is the directory currently
    # being visited, dirs lists the directories directly inside it and
    # files lists the files directly inside it.
    #
    # for (root, dirs, files) in os.walk('images/test'):
    #     if files:
    #         for f in files:
    #             print(f)
    #             path = os.path.join(root, f)
    #             image = cv2.imread(path)
    #             image = detect_image(image, yolo, all_classes)
    #             cv2.imwrite('images/res/' + f, image)

    # detect videos one at a time in videos/test folder
    video = 'library1.mp4'
    detect_video(video, yolo, all_classes)