TensorFlow-SSD測試代碼梳理,相信看過後對SSD的整個流程都一目瞭然了。
硬件:NVIDIA-GTX1080
軟件:Windows7、python3.6.5、tensorflow-gpu-1.4.0
一、基礎知識
1、anchor boxes (x, y, w, h)
1.1 x和y
例如38x38特徵圖輸出,則對應原始圖像300/38 ≈ 8x8大小(即anchor_steps=8),則特徵圖[0]的中心x和y相對原始圖像的位置爲(0+0.5)*8=4和4,歸一化(相對各自單元格)後就是0.5和0.5,特徵圖[1]的中心x和y相對原始圖像的位置爲(1+0.5)*8=12和4,歸一化後就是1.5和0.5,以此類推。。。
1.2 w和h
1.2.1 根據經驗公式得出6層輸出的anchor boxes邊界框尺寸(對應300x300圖像):
step= (max_ratio*100 - min_ratio*100) / (n_feat_layers - 2)
ratio = [min_ratio:step:max_ratio]
box_size[0] = 300*ratio / 100 ...(表示1:1)
box_size[1] = 300*(ratio+step) / 100 ...(表示1':1',1'表示邊框比1大,但是比例還是1:1)
1.2.2 根據寬高比例得出6層輸出對應的anchor boxes邊界框大小(scale = 1:1, 1':1', 1:2, 2:1, 1:3, 3:1)
w[0] = box_size[0], h[0] = box_size[0] 歸一化後 w[0] /= img_size[0], h[0] /= img_size[1]
w[1] = sqrt(box_size[0] * box_size[1]), h[1] = sqrt(box_size[0] * box_size[1])(與代碼一致,取兩個尺寸的幾何平均) 歸一化後 w[1] /= img_size[0], h[1] /= img_size[1]
w[2:] = box_size[0] * sqrt(scale[2:]), h[2:] = box_size[0] / sqrt(scale[2:]) 歸一化道理類似
1.3 邊界框總個數
38x38x4 + 19x19x6 + 10x10x6 + 5x5x6 + 3x3x4 + 1x1x4 = 8732
2、邊界框相對anchor boxes迴歸
P for prediction, B for boundingbox, A for Anchor boxes, S for prior_scaling超參數
Px = (Bx - Ax)/(Aw * Sx) 則 Bx = Px * Aw * Sx + Ax,Py道理類似
Pw = ln(Bw / Aw) / Sw 則 Bw = exp(Pw * Sw) * Aw,Ph道理類似
3、與YOLO不同,SSD是直接將背景作爲額外的一類(20類物體+1類背景,共21類)進行分類學習,代碼中背景的類別索引爲0,其分類的結果就是得分
二、代碼展示(做了詳細註釋)
1.demo_ssd.py
"""
SSD demo
"""
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.image as mpimg
from ssd_300_vgg import SSD
from utils import preprocess_image, process_bboxes
from visualization import plt_bboxes
# Build the detection graph and fetch its input placeholder / output tensors.
ssd_net = SSD()
classes, scores, bboxes = ssd_net.detections()
images = ssd_net.images()

# Load the demo image before opening a session so that a bad path fails fast.
img_path = './demo/dog.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None instead
    # of raising, which would otherwise surface as an obscure cvtColor error.
    raise FileNotFoundError('Cannot read image: {}'.format(img_path))
# OpenCV loads BGR; convert to RGB before preprocessing and plotting.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_prepocessed = preprocess_image(img)

ckpt_filename = './ssd_checkpoints/ssd_vgg_300_weights.ckpt'
sess = tf.Session()
try:
    # Restore SSD model.
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_filename)
    rclasses, rscores, rbboxes = sess.run([classes, scores, bboxes],
                                          feed_dict={images: img_prepocessed})
finally:
    # Release the session's resources even if restore/inference fails.
    sess.close()

# Post-process on the NumPy side (clip, sort, NMS) and draw the result.
rclasses, rscores, rbboxes = process_bboxes(rclasses, rscores, rbboxes)
plt_bboxes(img, rclasses, rscores, rbboxes)
2.ssd_300_vgg.py
"""
SSD net (vgg_based) 300x300
"""
from collections import namedtuple
import numpy as np
import tensorflow as tf
from ssd_layers import conv2d, max_pool2d, l2norm, dropout, \
pad2d, ssd_multibox_layer
from ssd_anchors import ssd_anchors_all_layers
# SSD parameters
SSDParams = namedtuple(
    'SSDParameters',
    (
        'img_shape',            # input image size, e.g. (300, 300)
        'num_classes',          # number of classes: 20 + 1
        'no_annotation_label',
        'feat_layers',          # names of the feature layers used for detection
        'feat_shapes',          # spatial size of each detection feature map
        'anchor_size_bounds',   # (S_min, S_max) used to derive anchor sizes
        'anchor_sizes',         # anchor sizes per feature layer
        'anchor_ratios',        # anchor aspect ratios per feature layer
        'anchor_steps',         # pixels of the input covered by one cell, e.g. 38x38 -> 300/38 ~ 8
        'anchor_offset',        # the center point offset
        'normalizations',       # list of normalizations of layer for detection
        'prior_scaling',        # "variance" hyper-parameters used when decoding boxes
    ),
)
class SSD(object):
    """SSD-300 detection network on a VGG-style backbone.

    Constructing an instance builds the whole TensorFlow graph: the image
    placeholder, the backbone and extra feature layers, one multibox head per
    detection layer, and the decode + score-threshold step that produces the
    tensors returned by :meth:`detections`.
    """

    def __init__(self, is_training=True):
        """Build the graph and store the detection output tensors.

        Args:
            is_training: Stored on the instance. The only layer that would
                read it (dropout after conv6) is currently commented out.
        """
        self.is_training = is_training
        self.threshold = 0.5  # class score threshold
        self.ssd_params = SSDParams(
            img_shape=(300, 300),
            num_classes=21,
            no_annotation_label=21,
            feat_layers=["block4", "block7", "block8", "block9", "block10", "block11"],
            feat_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
            anchor_size_bounds=[0.15, 0.90],  # diff from the original paper
            # first value -> 1:1 box, second value -> larger 1:1 box (+step)
            anchor_sizes=[(21., 45.),
                          (45., 99.),
                          (99., 153.),
                          (153., 207.),
                          (207., 261.),
                          (261., 315.)],
            anchor_ratios=[[2, .5],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5, 3, 1. / 3],
                           [2, .5],
                           [2, .5]],
            anchor_steps=[8, 16, 32, 64, 100, 300],
            anchor_offset=0.5,
            normalizations=[20, -1, -1, -1, -1, -1],
            prior_scaling=[0.1, 0.1, 0.2, 0.2]
        )
        predictions, _logits, locations = self._built_net()
        # Optional: self._update_feat_shapes_from_net(predictions)
        classes, scores, bboxes = self._bboxes_select(predictions, locations)
        self._classes = classes
        self._scores = scores
        self._bboxes = bboxes

    def _built_net(self):
        """Construct the SSD net.

        Returns:
            (predictions, logits, locations): three lists with one entry per
            layer in ``ssd_params.feat_layers`` -- softmax class scores, raw
            class logits and box regression outputs.
        """
        self.end_points = {}  # record the detection layers output
        self._images = tf.placeholder(
            tf.float32,
            shape=[None, self.ssd_params.img_shape[0],
                   self.ssd_params.img_shape[1], 3])
        with tf.variable_scope("ssd_300_vgg"):
            # original vgg layers
            # block 1
            net = conv2d(self._images, 64, 3, scope="conv1_1")
            net = conv2d(net, 64, 3, scope="conv1_2")
            self.end_points["block1"] = net
            net = max_pool2d(net, 2, scope="pool1")
            # block 2
            net = conv2d(net, 128, 3, scope="conv2_1")
            net = conv2d(net, 128, 3, scope="conv2_2")
            self.end_points["block2"] = net
            net = max_pool2d(net, 2, scope="pool2")
            # block 3
            net = conv2d(net, 256, 3, scope="conv3_1")
            net = conv2d(net, 256, 3, scope="conv3_2")
            net = conv2d(net, 256, 3, scope="conv3_3")
            self.end_points["block3"] = net
            net = max_pool2d(net, 2, scope="pool3")
            # block 4
            net = conv2d(net, 512, 3, scope="conv4_1")
            net = conv2d(net, 512, 3, scope="conv4_2")
            net = conv2d(net, 512, 3, scope="conv4_3")
            self.end_points["block4"] = net
            net = max_pool2d(net, 2, scope="pool4")
            # block 5
            net = conv2d(net, 512, 3, scope="conv5_1")
            net = conv2d(net, 512, 3, scope="conv5_2")
            net = conv2d(net, 512, 3, scope="conv5_3")
            self.end_points["block5"] = net
            # pool5 uses a 3x3 window with stride 1, so it keeps the spatial size.
            net = max_pool2d(net, 3, stride=1, scope="pool5")

            # additional SSD layers
            # block 6: use dilate conv
            net = conv2d(net, 1024, 3, dilation_rate=6, scope="conv6")
            self.end_points["block6"] = net
            # net = dropout(net, is_training=self.is_training)
            # block 7
            net = conv2d(net, 1024, 1, scope="conv7")
            self.end_points["block7"] = net
            # block 8
            net = conv2d(net, 256, 1, scope="conv8_1x1")
            net = conv2d(pad2d(net, 1), 512, 3, stride=2, scope="conv8_3x3",
                         padding="valid")
            self.end_points["block8"] = net
            # block 9
            net = conv2d(net, 128, 1, scope="conv9_1x1")
            net = conv2d(pad2d(net, 1), 256, 3, stride=2, scope="conv9_3x3",
                         padding="valid")
            self.end_points["block9"] = net
            # block 10
            net = conv2d(net, 128, 1, scope="conv10_1x1")
            net = conv2d(net, 256, 3, scope="conv10_3x3", padding="valid")
            self.end_points["block10"] = net
            # block 11
            net = conv2d(net, 128, 1, scope="conv11_1x1")
            net = conv2d(net, 256, 3, scope="conv11_3x3", padding="valid")
            self.end_points["block11"] = net

            # class and location predictions
            predictions = []
            logits = []
            locations = []
            for i, layer in enumerate(self.ssd_params.feat_layers):
                cls, loc = ssd_multibox_layer(self.end_points[layer],
                                              self.ssd_params.num_classes,
                                              self.ssd_params.anchor_sizes[i],
                                              self.ssd_params.anchor_ratios[i],
                                              self.ssd_params.normalizations[i],
                                              scope=layer + "_box")
                predictions.append(tf.nn.softmax(cls))
                logits.append(cls)
                locations.append(loc)
            return predictions, logits, locations

    def _update_feat_shapes_from_net(self, predictions):
        """Replace ``ssd_params.feat_shapes`` with shapes read from the net.

        Args:
            predictions: iterable of per-layer prediction tensors; each must
                expose ``get_shape().as_list()``. The leading (batch)
                dimension is dropped.
        """
        new_feat_shapes = [pred.get_shape().as_list()[1:] for pred in predictions]
        # namedtuple._replace returns a NEW tuple; it must be assigned back,
        # otherwise the update is silently lost.
        self.ssd_params = self.ssd_params._replace(feat_shapes=new_feat_shapes)

    def anchors(self):
        """Get SSD anchors for every detection layer (one entry per layer)."""
        return ssd_anchors_all_layers(self.ssd_params.img_shape,
                                      self.ssd_params.feat_shapes,
                                      self.ssd_params.anchor_sizes,
                                      self.ssd_params.anchor_ratios,
                                      self.ssd_params.anchor_steps,
                                      self.ssd_params.anchor_offset,
                                      np.float32)

    def _bboxes_decode_layer(self, feat_locations, anchor_bboxes, prior_scaling):
        """Decode the regression output of one feature layer into boxes.

        Args:
            feat_locations: 5D Tensor, [batch_size, size, size, n_anchors, 4];
                the last axis is read as (cx, cy, w, h) offsets.
            anchor_bboxes: sequence of four tensors in the order (y, x, h, w)
                with shapes [size, size, 1], [size, size, 1], [n_anchors],
                [n_anchors].
            prior_scaling: list of 4 floats.

        Returns:
            Tensor [batch_size, size, size, n_anchors, 4] holding
            (ymin, xmin, ymax, xmax).
        """
        yref, xref, href, wref = anchor_bboxes
        # P: prediction, B: bounding box, A: anchor box, S: prior_scaling.
        # Px = (Bx - Ax) / (Aw * Sx)  ->  Bx = Px * Aw * Sx + Ax
        # Pw = ln(Bw / Aw) / Sw       ->  Bw = exp(Pw * Sw) * Aw
        cx = feat_locations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
        cy = feat_locations[:, :, :, :, 1] * href * prior_scaling[1] + yref
        w = wref * tf.exp(feat_locations[:, :, :, :, 2] * prior_scaling[2])
        h = href * tf.exp(feat_locations[:, :, :, :, 3] * prior_scaling[3])
        # Convert center/size to corner coordinates (ymin, xmin, ymax, xmax).
        bboxes = tf.stack([cy - h / 2., cx - w / 2.,
                           cy + h / 2., cx + w / 2.], axis=-1)
        return bboxes

    def _bboxes_select_layer(self, feat_predictions, feat_locations, anchor_bboxes,
                             prior_scaling):
        """Select boxes from one feature layer; only valid for batch_size=1.

        Returns:
            (classes, scores, bboxes) for the anchors whose best
            non-background score exceeds ``self.threshold``.
        """
        # e.g. [None, 38, 38, 4, 21] -> 38 * 38 * 4 boxes.
        # np.prod replaces np.product, a deprecated alias removed in NumPy 2.0.
        n_bboxes = np.prod(feat_predictions.get_shape().as_list()[1:-1])
        # decode the location
        bboxes = self._bboxes_decode_layer(feat_locations, anchor_bboxes, prior_scaling)
        bboxes = tf.reshape(bboxes, [n_bboxes, 4])
        predictions = tf.reshape(feat_predictions, [n_bboxes, self.ssd_params.num_classes])
        # remove the background predictions (column 0)
        sub_predictions = predictions[:, 1:]
        # choose the max score class; +1 restores the original class index
        classes = tf.argmax(sub_predictions, axis=1) + 1
        scores = tf.reduce_max(sub_predictions, axis=1)
        # Boxes selection: use threshold
        filter_mask = scores > self.threshold
        classes = tf.boolean_mask(classes, filter_mask)
        scores = tf.boolean_mask(scores, filter_mask)
        bboxes = tf.boolean_mask(bboxes, filter_mask)
        return classes, scores, bboxes

    def _bboxes_select(self, predictions, locations):
        """Select bboxes over all feature layers; only valid for batch_size=1."""
        anchor_bboxes_list = self.anchors()
        classes_list = []
        scores_list = []
        bboxes_list = []
        # select bboxes for each feat layer
        for n, layer_predictions in enumerate(predictions):
            # e.g. anchor_bboxes_list[0]: y(38,38,1), x(38,38,1), h(4,), w(4,)
            anchor_bboxes = list(map(tf.convert_to_tensor, anchor_bboxes_list[n]))
            classes, scores, bboxes = self._bboxes_select_layer(
                layer_predictions, locations[n], anchor_bboxes,
                self.ssd_params.prior_scaling)
            classes_list.append(classes)
            scores_list.append(scores)
            bboxes_list.append(bboxes)
        # combine all feat layers
        classes = tf.concat(classes_list, axis=0)
        scores = tf.concat(scores_list, axis=0)
        bboxes = tf.concat(bboxes_list, axis=0)
        return classes, scores, bboxes

    def images(self):
        """Return the input image placeholder."""
        return self._images

    def detections(self):
        """Return the (classes, scores, bboxes) output tensors."""
        return self._classes, self._scores, self._bboxes
# NOTE: disabled manual smoke test. It is wrapped in a bare string literal, so
# it is never executed; it builds the net and restores a checkpoint.
'''
if __name__ == "__main__":
ssd = SSD()
sess = tf.Session()
saver_ = tf.train.Saver()
saver_.restore(sess, "../SSD-Tensorflow-master/ssd_checkpoints/ssd_vgg_300_weights.ckpt")
'''
3.ssd_layers.py
"""
Layers for SSD
"""
import tensorflow as tf
# Conv2d: for stride = 1
def conv2d(x, filters, kernel_size, stride=1, padding="same",
           dilation_rate=1, activation=tf.nn.relu, scope="conv2d"):
    """Apply ``tf.layers.conv2d`` with a square kernel, stride and dilation.

    The scalar ``kernel_size``, ``stride`` and ``dilation_rate`` are each
    expanded to a two-element list; ``scope`` is used as the layer name.
    """
    square_kernel = [kernel_size, kernel_size]
    square_stride = [stride, stride]
    square_dilation = [dilation_rate, dilation_rate]
    return tf.layers.conv2d(
        x,
        filters,
        square_kernel,
        strides=square_stride,
        dilation_rate=square_dilation,
        padding=padding,
        name=scope,
        activation=activation,
    )
# max pool2d: default pool_size = stride
def max_pool2d(x, pool_size, stride=None, scope="max_pool2d"):
    """Apply "same"-padded max pooling with a square window.

    When ``stride`` is ``None`` the stride equals ``pool_size``.
    """
    if stride is None:
        step = pool_size
    else:
        step = stride
    window = [pool_size, pool_size]
    return tf.layers.max_pooling2d(x, window, [step, step], name=scope, padding="same")
# pad2d: for conv2d with stride > 1
def pad2d(x, pad):
    """Pad axes 1 and 2 of a 4-D tensor by ``pad`` on both sides via ``tf.pad``."""
    untouched = [0, 0]
    border = [pad, pad]
    return tf.pad(x, paddings=[untouched, border, list(border), list(untouched)])
# dropout
def dropout(x, rate=0.5, is_training=True):
    """Thin wrapper over ``tf.layers.dropout``; ``is_training`` maps to ``training``."""
    dropped = tf.layers.dropout(x, rate=rate, training=is_training)
    return dropped
# l2norm (not batch norm, spatial normalization)
def l2norm(x, scale, trainable=True, scope="L2Normalization"):
    """L2-normalize ``x`` along axis 3 and multiply by a per-channel ``gamma``.

    ``gamma`` is a variable of length ``x.shape[-1]`` created under ``scope``
    and initialised to the constant ``scale``.
    """
    channel_count = x.get_shape().as_list()[-1]
    normalized = tf.nn.l2_normalize(x, [3], epsilon=1e-12)
    with tf.variable_scope(scope):
        gamma = tf.get_variable(
            "gamma",
            shape=[channel_count, ],
            dtype=tf.float32,
            initializer=tf.constant_initializer(scale),
            trainable=trainable,
        )
        return normalized * gamma
# multibox layer: get class and location predictions from detection layer
def ssd_multibox_layer(x, num_classes, sizes, ratios, normalization=-1, scope="multibox"):
    """Build the class and box-regression heads for one detection layer.

    Args:
        x: 4-D feature map tensor; its two middle (spatial) dims must be static.
        num_classes: number of class scores predicted per anchor.
        sizes: anchor sizes for this layer (only its length is used here).
        ratios: anchor aspect ratios for this layer (only its length is used).
        normalization: if > 0, ``x`` is first passed through ``l2norm`` with
            this value as the initial scale.
        scope: variable scope that holds the head's variables.

    Returns:
        (cls_pred, loc_pred): tensors reshaped to
        [-1, H, W, n_anchors, num_classes] and [-1, H, W, n_anchors, 4].
    """
    # Spatial dims of the feature map (e.g. [38, 38]); -1 keeps batch dynamic.
    pre_shape = [-1] + x.get_shape().as_list()[1:-1]
    with tf.variable_scope(scope):
        # l2 norm (the leftover debug print of the tensor has been removed)
        if normalization > 0:
            x = l2norm(x, normalization)
        # number of anchors per cell
        n_anchors = len(sizes) + len(ratios)
        # location predictions
        loc_pred = conv2d(x, n_anchors * 4, 3, activation=None, scope="conv_loc")
        loc_pred = tf.reshape(loc_pred, pre_shape + [n_anchors, 4])
        # class predictions
        cls_pred = conv2d(x, n_anchors * num_classes, 3, activation=None, scope="conv_cls")
        cls_pred = tf.reshape(cls_pred, pre_shape + [n_anchors, num_classes])
        return cls_pred, loc_pred
4.ssd_anchors.py
"""
SSD anchors
"""
import math
import numpy as np
# 通過Smin和Smax計算anchor尺寸
def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(300, 300)):
    """Compute the reference sizes of the anchor boxes from relative bounds.

    The absolute values are measured in pixels, based on the network
    default size (300 pixels).

    Args:
        size_bounds: (min, max) anchor scale as fractions of the image size.
        n_feat_layers: number of detection feature layers.
        img_shape: (height, width) of the input; must be square.

    Return:
        list containing the absolute (size, next_size) pair at each scale.
        For each scale, the ratios only apply to the first value. The first
        entry is a list and the remaining entries are tuples.
    """
    assert img_shape[0] == img_shape[1], "img_shape must be square"
    img_size = img_shape[0]
    # Convert the bounds to integer percentages. round() is required here:
    # int() truncates binary float error (0.29 * 100 == 28.999999999999996)
    # and would shift every derived size by one percent.
    min_ratio = int(round(size_bounds[0] * 100))
    max_ratio = int(round(size_bounds[1] * 100))
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes
def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Arguments:
      img_shape: Image shape; centers and sizes are divided by it;
      feat_shape: Feature map shape, defines the grid of cell centers;
      sizes: Absolute reference sizes (pixels);
      ratios: Aspect ratios applied to ``sizes[0]``;
      step: Pixels of the input image covered by one feature cell;
      offset: Grid offset added to each cell index.
    Return:
      y, x, h, w: center grids of shape [rows, cols, 1] and per-anchor
      height / width vectors of length ``len(sizes) + len(ratios)``.
    """
    img_h, img_w = img_shape[0], img_shape[1]

    # Cell centers: (index + offset) * step gives the pixel position in the
    # input image, which is then divided by the image size.
    rows, cols = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = np.expand_dims((rows.astype(dtype) + offset) * step / img_h, axis=-1)
    x = np.expand_dims((cols.astype(dtype) + offset) * step / img_w, axis=-1)

    total = len(sizes) + len(ratios)
    h = np.zeros((total, ), dtype=dtype)
    w = np.zeros((total, ), dtype=dtype)

    # Anchor 0: square box of side sizes[0].
    base = sizes[0]
    h[0] = base / img_h
    w[0] = base / img_w
    next_slot = 1

    # Anchor 1 (optional): square box whose side is the geometric mean of
    # the two reference sizes.
    if len(sizes) > 1:
        mean_side = math.sqrt(sizes[0] * sizes[1])
        h[1] = mean_side / img_h
        w[1] = mean_side / img_w
        next_slot += 1

    # Remaining anchors: sizes[0] reshaped by each ratio. Dividing the height
    # and multiplying the width by sqrt(r) keeps the box area unchanged.
    for slot, r in enumerate(ratios, start=next_slot):
        root = math.sqrt(r)
        h[slot] = sizes[0] / root / img_h
        w[slot] = sizes[0] * root / img_w
    return y, x, h, w
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers.

    Returns a list with one ``ssd_anchor_one_layer`` result per entry of
    ``layers_shape``; the i-th layer uses the i-th sizes, ratios and step.
    """
    return [
        ssd_anchor_one_layer(img_shape, shape,
                             anchor_sizes[idx],
                             anchor_ratios[idx],
                             anchor_steps[idx],
                             offset=offset, dtype=dtype)
        for idx, shape in enumerate(layers_shape)
    ]
# NOTE: disabled usage example, kept inside a bare string literal so it never
# runs. It derives the anchor sizes from the (S_min, S_max) bounds.
'''
# 通過Smin和Smax計算anchor尺寸
output = ssd_size_bounds_to_values([0.15, 0.90], 6)
print(output)
'''
5.utils.py
"""
Help functions for SSD
"""
import cv2
import numpy as np
############## preprocess image ##################
# whiten the image
def whiten_image(image, means=(123., 117., 104.)):
    """Return ``image`` with the per-channel ``means`` subtracted.

    Raises:
        ValueError: if ``image`` is not 3-D, or ``means`` does not have one
            entry per channel (last axis).
    """
    if image.ndim != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    if len(means) != image.shape[-1]:
        raise ValueError('len(means) must match the number of channels')
    # The means are cast to the image dtype before subtracting.
    channel_means = np.array(means, dtype=image.dtype)
    return image - channel_means
def resize_image(image, size=(300, 300)):
    """Resize ``image`` to ``size`` with ``cv2.resize`` (default settings)."""
    resized = cv2.resize(image, size)
    return resized
def preprocess_image(image):
    """Prepare one image for inference.

    Steps: copy and cast to float32, subtract the channel means, resize, then
    add a leading batch axis of length 1.
    """
    as_float = np.copy(image).astype(np.float32)
    resized = resize_image(whiten_image(as_float))
    return np.expand_dims(resized, axis=0)
############## process bboxes ##################
def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes so they lie inside the reference bbox.

    Coordinates 0 and 1 are raised to at least the reference value,
    coordinates 2 and 3 are lowered to at most the reference value. The input
    ``bboxes`` is not modified.
    """
    clipped = np.transpose(np.copy(bboxes))
    ref = np.transpose(bbox_ref)
    limiters = (np.maximum, np.maximum, np.minimum, np.minimum)
    for coord, limit in enumerate(limiters):
        clipped[coord] = limit(clipped[coord], ref[coord])
    return np.transpose(clipped)
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Order detections by decreasing score and keep the first ``top_k``.

    Returns the re-ordered (classes, scores, bboxes) triple.
    """
    order = np.argsort(-scores)
    ranked = tuple(values[order][:top_k] for values in (classes, scores, bboxes))
    top_classes, top_scores, top_bboxes = ranked
    return top_classes, top_scores, top_bboxes
def bboxes_iou(bboxes1, bboxes2):
    """Compute intersection-over-union between bboxes1 and bboxes2.

    Boxes are (ymin, xmin, ymax, xmax) on their last axis. The inputs can be
    multi-dimensional but must be broadcastable after transposition.
    """
    first = np.transpose(bboxes1)
    second = np.transpose(bboxes2)
    ymin1, xmin1, ymax1, xmax1 = first[0], first[1], first[2], first[3]
    ymin2, xmin2, ymax2, xmax2 = second[0], second[1], second[2], second[3]

    # Intersection: overlap extent on each axis, floored at zero.
    inter_h = np.maximum(np.minimum(ymax1, ymax2) - np.maximum(ymin1, ymin2), 0.)
    inter_w = np.maximum(np.minimum(xmax1, xmax2) - np.maximum(xmin1, xmin2), 0.)
    intersection = inter_h * inter_w

    # Union = area1 + area2 - intersection.
    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
    area2 = (ymax2 - ymin2) * (xmax2 - xmin2)
    return intersection / (area1 + area2 - intersection)
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.5):
    """Apply non-maximum suppression to bounding boxes.

    Boxes are visited in the given order (callers pass them sorted by
    decreasing score). A later box is dropped only when it has the SAME class
    as a kept earlier box and their IoU is at least ``nms_threshold``; boxes of
    different classes never suppress each other.

    Args:
        classes: 1-D array of class ids.
        scores: 1-D array of scores, same length as ``classes``.
        bboxes: array of boxes, one row per detection.
        nms_threshold: IoU at or above which a same-class box is suppressed.

    Returns:
        (classes, scores, bboxes) restricted to the kept detections.
    """
    # Use the builtin bool: the np.bool alias was removed in NumPy 1.24.
    keep_bboxes = np.ones(scores.shape, dtype=bool)
    for i in range(scores.size - 1):
        if not keep_bboxes[i]:
            continue
        # Compute overlap with the boxes that follow.
        overlap = bboxes_iou(bboxes[i], bboxes[(i + 1):])
        # Keep a following box if it overlaps little OR belongs to another class.
        keep_overlap = np.logical_or(overlap < nms_threshold,
                                     classes[(i + 1):] != classes[i])
        keep_bboxes[(i + 1):] = np.logical_and(keep_bboxes[(i + 1):], keep_overlap)
    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]
def bboxes_resize(bbox_ref, bboxes):
    """Re-express boxes relative to a reference box.

    The reference box is treated as [0, 0, 1, 1] after the transform: every
    box is shifted by the reference origin and divided by the reference
    extent. Works on a copy; ``bboxes`` itself is left untouched.
    """
    rescaled = np.copy(bboxes)
    origin_y, origin_x = bbox_ref[0], bbox_ref[1]
    extent_y = bbox_ref[2] - bbox_ref[0]
    extent_x = bbox_ref[3] - bbox_ref[1]
    # Columns 0 and 2 use the first-axis origin/extent, columns 1 and 3 the second.
    per_column = ((0, origin_y, extent_y), (1, origin_x, extent_x),
                  (2, origin_y, extent_y), (3, origin_x, extent_x))
    # Translate.
    for column, shift, _ in per_column:
        rescaled[:, column] -= shift
    # Resize.
    for column, _, extent in per_column:
        rescaled[:, column] /= extent
    return rescaled
def process_bboxes(rclasses, rscores, rbboxes, rbbox_img = (0.0, 0.0, 1.0, 1.0),
                   top_k=400, nms_threshold=0.5):
    """Post-process raw detections: clip, sort, NMS, then resize.

    Returns the filtered (classes, scores, bboxes) triple.
    """
    # Keep every box inside the reference image box.
    clipped = bboxes_clip(rbbox_img, rbboxes)
    # Order by score and keep the top_k.
    top_classes, top_scores, top_bboxes = bboxes_sort(rclasses, rscores, clipped, top_k)
    # Suppress overlapping same-class boxes.
    kept_classes, kept_scores, kept_bboxes = bboxes_nms(
        top_classes, top_scores, top_bboxes, nms_threshold)
    # With the default rbbox_img of (0, 0, 1, 1) this leaves the values unchanged.
    kept_bboxes = bboxes_resize(rbbox_img, kept_bboxes)
    return kept_classes, kept_scores, kept_bboxes
6.visualization.py
import cv2
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm
# class names
# Class names, indexed by (class id - 1).
CLASSES = [
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
]
# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5, show_class_name=True):
    """Draw detections on ``img`` with matplotlib and show the figure.

    ``bboxes`` rows are (ymin, xmin, ymax, xmax) scaled by the image height
    and width before drawing. Each class id gets one random colour; the label
    shows the class name (or the id when ``show_class_name`` is false) and
    the score.
    """
    plt.figure(figsize=figsize)
    plt.imshow(img)
    img_h, img_w = img.shape[0], img.shape[1]
    palette = {}
    for idx in range(classes.shape[0]):
        cls_id = int(classes[idx])
        if cls_id < 0:
            continue
        score = scores[idx]
        if cls_id not in palette:
            palette[cls_id] = (random.random(), random.random(), random.random())
        color = palette[cls_id]
        # Scale the box to pixel coordinates.
        top = int(bboxes[idx, 0] * img_h)
        left = int(bboxes[idx, 1] * img_w)
        bottom = int(bboxes[idx, 2] * img_h)
        right = int(bboxes[idx, 3] * img_w)
        axes = plt.gca()
        axes.add_patch(plt.Rectangle((left, top), right - left,
                                     bottom - top, fill=False,
                                     edgecolor=color,
                                     linewidth=linewidth))
        if show_class_name:
            label = CLASSES[cls_id-1]
        else:
            label = str(cls_id)
        plt.gca().text(left, top - 2,
                       '{:s} | {:.3f}'.format(label, score),
                       bbox=dict(facecolor=color, alpha=0.5),
                       fontsize=12, color='white')
    plt.show()
三、模型下載
https://pan.baidu.com/s/1snhuTsT
四、結果展示
五、參考
https://zhuanlan.zhihu.com/p/35325884
任何問題請加唯一QQ2258205918(名稱samylee)!