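# lib\model\test.py mainly provides the test_net() function, which Faster R-CNN uses at test time to run the model and save the results. It is called from tools/test_net.py. The functions are fairly simple, but together they make up the whole test-time pipeline, and almost every statement is annotated below.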
# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
#解析:nansbas
# --------------------------------------------------------
#-*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import numpy as np
try:
import cPickle as pickle
except ImportError:
import pickle
import os
import math
from utils.timer import Timer
from utils.blob import im_list_to_blob
from model.config import cfg, get_output_dir
from model.bbox_transform import clip_boxes, bbox_transform_inv
from model.nms_wrapper import nms
def _get_image_blob(im):
    """Converts an image into a network input.

    Arguments:
      im (ndarray): a color image in BGR order
    Returns:
      blob (ndarray): a data blob holding an image pyramid
      im_scale_factors (ndarray): image scales (relative to im) used
        in the image pyramid, one per entry of cfg.TEST.SCALES
    """
    # Work on a float32 copy (channel order is left as BGR) and remove the
    # configured per-pixel means from it.
    centered = im.astype(np.float32, copy=True)
    centered -= cfg.PIXEL_MEANS

    # Shortest and longest of the first two dimensions of the image.
    spatial_dims = centered.shape[0:2]
    short_side = np.min(spatial_dims)
    long_side = np.max(spatial_dims)

    scaled_images = []
    scale_factors = []
    for target_size in cfg.TEST.SCALES:
        # Scale so the short side reaches target_size ...
        scale = float(target_size) / float(short_side)
        # ... unless that would push the long side beyond MAX_SIZE, in which
        # case the long side is pinned to MAX_SIZE instead.
        if np.round(scale * long_side) > cfg.TEST.MAX_SIZE:
            scale = float(cfg.TEST.MAX_SIZE) / float(long_side)
        resized = cv2.resize(centered, None, None, fx=scale, fy=scale,
                             interpolation=cv2.INTER_LINEAR)
        scale_factors.append(scale)
        scaled_images.append(resized)

    # Pack the resized images into a single blob and return it together with
    # the scale factors that were applied.
    return im_list_to_blob(scaled_images), np.array(scale_factors)
def _get_blobs(im):
    """Build the dict of network inputs for one image.

    Thin wrapper around _get_image_blob: the image blob is stored under the
    'data' key, and the scale factors are passed through unchanged.

    Returns:
      blobs (dict): {'data': image blob}
      im_scale_factors: scale factors returned by _get_image_blob
    """
    data_blob, im_scale_factors = _get_image_blob(im)
    return {'data': data_blob}, im_scale_factors
def _clip_boxes(boxes, im_shape):#這個就是爲了保證圖像邊緣的檢測結果得到的框不超過圖像大小。主要是x2和y2。一個保障機制,一般都不會發生作用。
"""Clip boxes to image boundaries."""
# x1 >= 0
boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)
# y2 < im_shape[0]
boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)
return boxes
def _rescale_boxes(boxes, inds, scales):
"""Rescale boxes according to image rescaling."""
for i in range(boxes.shape[0]):
boxes[i,:] = boxes[i,:] / scales[int(inds[i])]
return boxes
def im_detect(sess, net, im):
    """Run the network on a single image and return scores and boxes.

    Arguments:
      sess: session object handed to net.test_image
      net: network object exposing test_image(sess, data, im_info)
      im (ndarray): the image to run detection on
    Returns:
      scores (ndarray): network scores reshaped to 2-D (first axis kept)
      pred_boxes (ndarray): boxes per score column; regressed and clipped to
        the image when cfg.TEST.BBOX_REG is set, otherwise the RoI boxes
        tiled once per score column
    """
    blobs, im_scales = _get_blobs(im)
    # Safeguard: only one scale (a single-image batch) is supported.
    assert len(im_scales) == 1, "Only single-image batch implemented"
    scale = im_scales[0]

    data = blobs['data']
    # float32 vector of two blob dimensions and the image scale
    # (presumably height and width -- TODO confirm the blob layout).
    im_info = np.array([data.shape[1], data.shape[2], scale],
                       dtype=np.float32)
    blobs['im_info'] = im_info

    _, scores, bbox_pred, rois = net.test_image(sess, data, im_info)

    # Columns 1..4 of each RoI, divided by the scale to undo the resizing.
    boxes = rois[:, 1:5] / scale
    # Flatten everything after the first axis.
    scores = np.reshape(scores, [scores.shape[0], -1])
    bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])

    if not cfg.TEST.BBOX_REG:
        # Simply repeat the boxes, once for each score column.
        return scores, np.tile(boxes, (1, scores.shape[1]))

    # Apply the bounding-box regression deltas to the RoI boxes, then keep
    # the results inside the original image.
    regressed = bbox_transform_inv(boxes, bbox_pred)
    return scores, _clip_boxes(regressed, im.shape)
def apply_nms(all_boxes, thresh):
    """Apply non-maximum suppression to all predicted boxes output by the
    test_net method.

    Arguments:
      all_boxes: nested list where all_boxes[cls][image] is either an empty
        list or an N x 5 array of detections (x1, y1, x2, y2, score)
      thresh: threshold passed through to nms
    Returns:
      nms_boxes: nested list with the same [cls][image] layout; an entry is
        an empty list when there were no detections, no box with positive
        width and height, or nothing kept by nms
    """
    if not all_boxes:
        return []
    num_classes = len(all_boxes)
    num_images = len(all_boxes[0])
    nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = all_boxes[cls_ind][im_ind]
            # Use len() rather than `dets == []`: comparing an ndarray with a
            # list is not a reliable emptiness test.
            if len(dets) == 0:
                continue
            # Keep only boxes with strictly positive width and height; the
            # actual suppression happens in nms below.
            non_degenerate = ((dets[:, 2] > dets[:, 0]) &
                              (dets[:, 3] > dets[:, 1]))
            dets = dets[np.where(non_degenerate)[0], :]
            if len(dets) == 0:
                continue
            keep = nms(dets, thresh)
            if len(keep) == 0:
                continue
            # Store a copy of the kept detections.
            nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
    return nms_boxes
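# test_net is the main entry point of this module and is called from tools/test_net.py. It drives the overall test-time flow for a set of images, using im_detect and nms defined/imported above.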
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
    """Test a Fast R-CNN network on an image database.

    Runs im_detect on every image of imdb, applies per-class NMS, optionally
    caps the number of detections per image, pickles all detections to
    'detections.pkl' in the output directory and finally calls
    imdb.evaluate_detections.

    Arguments:
      sess: session object forwarded to im_detect
      net: network object forwarded to im_detect
      imdb: image database; must provide image_index, num_classes,
        image_path_at(i) and evaluate_detections(all_boxes, output_dir)
      weights_filename: forwarded to get_output_dir to pick the output folder
      max_per_image: maximum number of detections kept per image over all
        classes; values <= 0 disable the cap
      thresh: only detections with score strictly greater than this are kept
    Raises:
      IOError: if an image of the database cannot be read
    """
    # Seed NumPy's RNG from the config first.
    np.random.seed(cfg.RNG_SEED)
    num_images = len(imdb.image_index)
    # all detections are collected into:
    #  all_boxes[cls][image] = N x 5 array of detections in
    #  (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    output_dir = get_output_dir(imdb, weights_filename)
    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        image_path = imdb.image_path_at(i)
        im = cv2.imread(image_path)
        # cv2.imread signals failure by returning None; fail here with a
        # clear message instead of deep inside im_detect.
        if im is None:
            raise IOError('Failed to read image: {}'.format(image_path))

        # First timer: the detection itself.
        _t['im_detect'].tic()
        scores, boxes = im_detect(sess, net, im)
        _t['im_detect'].toc()

        # Second timer: post-processing.
        _t['misc'].tic()
        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            # All detections of class j in image i.
            all_boxes[j][i] = cls_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            # Concatenate the scores of every foreground class.
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # np.sort is ascending, so index -max_per_image is the
                # max_per_image-th highest score; it becomes a per-image
                # dynamic threshold.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        _t['misc'].toc()

        # Progress: image counter and the average time of both timers.
        print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
              .format(i + 1, num_images, _t['im_detect'].average_time,
                      _t['misc'].average_time))

    # Persist every detection to detections.pkl in the output directory.
    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)