Tensorflow Faster R-CNN Code Walkthrough: test.py

 

lib\model\test.py mainly provides the test_net function, which runs a trained Faster R-CNN model over the test images and saves the results; it is called from tools/test_net.py. The functions here are fairly simple, but together they form the complete test-time pipeline. Nearly every line is annotated below.
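
For context, tools/test_net.py builds the network graph, restores the trained weights into a TensorFlow session, and then hands everything over to test_net. A rough sketch of that hand-off (a sketch only; variable names such as tfmodel are illustrative, not the script's exact code):

  saver = tf.train.Saver()
  saver.restore(sess, tfmodel)   # load the trained checkpoint into the session
  test_net(sess, net, imdb, weights_filename, max_per_image=100)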

# --------------------------------------------------------
# Tensorflow Faster R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Xinlei Chen
# Annotated by: nansbas
# --------------------------------------------------------
#-*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import cv2
import numpy as np
try:
  import cPickle as pickle
except ImportError:
  import pickle
import os
import math

from utils.timer import Timer
from utils.blob import im_list_to_blob

from model.config import cfg, get_output_dir
from model.bbox_transform import clip_boxes, bbox_transform_inv
from model.nms_wrapper import nms

def _get_image_blob(im):
  """Converts an image into a network input.
  Arguments:
    im (ndarray): a color image in BGR order
  Returns:
    blob (ndarray): a data blob holding an image pyramid
    im_scale_factors (list): list of image scales (relative to im) used
      in the image pyramid
  """
  # Convert the image to float32. Note the channels are in BGR order (OpenCV), not RGB.
  im_orig = im.astype(np.float32, copy=True)
  # Subtract the pixel means from the original image. The mean mainly affects convergence speed; any value in a reasonable range works.
  im_orig -= cfg.PIXEL_MEANS
  # Get the shortest and longest sides of the image
  im_shape = im_orig.shape
  im_size_min = np.min(im_shape[0:2])
  im_size_max = np.max(im_shape[0:2])
  # List that will hold the resized images
  processed_ims = []
  # List of scale factors
  im_scale_factors = []
  # cfg.TEST.SCALES is (600,) by default, so this loop normally runs once
  for target_size in cfg.TEST.SCALES:
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than MAX_SIZE
    if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
      im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
    # Resize the image by the scale factor im_scale computed above
    im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
            interpolation=cv2.INTER_LINEAR)
    im_scale_factors.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  # Call im_list_to_blob to pack the resized images into a single blob
  blob = im_list_to_blob(processed_ims)
  # Return the blob together with the scale factors

  return blob, np.array(im_scale_factors)
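
# [Annotator's sketch, not part of the original test.py] The stacking itself is done by
# im_list_to_blob in utils/blob.py. Assuming the NHWC layout used in this repo, it pads
# every image to the largest height/width in the list and stacks them into one float32
# array, roughly like this:
def _im_list_to_blob_sketch(ims):
  max_shape = np.array([im.shape for im in ims]).max(axis=0)
  blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3), dtype=np.float32)
  for i, im in enumerate(ims):
    blob[i, :im.shape[0], :im.shape[1], :] = im  # zero-pad each image to the common size
  return blob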

def _get_blobs(im):  # Wraps blob creation in a simpler call; the layering just makes the code tidier, the returned data is the same.
  """Convert an image and RoIs within that image into network inputs."""
  blobs = {}
  blobs['data'], im_scale_factors = _get_image_blob(im)

  return blobs, im_scale_factors

def _clip_boxes(boxes, im_shape):  # Makes sure boxes at the image border do not extend beyond the image, mainly clamping x2 and y2. A safety net that rarely kicks in.
  """Clip boxes to image boundaries."""
  # x1 >= 0
  boxes[:, 0::4] = np.maximum(boxes[:, 0::4], 0)
  # y1 >= 0
  boxes[:, 1::4] = np.maximum(boxes[:, 1::4], 0)
  # x2 < im_shape[1]
  boxes[:, 2::4] = np.minimum(boxes[:, 2::4], im_shape[1] - 1)
  # y2 < im_shape[0]
  boxes[:, 3::4] = np.minimum(boxes[:, 3::4], im_shape[0] - 1)
  return boxes
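
# [Annotator's sketch, not part of the original test.py] pred_boxes has 4 * num_classes
# columns (one box per class), so the 0::4 / 1::4 / 2::4 / 3::4 strides above clip
# x1/y1/x2/y2 for every class at once. A toy example, assuming 2 classes and a 640x480 image:
def _clip_boxes_example():
  boxes = np.array([[-5., 10., 700., 400., 3., -2., 650., 520.]])  # 2 classes -> 8 columns
  clipped = _clip_boxes(boxes, (480, 640, 3))  # im_shape[0] = 480 (height), im_shape[1] = 640 (width)
  # clipped == [[0., 10., 639., 400., 3., 0., 639., 479.]]:
  # x coordinates are capped at 639, y coordinates at 479, negatives are clamped to 0
  return clipped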

def _rescale_boxes(boxes, inds, scales):
  """Rescale boxes according to image rescaling."""
  for i in range(boxes.shape[0]):
    boxes[i,:] = boxes[i,:] / scales[int(inds[i])]

  return boxes

def im_detect(sess, net, im):  # Runs the trained network on a single image and returns the per-class scores and box coordinates.
  blobs, im_scales = _get_blobs(im)
  # Safety check: abort if more than one image (scale) is passed in
  assert len(im_scales) == 1, "Only single-image batch implemented"
  im_blob = blobs['data']
  # im_info holds the blob height, blob width and the scale factor, stored as a float32 array
  blobs['im_info'] = np.array([im_blob.shape[1], im_blob.shape[2], im_scales[0]], dtype=np.float32)
  _, scores, bbox_pred, rois = net.test_image(sess, blobs['data'], blobs['im_info'])
  boxes = rois[:, 1:5] / im_scales[0]
  scores = np.reshape(scores, [scores.shape[0], -1])
  bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
  if cfg.TEST.BBOX_REG:
    # Apply bounding-box regression deltas
    # As the processing below shows, bbox_pred holds regression deltas; adding them to
    # boxes gives the final detection boxes.
    # bbox_transform_inv is the helper that turns boxes plus deltas into full box coordinates.
    box_deltas = bbox_pred
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = _clip_boxes(pred_boxes, im.shape)
  else:
    # Simply repeat the boxes, once for each class
    # At this point we only have scores, no class-specific regression, so just tile the
    # same box once per class; scores.shape[1] is the number of classes.
    pred_boxes = np.tile(boxes, (1, scores.shape[1]))

  return scores, pred_boxes
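
# [Annotator's sketch, not part of the original test.py] For reference, bbox_transform_inv
# performs the standard R-CNN delta decoding; written out for a single box and a single set
# of deltas (dx, dy, dw, dh), it is roughly:
def _decode_one_box_sketch(x1, y1, x2, y2, dx, dy, dw, dh):
  w, h = x2 - x1 + 1.0, y2 - y1 + 1.0                       # box width/height
  ctr_x, ctr_y = x1 + 0.5 * w, y1 + 0.5 * h                 # box center
  pred_ctr_x, pred_ctr_y = dx * w + ctr_x, dy * h + ctr_y   # shift the center
  pred_w, pred_h = math.exp(dw) * w, math.exp(dh) * h       # rescale width/height
  return (pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
          pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h)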

def apply_nms(all_boxes, thresh):  # Non-maximum suppression, to keep many overlapping boxes from piling up. Note this uses the CPU NMS; I recall reading somewhere that the CPU version can be faster than the GPU one.
  """Apply non-maximum suppression to all predicted boxes output by the
  test_net method.
  """
  num_classes = len(all_boxes)
  num_images = len(all_boxes[0])
  nms_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
  for cls_ind in range(num_classes):  # for each class
    for im_ind in range(num_images):  # apply NMS to this class's detections in each image
      dets = all_boxes[cls_ind][im_ind]  # dets: the detections for this class and image
      if len(dets) == 0:  # skip if there are no detections for this class in this image
        continue
      # Extract the four coordinates and the score of each detection
      x1 = dets[:, 0]
      y1 = dets[:, 1]
      x2 = dets[:, 2]
      y2 = dets[:, 3]
      scores = dets[:, 4]
      # Find the indices of boxes with non-zero area; the real NMS comes later
      inds = np.where((x2 > x1) & (y2 > y1))[0]
      # Keep only the boxes at those indices
      dets = dets[inds,:]
      # If any boxes remain, run NMS with the given threshold
      if len(dets) == 0:
        continue

      keep = nms(dets, thresh)
      if len(keep) == 0:
        continue
      # Copy out the boxes kept by NMS
      nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
  # Return the NMS-filtered detections
  return nms_boxes
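
# [Annotator's sketch, not part of the original test.py] The nms() used above is the compiled
# wrapper imported from model.nms_wrapper; the greedy algorithm underneath is the standard one
# and can be sketched in pure NumPy:
def _nms_sketch(dets, thresh):
  x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
  areas = (x2 - x1 + 1) * (y2 - y1 + 1)
  order = scores.argsort()[::-1]                   # highest score first
  keep = []
  while order.size > 0:
    i = order[0]
    keep.append(i)                                 # keep the current best box
    # intersection of the best box with all remaining boxes
    xx1 = np.maximum(x1[i], x1[order[1:]])
    yy1 = np.maximum(y1[i], y1[order[1:]])
    xx2 = np.minimum(x2[i], x2[order[1:]])
    yy2 = np.minimum(y2[i], y2[order[1:]])
    inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
    iou = inter / (areas[i] + areas[order[1:]] - inter)
    order = order[np.where(iou <= thresh)[0] + 1]  # drop boxes that overlap too much
  return keep
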
# test_net is the top-level function: everything above is called from it. It is invoked from tools/test_net.py and is essentially the main code for the whole test-time pipeline.
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.):
  # Set the random seed first
  np.random.seed(cfg.RNG_SEED)
  """Test a Fast R-CNN network on an image database."""
  num_images = len(imdb.image_index)
  # all detections are collected into:
  #  all_boxes[cls][image] = N x 5 array of detections in
  #  (x1, y1, x2, y2, score)
  # Allocate an empty list for every (class, image) pair; detections will be filled in below
  all_boxes = [[[] for _ in range(num_images)]
         for _ in range(imdb.num_classes)]
  output_dir = get_output_dir(imdb, weights_filename)
  # timers
  # Start the timers
  _t = {'im_detect' : Timer(), 'misc' : Timer()}
  # For each test image
  for i in range(num_images):
    # Read in the image from its path
    im = cv2.imread(imdb.image_path_at(i))
    # First timer: measure the detection time
    _t['im_detect'].tic()
    # Run im_detect on the image; it returns the boxes and the per-class scores
    scores, boxes = im_detect(sess, net, im)  # the scores come from the network call, not from this function - qhy
    _t['im_detect'].toc()

    _t['misc'].tic()

    # skip j = 0, because it's the background class
    for j in range(1, imdb.num_classes):
      inds = np.where(scores[:, j] > thresh)[0]
      cls_scores = scores[inds, j]
      cls_boxes = boxes[inds, j*4:(j+1)*4]
      cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
      keep = nms(cls_dets, cfg.TEST.NMS)
      cls_dets = cls_dets[keep, :]
      all_boxes[j][i] = cls_dets  # all detections of class j in image i are stored in cls_dets

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
      image_scores = np.hstack([all_boxes[j][i][:, -1]  # stack all class scores into one vector
                    for j in range(1, imdb.num_classes)])
      if len(image_scores) > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]  # sort ascending; index -max_per_image picks the max_per_image-th highest score as the threshold
        for j in range(1, imdb.num_classes):
          keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]  # indices of detections whose score meets the threshold
          all_boxes[j][i] = all_boxes[j][i][keep, :]  # all_boxes is now score-filtered; the threshold is dynamic, taken from the max_per_image-th best detection
    _t['misc'].toc()
    # Print progress: image index, detection time, and post-processing time
    print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
        .format(i + 1, num_images, _t['im_detect'].average_time,
            _t['misc'].average_time))

  det_file = os.path.join(output_dir, 'detections.pkl')  # output path for the saved detections
  with open(det_file, 'wb') as f:
    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)  # write all detections to detections.pkl

  print('Evaluating detections')  # run the dataset's own evaluation routine on the saved detections
  imdb.evaluate_detections(all_boxes, output_dir)
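
After the run, detections.pkl can be loaded back for offline inspection; all_boxes[cls][image] is an N x 5 array in (x1, y1, x2, y2, score) order. A minimal sketch (output_dir is the directory returned by get_output_dir):

  with open(os.path.join(output_dir, 'detections.pkl'), 'rb') as f:
    all_boxes = pickle.load(f)
  dets = all_boxes[1][0]  # detections of class 1 in image 0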

 
