Faster RCNN pascal_voc.py

主要定義了一個pascal_voc類,在類的內部定義了它的一些屬性和方法。

def __init__(self, image_set, year, devkit_path=None) 構造器方法

def __init__(self, image_set, year, devkit_path=None):
        """
        Set up the PASCAL VOC image database for one year / image set.

        image_set   -- image-set name (e.g. 'trainval'); part of the imdb name.
        year        -- dataset year as a string (e.g. '2007'); it is
                       concatenated into the imdb name and into the
                       'VOC<year>' data directory name.
        devkit_path -- root of the VOC devkit; when None the value returned by
                       self._get_default_path() is used.

        Raises AssertionError when the devkit directory, or the 'VOC<year>'
        directory inside it, does not exist.
        """
        imdb.__init__(self, 'voc_' + year + '_' + image_set)
        self._year = year
        self._image_set = image_set
        if devkit_path is None:
            devkit_path = self._get_default_path()
        self._devkit_path = devkit_path
        self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year)

        # Check both directories before the image-set index is loaded, so a
        # missing devkit is reported by these messages rather than by whatever
        # error the loader would raise.
        assert os.path.exists(self._devkit_path), \
                'VOCdevkit path does not exist: {}'.format(self._devkit_path)
        assert os.path.exists(self._data_path), \
                'Path does not exist: {}'.format(self._data_path)

        self._classes = ('__background__', # always index 0
                         'aeroplane', 'bicycle', 'bird', 'boat',
                         'bottle', 'bus', 'car', 'cat', 'chair',
                         'cow', 'diningtable', 'dog', 'horse',
                         'motorbike', 'person', 'pottedplant',
                         'sheep', 'sofa', 'train', 'tvmonitor')
        # Class name -> integer index. range() is used instead of xrange() so
        # the line runs on both Python 2 and Python 3.
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._image_ext = '.jpg'
        self._image_index = self._load_image_set_index()
        # Default roidb handler: self.selective_search_roidb is a bound method
        # object, stored here without being called.
        self._roidb_handler = self.selective_search_roidb
        self._salt = str(uuid.uuid4())
        self._comp_id = 'comp4'

        # PASCAL specific config options.
        # 'use_diff' is read by _load_pascal_annotation (keep objects marked
        # difficult when True); 'rpn_file' is read by _load_rpn_roidb.
        self.config = {'cleanup'     : True,
                       'use_salt'    : True,
                       'use_diff'    : False,
                       'matlab_eval' : False,
                       'rpn_file'    : None,
                       'min_size'    : 2}

def gt_roidb(self) 以 'gt' 方法生成 roidb。其中會調用 _load_pascal_annotation 方法,從下載的數據文件 annotation 中載入圖像的 ground-truth 信息。

def gt_roidb(self):
        """
        Return the database of ground-truth regions of interest.

        The result is a list with one entry per index in self.image_index, each
        produced by self._load_pascal_annotation(). It is cached as a pickle at
        <cache_path>/<name>_gt_roidb.pkl; when that file already exists it is
        loaded and returned without reading any annotation.
        """
        cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
        if os.path.exists(cache_file):
            # NOTE: unpickling can execute arbitrary code; the cache directory
            # must only contain files written by this method.
            with open(cache_file, 'rb') as fid:
                roidb = cPickle.load(fid)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('{} gt roidb loaded from {}'.format(self.name, cache_file))
            return roidb

        roidb = [self._load_pascal_annotation(index)
                 for index in self.image_index]
        with open(cache_file, 'wb') as fid:
            cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
        print('wrote gt roidb to {}'.format(cache_file))

        return roidb

def _load_pascal_annotation(self, index) 從XML文件載入圖像信息,而且是ground-truth信息,比如boxes

def _load_pascal_annotation(self, index):
        """
        Load the ground-truth objects of one image from its PASCAL VOC XML
        annotation (<data_path>/Annotations/<index>.xml).

        Objects marked difficult are dropped unless self.config['use_diff'] is
        true; an object without a <difficult> tag is kept.

        Returns a dict with:
          'boxes'       -- (num_objs, 4) uint16 array of 0-based
                           [x1, y1, x2, y2], clamped at 0
          'gt_classes'  -- (num_objs,) int32 class indices
          'gt_overlaps' -- (num_objs, num_classes) CSR matrix holding 1.0 in
                           each object's class column
          'flipped'     -- always False
          'seg_areas'   -- (num_objs,) float32 box areas
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')
        tree = ET.parse(filename)
        objs = tree.findall('object')
        if not self.config['use_diff']:
            # Exclude the samples labeled as difficult. A missing <difficult>
            # tag is treated as "not difficult" instead of crashing on None.
            kept = []
            for obj in objs:
                flag = obj.find('difficult')
                if flag is None or int(flag.text) == 0:
                    kept.append(obj)
            objs = kept
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)

        # overlaps is a num_objs x K array, where K is the total number of
        # classes and num_objs is the number of boxes in this image.
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        # "Seg" area for pascal is just the box area
        seg_areas = np.zeros((num_objs), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            bbox = obj.find('bndbox')
            # Make pixel indexes 0-based. Clamp at 0: `boxes` is unsigned, so
            # a coordinate of 0 in the file would otherwise become -1 and
            # wrap around to 65535.
            x1, y1, x2, y2 = [
                max(float(bbox.find(tag).text) - 1, 0.0)
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            # These boxes come straight from the annotation, i.e. they are
            # the ground truth themselves, so the overlap is set to 1.
            overlaps[ix, cls] = 1.0
            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)

        overlaps = scipy.sparse.csr_matrix(overlaps)

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : seg_areas}

def rpn_roidb(self): 以 'rpn' 方法生成 roidb。先調用 gt_roidb 生成 gt_roidb,然後調用 _load_rpn_roidb 載入 rpn_roidb,最後調用其父類的靜態方法 imdb.merge_roidbs 將兩者合併;即在最後生成的 roidb 中,每一張圖像既包含 gt_roidb 中的 box 等信息,也包含 rpn_roidb 中的 box 等信息。

def rpn_roidb(self):
        """
        Return the roidb built from the RPN proposal file.

        For every case except a non-2007 'test' set, the ground-truth roidb is
        loaded, passed to _load_rpn_roidb(), and the two are merged with
        imdb.merge_roidbs(). For a non-2007 'test' set the proposals are
        loaded with no ground truth (None) and returned as they are.
        """
        # Non-2007 'test' set: proposals only, no ground truth is loaded.
        if int(self._year) != 2007 and self._image_set == 'test':
            return self._load_rpn_roidb(None)

        ground_truth = self.gt_roidb()
        # The proposal roidb is computed against the ground-truth roidb.
        proposals = self._load_rpn_roidb(ground_truth)
        return imdb.merge_roidbs(ground_truth, proposals)

def _load_rpn_roidb(self, gt_roidb) 調用父類方法create_roidb_from_box_list 從box_list 中讀取每張圖像的boxes

def _load_rpn_roidb(self, gt_roidb):
        """
        Load the pickled proposal boxes named by self.config['rpn_file'] and
        turn them into a roidb via self.create_roidb_from_box_list().

        gt_roidb -- ground-truth roidb passed through unchanged to
                    create_roidb_from_box_list(); may be None.

        Raises AssertionError when config['rpn_file'] is unset (None) or the
        file does not exist.
        """
        filename = self.config['rpn_file']
        # 'rpn_file' defaults to None; os.path.exists(None) would raise a
        # TypeError, so check for it explicitly first.
        assert filename is not None, \
               "rpn data not found: config['rpn_file'] is not set"
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('loading {}'.format(filename))
        assert os.path.exists(filename), \
               'rpn data not found at: {}'.format(filename)
        # NOTE: unpickling can execute arbitrary code; only point 'rpn_file'
        # at a trusted file.
        with open(filename, 'rb') as f:
            # box_list is a list with one element per image, so it must have
            # the same length as gt_roidb.
            box_list = cPickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)

def create_roidb_from_box_list(self, box_list, gt_roidb): 從box_list 中讀取每張圖像的boxes

def create_roidb_from_box_list(self, box_list, gt_roidb):
        """
        Build a roidb from a list of per-image box arrays.

        box_list -- sequence with one array of boxes per image; its length
                    must equal self.num_images and its order must match the
                    images (and gt_roidb, when given).
        gt_roidb -- ground-truth roidb used to fill in the overlaps, or None
                    to leave every overlap at zero.

        Returns a list of dicts, one per image, with keys 'boxes',
        'gt_classes' (all zeros), 'gt_overlaps' (num_boxes x num_classes CSR
        matrix), 'flipped' (False) and 'seg_areas' (all zeros).

        Raises AssertionError when len(box_list) != self.num_images.
        """
        # box_list must be the same size as gt_roidb, image for image.
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        # enumerate() replaces the Python-2-only xrange() index loop; the
        # assertion above guarantees both cover the same images.
        for i, boxes in enumerate(box_list):
            # Number of boxes in the current image.
            num_boxes = boxes.shape[0]
            # overlaps always has shape num_boxes x num_classes.
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # Overlap (IoU) between this image's proposal boxes and its
                # ground-truth boxes; the result is num_boxes x num_gt_boxes.
                # np.float64 replaces the np.float alias, which was removed
                # from NumPy.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                # Best-matching ground-truth box per proposal, and its overlap.
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Only proposals that overlap some ground-truth box are
                # written; the value goes in that box's class column.
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                # gt_classes is left as a 1-D array of zeros -- presumably
                # because these boxes carry no annotated class of their own.
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                # Unlike gt_roidb(), where every stored overlap is 1.0, the
                # overlaps stored here are those between the boxes in
                # box_list and the ground-truth boxes.
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章