Faster R-CNN 數據組織代碼解析

最近想花點時間對Faster R-CNN等經典的目標檢測代碼進行註釋和學習,同時留下學習筆記,和大家一同努力進步,於是有了這篇博文。
Faster R-CNN作爲R-CNN系列的第三篇文章,其訓練過程是先訓練RPN網絡,再訓練Fast R-CNN,兩個部分交替訓練,最終得到滿意的目標檢測結果。本文主要對Fast R-CNN部分的數據組織代碼(fast_rcnn_prepare_image_roidb函數)進行解析,使用MATLAB源碼。數據組織部分的代碼是把這份代碼修改爲自己所用的第一步,因此很有必要學習。
代碼引用自:https://github.com/ShaoqingRen/faster_rcnn

function [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
% [image_roidb, bbox_means, bbox_stds] = fast_rcnn_prepare_image_roidb(conf, imdbs, roidbs, bbox_means, bbox_stds)
%
% Merge one or more imdb/roidb pairs into a single per-image struct array and
% attach normalized bounding-box regression targets to every image.
%
% Inputs
%   conf       - configuration struct; it is only forwarded to
%                append_bbox_regression_targets.
%   imdbs      - one imdb struct or a cell array of them. Fields read here:
%                image_at, image_ids, sizes, name.
%   roidbs     - one roidb struct or a cell array of them, paired with imdbs.
%                Fields read here: rois(i).overlap, rois(i).boxes, rois(i).class.
%   bbox_means - (optional) per-class target means; [] or omitted means
%                "compute from this data".
%   bbox_stds  - (optional) per-class target standard deviations; [] or
%                omitted means "compute from this data".
%
% Outputs
%   image_roidb - column struct array, one element per image, with fields
%                 image_path, image_id, im_size, imdb_name, overlap, boxes,
%                 class, image (left empty) and bbox_targets.
%   bbox_means, bbox_stds - the statistics returned by
%                 append_bbox_regression_targets.
% --------------------------------------------------------
% Fast R-CNN
% Reimplementation based on Python Fast R-CNN (https://github.com/rbgirshick/fast-rcnn)
% Copyright (c) 2015, Shaoqing Ren
% Licensed under The MIT License [see LICENSE for details]
% --------------------------------------------------------

    % Default each optional argument on its own. Checking only bbox_means
    % would leave bbox_stds undefined when a caller passes four arguments.
    if ~exist('bbox_means', 'var')
        bbox_means = [];
    end
    if ~exist('bbox_stds', 'var')
        bbox_stds = [];
    end

    % Accept a single imdb/roidb pair as well as cell arrays of them.
    if ~iscell(imdbs)
        imdbs = {imdbs};
        roidbs = {roidbs};
    end

    imdbs = imdbs(:);
    roidbs = roidbs(:);

    % Re-organize the database entries into one struct per image, adding the
    % (still empty) 'image' and 'bbox_targets' fields.
    image_roidb = ...
        cellfun(@(x, y) ... % x: one imdb, y: the roidb paired with it
                arrayfun(@(z) ... % z: index of an image inside x
                        struct('image_path', x.image_at(z), 'image_id', x.image_ids{z}, 'im_size', x.sizes(z, :), 'imdb_name', x.name, ...
                        'overlap', y.rois(z).overlap, 'boxes', y.rois(z).boxes, 'class', y.rois(z).class, 'image', [], 'bbox_targets', []), ...
                [1:length(x.image_ids)]', 'UniformOutput', true),...
        imdbs, roidbs, 'UniformOutput', false);

    % Stack the per-database struct arrays into one column struct array.
    image_roidb = cat(1, image_roidb{:});

    % Compute the regression targets for every image and normalize them
    % (see append_bbox_regression_targets).
    [image_roidb, bbox_means, bbox_stds] = append_bbox_regression_targets(conf, image_roidb, bbox_means, bbox_stds);
end

function [image_roidb, means, stds] = append_bbox_regression_targets(conf, image_roidb, means, stds)
% Compute bbox_targets for every image, drop images without usable samples,
% and normalize the regression targets with per-class mean / std.
%
%   means and stds -- (k+1) * 4, include background class (row 1).
%   When either of them is missing or empty, both are estimated from the
%   targets of the images that survive the filtering step.

    % The number of classes is the number of columns of the overlap matrix.
    num_classes = size(image_roidb(1).overlap, 2);
    num_images = length(image_roidb);

    % keep(k) is false when compute_targets reports that image k has neither
    % foreground nor background samples.
    keep = true(num_images, 1);
    for k = 1:num_images
        [image_roidb(k).bbox_targets, keep(k)] = ...
            compute_targets(conf, image_roidb(k).boxes, image_roidb(k).overlap);
    end

    num_dropped = sum(~keep);
    if num_dropped > 0
        image_roidb = image_roidb(keep);
        num_images = length(image_roidb);
        fprintf('Warning: fast_rcnn_prepare_image_roidb: filter out %d images, which contains zero valid samples\n', num_dropped);
    end

    need_stats = ~exist('means', 'var') || isempty(means) || ...
                 ~exist('stds', 'var') || isempty(stds);
    if need_stats
        % Accumulate first and second moments per class:
        % var(x) = E(x^2) - E(x)^2
        counts = zeros(num_classes, 1) + eps;   % eps avoids 0/0 for unseen classes
        sum_x = zeros(num_classes, 4);
        sum_x2 = zeros(num_classes, 4);
        for k = 1:num_images
            t = image_roidb(k).bbox_targets;    % column 1: class label, columns 2:end: targets
            for c = 1:num_classes
                hit = (t(:, 1) == c);
                if ~any(hit)
                    continue;
                end
                deltas = t(hit, 2:end);
                counts(c) = counts(c) + nnz(hit);
                sum_x(c, :) = sum_x(c, :) + sum(deltas, 1);
                sum_x2(c, :) = sum_x2(c, :) + sum(deltas.^2, 1);
            end
        end

        % Per-class mean and standard deviation.
        means = bsxfun(@rdivide, sum_x, counts);
        mean_sq = bsxfun(@rdivide, sum_x2, counts);
        stds = (mean_sq - means.^2).^0.5;

        % Prepend the background class as an all-zero row.
        means = [zeros(1, 4); means];
        stds = [zeros(1, 4); stds];
    end

    % Normalize the targets: subtract the class mean, divide by the class std.
    % Row c+1 of means/stds belongs to class c because row 1 is background.
    for k = 1:num_images
        t = image_roidb(k).bbox_targets;
        for c = 1:num_classes
            hit = (t(:, 1) == c);
            if any(hit)
                centered = bsxfun(@minus, t(hit, 2:end), means(c+1, :));
                image_roidb(k).bbox_targets(hit, 2:end) = ...
                    bsxfun(@rdivide, centered, stds(c+1, :));
            end
        end
    end
end


function [bbox_targets, is_valid] = compute_targets(conf, rois, overlap)
% Compute the bounding-box regression targets of one image and decide
% whether the image is usable.
%
% Inputs
%   conf    - configuration struct; fields read here: bbox_thresh, fg_thresh,
%             bg_thresh_hi, bg_thresh_lo.
%   rois    - one box per row (converted to single below).
%   overlap - one row per roi, one column per class (may be sparse).
%
% Outputs
%   bbox_targets - size(rois, 1) x 5 single matrix. Rows whose maximum
%                  overlap is >= conf.bbox_thresh hold
%                  [class label, regression targets]; all other rows are zero.
%   is_valid     - false when no roi qualifies as foreground or background.

    overlap = full(overlap);

    % Best overlap of every roi over all classes, and the class achieving it.
    [max_overlaps, max_labels] = max(overlap, [], 2);

    % Make sure the ROIs are floats.
    rois = single(rois);

    % One row per roi, 5 columns.
    bbox_targets = zeros(size(rois, 1), 5, 'single');

    % Ground-truth ROIs are the ones whose maximum overlap is exactly 1.
    gt_inds = find(max_overlaps == 1);

    if ~isempty(gt_inds)

        % Example ROIs: the boxes whose overlap reaches bbox_thresh; these
        % are the ones a regression target is computed for.
        ex_inds = find(max_overlaps >= conf.bbox_thresh);

        % Overlap between every example ROI and every ground-truth ROI.
        ex_gt_overlaps = boxoverlap(rois(ex_inds, :), rois(gt_inds, :));

        % Sanity check: the recomputed best overlap must agree with the value
        % stored in the roidb. The tolerance used to be written '1^-6', which
        % evaluates to 1 and made the check vacuous. A real tolerance is used
        % now; it is kept looser than 1e-6 because rois is cast to single
        % above, so the recomputed values carry single-precision rounding.
        overlap_tol = 1e-4;
        assert(all(abs(max(ex_gt_overlaps, [], 2) - max_overlaps(ex_inds)) < overlap_tol), ...
            'compute_targets: recomputed overlaps disagree with the overlaps stored in the roidb');

        % Assign to every example ROI the ground-truth ROI it overlaps most.
        [~, gt_assignment] = max(ex_gt_overlaps, [], 2);
        gt_rois = rois(gt_inds(gt_assignment), :);
        ex_rois = rois(ex_inds, :);

        % Regression targets from each example ROI to its assigned gt ROI.
        [regression_label] = fast_rcnn_bbox_transform(ex_rois, gt_rois);

        % Store class label and regression targets for the example ROIs.
        bbox_targets(ex_inds, :) = [max_labels(ex_inds), regression_label];
    end

    % Foreground proposals: overlap >= fg_thresh.
    is_fg = max_overlaps >= conf.fg_thresh;

    % Background proposals: overlap in [bg_thresh_lo, bg_thresh_hi).
    is_bg = max_overlaps < conf.bg_thresh_hi & max_overlaps >= conf.bg_thresh_lo;

    % An image without any foreground or background sample is rejected.
    is_valid = any(is_fg | is_bg);
end
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章