maskrcnn_benchmark理解記錄——modeling\roi_heads\keypoint_head\loss.py

這裏是keypoint的label的處理和損失計算

1.Matcher 
返回matches (Tensor[int64]):
一個長度爲N的張量,其中第i個元素要麼是與預測i匹配的gt索引(取值範圍[0, M-1]),要麼是負值,表示預測i沒有匹配到任何gt。因此可以根據該值是非負索引還是負值來簡單判斷是否匹配
    該類爲每個預測的“element”(例如,框)分配ground-truth元素。 每個預測元素將具有正好零或一個匹配; 
    每個ground-truth元素可以被分配給零個或多個預測元素。
    匹配基於MxN match_quality_matrix,其表徵每個(ground-truth, predicted)對的匹配程度。 
    例如,如果元素是框,則矩陣可以包含框IoU重疊值。
    匹配器返回大小爲N的張量,其包含與預測n匹配的ground-truth元素m的索引。 如果沒有匹配項,則返回負值。

2.BalancedPositiveNegativeSampler 用於平衡正負樣本比例。這裏POSITIVE_FRACTION=0.25,即正樣本最多佔每張圖片採樣總數的1/4,對應的正負樣本比例約爲1:3
記錄下參數:            
1)MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 是每張圖片選擇的element數
 RoI minibatch size *per image* (number of regions of interest [ROIs])
 每個訓練的minibatch的總RoIs=TRAIN.BATCH_SIZE_PER_IM * TRAIN.IMS_PER_BATCH
 TRAIN.IMS_PER_BATCH=2    #E.g., 1 gpu 512*2*1=1024
 TRAIN.IMS_PER_BATCH = 每個gpu的images per batch * gpu數量,一般爲 IMS_PER_BATCH=2*num_gpu
 也就是說TRAIN.IMS_PER_BATCH是設定好的總數(一般一個gpu對應2張圖片),gpu數量是已知的,由此可以反向計算出每個gpu的images per batch=2

2)MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.25  每個batch中正樣本/elements的比例
RoI minibatch中被標記爲前景(即class > 0)的目標比例爲0.25

3.loss_evaluator = KeypointRCNNLossComputation(matcher, fg_bg_sampler, resolution)

import torch
from torch.nn import functional as F

from maskrcnn_benchmark.modeling.matcher import Matcher

from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import (
    BalancedPositiveNegativeSampler,
)
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou
from maskrcnn_benchmark.modeling.utils import cat
from maskrcnn_benchmark.layers import smooth_l1_loss
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist

from maskrcnn_benchmark.structures.keypoint import keypoints_to_heat_map


def project_keypoints_to_heatmap(keypoints, proposals, discretization_size):
    """Encode keypoints as heatmap targets relative to the proposal boxes.

    Arguments:
        keypoints: object exposing a ``keypoints`` attribute (presumably the
            raw keypoint tensor -- confirm against the keypoint structure).
        proposals: box container supporting ``convert("xyxy")`` and ``bbox``.
        discretization_size: forwarded unchanged to ``keypoints_to_heat_map``.

    Returns:
        The result of ``keypoints_to_heat_map``; the caller in this file
        unpacks it as a ``(heatmaps, valid)`` pair.
    """
    # The heatmap helper is fed boxes in (x1, y1, x2, y2) layout.
    xyxy_boxes = proposals.convert("xyxy").bbox
    keypoint_data = keypoints.keypoints
    return keypoints_to_heat_map(keypoint_data, xyxy_boxes, discretization_size)


def cat_boxlist_with_keypoints(boxlists):
    """Concatenate box lists that all carry a "keypoints" field.

    The keypoint data is concatenated separately along dimension 0, the
    remaining fields are merged through ``cat_boxlist``, and the combined
    keypoints are then attached to the merged result.

    Arguments:
        boxlists: sequence of box containers, each having a "keypoints" field.

    Returns:
        The merged box container with a "keypoints" field holding the
        concatenated keypoint data.
    """
    assert all(boxlist.has_field("keypoints") for boxlist in boxlists)

    merged_keypoints = cat(
        [boxlist.get_field("keypoints").keypoints for boxlist in boxlists], 0
    )

    # Every field except "keypoints" goes through the generic concatenation;
    # the field names are taken from the first box list.
    kept_fields = [
        name for name in boxlists[0].get_fields() if name != "keypoints"
    ]
    stripped = [boxlist.copy_with_fields(kept_fields) for boxlist in boxlists]

    merged = cat_boxlist(stripped)
    merged.add_field("keypoints", merged_keypoints)
    return merged


def _within_box(points, boxes):
    """Validate which keypoints are contained inside a given box.
    points: NxKx2
    boxes: Nx4
    output: NxK
    """
    x_within = (points[..., 0] >= boxes[:, 0, None]) & (
        points[..., 0] <= boxes[:, 2, None]
    )
    y_within = (points[..., 1] >= boxes[:, 1, None]) & (
        points[..., 1] <= boxes[:, 3, None]
    )
    return x_within & y_within


class KeypointRCNNLossComputation(object):
    """Build keypoint training targets for proposals and compute the loss.

    ``subsample`` attaches labels and keypoints to the proposals and keeps the
    sampled positives; ``__call__`` converts the attached keypoints into
    heatmap targets and evaluates a cross-entropy loss against the logits.
    """

    def __init__(self, proposal_matcher, fg_bg_sampler, discretization_size):
        """
        Arguments:
            proposal_matcher (Matcher)
            fg_bg_sampler (BalancedPositiveNegativeSampler)
            discretization_size (int): discretization size handed to the
                heatmap projection
        """
        self.discretization_size = discretization_size
        self.fg_bg_sampler = fg_bg_sampler
        self.proposal_matcher = proposal_matcher

    def match_targets_to_proposals(self, proposal, target):
        """Select, for every proposal, the ground-truth entry it matches.

        Arguments:
            proposal: box container with the proposals of one image.
            target: box container with the ground truth of one image.

        Returns:
            A ground-truth container indexed per proposal, restricted to the
            "labels" and "keypoints" fields, with the raw matcher output
            stored in an extra "matched_idxs" field.
        """
        # Pairwise IoU between ground truth and proposals drives the matching.
        iou_matrix = boxlist_iou(target, proposal)
        matched_idxs = self.proposal_matcher(iou_matrix)

        # Keypoint RCNN only needs the "labels" and "keypoints" fields to
        # create its targets.
        slim_target = target.copy_with_fields(["labels", "keypoints"])

        # NB: the indices must be clamped because an image can hold a single
        # GT while matched_idxs can be -2, which would go out of bounds. The
        # unclamped values are kept in the "matched_idxs" field.
        gather_idxs = matched_idxs.clamp(min=0)
        matched_targets = slim_target[gather_idxs]
        matched_targets.add_field("matched_idxs", matched_idxs)
        return matched_targets

    def prepare_targets(self, proposals, targets):
        """Compute per-image labels and matched keypoints for the proposals.

        Labels are set to 0 where the matcher reported
        ``Matcher.BELOW_LOW_THRESHOLD`` and to -1 where the matched target has
        no keypoint that is both flagged visible and inside the matched box.

        Arguments:
            proposals: iterable of per-image proposal containers.
            targets: iterable of per-image ground-truth containers.

        Returns:
            ``(labels, keypoints)``: two lists with one entry per image.
        """
        labels, keypoints = [], []
        for image_proposals, image_targets in zip(proposals, targets):
            matched = self.match_targets_to_proposals(
                image_proposals, image_targets
            )
            matched_idxs = matched.get_field("matched_idxs")

            image_labels = matched.get_field("labels").to(dtype=torch.int64)

            # This can probably be removed, but is left here for clarity and
            # completeness.
            # TODO check if BELOW_LOW_THRESHOLD is the right constant here.
            below_threshold = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            image_labels[below_threshold] = 0

            image_keypoints = matched.get_field("keypoints")
            kp_data = image_keypoints.keypoints
            inside_box = _within_box(kp_data, matched.bbox)
            # Channel 2 of the keypoint data is read as a visibility flag.
            flagged_visible = kp_data[..., 2] > 0
            has_usable_keypoint = (inside_box & flagged_visible).sum(1) > 0

            # -1 marks entries without any usable keypoint.
            image_labels[~has_usable_keypoint] = -1

            labels.append(image_labels)
            keypoints.append(image_keypoints)

        return labels, keypoints

    def subsample(self, proposals, targets):
        """
        This method performs the positive/negative sampling, and return
        the sampled proposals.
        Note: this function keeps a state.

        Arguments:
            proposals (list[BoxList])
            targets (list[BoxList])
        """
        labels, keypoints = self.prepare_targets(proposals, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

        proposals = list(proposals)
        # Attach the computed labels and keypoints to each image's proposals.
        for image_labels, image_keypoints, image_proposals in zip(
            labels, keypoints, proposals
        ):
            image_proposals.add_field("labels", image_labels)
            image_proposals.add_field("keypoints", image_keypoints)

        # Reduce each image's proposals to the sampled positives; the negative
        # masks returned by the sampler are not used here.
        for img_idx, (pos_mask, _neg_mask) in enumerate(
            zip(sampled_pos_inds, sampled_neg_inds)
        ):
            keep = torch.nonzero(pos_mask).squeeze(1)
            proposals[img_idx] = proposals[img_idx][keep]

        # State kept on the instance (see the note in the docstring).
        self._proposals = proposals
        return proposals

    def __call__(self, proposals, keypoint_logits):
        """Compute the keypoint loss for the given proposals.

        Arguments:
            proposals: iterable of per-image box containers, each carrying a
                "keypoints" field.
            keypoint_logits: 4-D tensor, unpacked as (N, K, H, W).

        Returns:
            The cross-entropy loss over the valid keypoints, or a zero tensor
            derived from ``keypoint_logits`` when there is nothing to train on.
        """
        target_chunks = []
        valid_chunks = []
        for image_proposals in proposals:
            image_keypoints = image_proposals.get_field("keypoints")
            image_heatmaps, image_valid = project_keypoints_to_heatmap(
                image_keypoints, image_proposals, self.discretization_size
            )
            target_chunks.append(image_heatmaps.view(-1))
            valid_chunks.append(image_valid.view(-1))

        keypoint_targets = cat(target_chunks, dim=0)
        valid_mask = cat(valid_chunks, dim=0).to(dtype=torch.uint8)
        valid_idx = torch.nonzero(valid_mask).squeeze(1)

        # The mean reduction inside the loss does not accept empty tensors, so
        # handle that case separately; multiplying the summed logits by zero
        # yields a zero that is still computed from the logits.
        nothing_to_train = keypoint_targets.numel() == 0 or len(valid_idx) == 0
        if nothing_to_train:
            return keypoint_logits.sum() * 0

        # One row per (instance, keypoint) pair, one column per heatmap cell.
        N, K, H, W = keypoint_logits.shape
        flat_logits = keypoint_logits.view(N * K, H * W)

        return F.cross_entropy(flat_logits[valid_idx], keypoint_targets[valid_idx])

# 這裏是處理labels

def make_roi_keypoint_loss_evaluator(cfg):
    """Assemble the keypoint loss evaluator from the configuration.

    Arguments:
        cfg: configuration object; reads ``MODEL.ROI_HEADS`` and
            ``MODEL.ROI_KEYPOINT_HEAD.RESOLUTION``.

    Returns:
        A ``KeypointRCNNLossComputation`` wired with a matcher, a
        foreground/background sampler and the heatmap resolution.
    """
    roi_heads_cfg = cfg.MODEL.ROI_HEADS

    # IoU thresholds deciding which RoIs count as foreground or background.
    # NOTE(review): the accompanying notes quote 0.5 for both -- confirm
    # against the active config.
    proposal_matcher = Matcher(
        roi_heads_cfg.FG_IOU_THRESHOLD,
        roi_heads_cfg.BG_IOU_THRESHOLD,
        allow_low_quality_matches=False,
    )

    # NOTE(review): the accompanying notes quote BATCH_SIZE_PER_IMAGE=512 and
    # POSITIVE_FRACTION=0.25 -- confirm against the active config.
    sampler = BalancedPositiveNegativeSampler(
        roi_heads_cfg.BATCH_SIZE_PER_IMAGE, roi_heads_cfg.POSITIVE_FRACTION
    )

    # NOTE(review): the accompanying notes quote a resolution of 56 -- confirm.
    heatmap_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.RESOLUTION
    return KeypointRCNNLossComputation(
        proposal_matcher, sampler, heatmap_resolution
    )

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章