Few-shot learning for object detection

GitHub: https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function
import sys

import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm

import dataset
import random
import math
import os
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet
import pdb

# Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3])

# Hard-coded config paths (the commented-out argv parsing above was the
# original CLI interface).
datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
weightfile = "tmp/000050.weights"
# Optional 4th CLI argument overrides the initial weight file.
if len(sys.argv) == 5:
    weightfile = sys.argv[4]

data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]   # first section of the darknet cfg holds the [net] options
meta_options = learnetcfg[0]  # first section of the reweighting-net cfg

# Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options)

# Parameters
metadict = data_options['meta']
trainlist = data_options['train']

testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus']  # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers'])

batch_size = int(net_options['batch'])
print("batch_size:",batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
# LR schedule: at each batch count in `steps`, the LR is multiplied by the
# matching entry of `scales` (consumed by adjust_learning_rate below).
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')]

# Train parameters
use_cuda = True
seed = int(time.time())

## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup  # cfg.backup overrides the backup dir read from the data file
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
    os.makedirs(backupdir)

torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss

model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile)

###################################################
### Meta-model parameters
region_loss.seen = model.seen
# NOTE(review): true division — under Python 3 these are floats; they are
# int()-cast just before the epoch loop at the bottom of the file.
processed_batches = 0 if cfg.tuning else model.seen / batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen / nsamples
max_epochs = max_batches * batch_size / nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)


def adjust_learning_rate(optimizer, processed_batches):
    """Apply the step LR schedule and return the resulting learning rate.

    Starting from the module-level base `learning_rate`, multiplies in the
    scale of every schedule boundary in `steps` that `processed_batches` has
    passed, stopping early at an exact boundary hit.  The result is written
    into every parameter group of `optimizer`.
    """
    lr = learning_rate
    for idx, boundary in enumerate(steps):
        if processed_batches < boundary:
            break
        # Boundaries beyond the scales list default to a no-op factor of 1.
        lr *= scales[idx] if idx < len(scales) else 1
        if processed_batches == boundary:
            break
    for group in optimizer.param_groups:
        group['lr'] = lr
    return lr


def train(epoch):
    """Run one training epoch over the detection set plus the meta set.

    Reads the module-level model/optimizer/config globals, advances the
    global `processed_batches` counter (which drives the LR schedule), and
    saves a weight snapshot every `cfg.save_interval` epochs.
    """
    global processed_batches
    t0 = time.time()
    # DataParallel wraps the network; unwrap to reach the custom attributes
    # (seen counter, save_weights).
    if ngpus > 1:
        cur_model = model.module
    else:
        cur_model = model

    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(trainlist, shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=True,
                            seen=cur_model.seen,
                            batch_size=batch_size,
                            num_workers=num_workers),
        batch_size=batch_size, shuffle=False, **kwargs)

    # Meta/support-image loader: yields (metax, mask) batches that are
    # consumed in lockstep with the detection batches below.
    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    with tqdm(total=len(train_loader)) as t:

        for batch_idx, (data, target) in enumerate(train_loader):
            # BUGFIX: was `metaloader.next()`, which is Python 2 only —
            # Python 3 iterators expose __next__, so use the builtin next().
            metax, mask = next(metaloader)
            t2 = time.time()
            adjust_learning_rate(optimizer, processed_batches)
            processed_batches = processed_batches + 1
            if use_cuda:
                data = data.cuda()
                metax = metax.cuda()
                mask = mask.cuda()
                # target= target.cuda()
            t3 = time.time()
            data, target = Variable(data), Variable(target)
            metax, mask = Variable(metax), Variable(mask)
            t4 = time.time()
            optimizer.zero_grad()
            t5 = time.time()
            output = model(data, metax, mask)
            t6 = time.time()
            # Keep the "seen samples" counters in sync on both the loss and
            # the model so resumed runs restart the schedule correctly.
            region_loss.seen = region_loss.seen + data.data.size(0)
            cur_model.seen = region_loss.seen
            region_loss.input_size = (data.data.size(2), data.data.size(3))
            loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
            t.set_description('Epoch %d' % epoch)
            t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
                          cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
            t.update()

            t7 = time.time()
            loss.backward()
            t8 = time.time()
            optimizer.step()
            t9 = time.time()
            # Timing breakdown of the loop; flip `False` to True to profile.
            if False and batch_idx > 1:
                avg_time[0] = avg_time[0] + (t2 - t1)
                avg_time[1] = avg_time[1] + (t3 - t2)
                avg_time[2] = avg_time[2] + (t4 - t3)
                avg_time[3] = avg_time[3] + (t5 - t4)
                avg_time[4] = avg_time[4] + (t6 - t5)
                avg_time[5] = avg_time[5] + (t7 - t6)
                avg_time[6] = avg_time[6] + (t8 - t7)
                avg_time[7] = avg_time[7] + (t9 - t8)
                avg_time[8] = avg_time[8] + (t9 - t1)
                print('-------------------------------')
                print('       load data : %f' % (avg_time[0] / (batch_idx)))
                print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
                print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
                print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))
                print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
                print('    forward loss : %f' % (avg_time[5] / (batch_idx)))
                print('        backward : %f' % (avg_time[6] / (batch_idx)))
                print('            step : %f' % (avg_time[7] / (batch_idx)))
                print('           total : %f' % (avg_time[8] / (batch_idx)))
            t1 = time.time()
        print('')
        t1 = time.time()
        logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

        if (epoch + 1) % cfg.save_interval == 0:
            logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
            cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))

# Cast to int: the epoch arithmetic above uses true division, so these are
# floats under Python 3 and range() requires ints.
init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:",init_epoch)
print("max_epochs:",max_epochs)
# Main training loop; train() handles LR scheduling and periodic checkpoints.
for epoch in range(init_epoch, max_epochs):
    train(epoch)

 

region_loss.py

import time
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
from utils import *
from cfg import cfg
from numbers import Number
from random import random, randint
import pdb


def neg_filter(pred_boxes, target, withids=False):
    """Randomly subsample negative (all-zero-target) rows of a batch.

    With cfg.neg_ratio == 'full' every row is kept.  With a numeric
    cfg.neg_ratio, every positive row (any nonzero target) is kept and each
    negative row survives with probability neg_ratio * n_pos / n_neg.

    Returns (pred_boxes, target), plus the kept indices when withids=True.

    Raises:
        NotImplementedError: if cfg.neg_ratio is neither 'full' nor a number.
    """
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0  # True where the row has any target
        flags = flags.cpu().data.tolist()
        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
        if ratio >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            # BUGFIX: was np.argwhere(flags).squeeze(), which collapses to a
            # 0-d index when exactly one row survives and then drops the
            # batch dimension on indexing.  np.nonzero keeps a 1-D index
            # array (same approach as neg_filter_v2 below).
            inds = np.nonzero(flags)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    else:
        return pred_boxes, target


def neg_filter_v2(pred_boxes, target, withids=False):
    """Subsample negative (all-zero-target) rows, keeping at least one row.

    Behaves like neg_filter, but guarantees a non-empty result: if the random
    draw keeps nothing, one row is re-enabled at random.  Returns
    (pred_boxes, target) and, when withids=True, the kept row indices too.
    """
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        # Keep everything.
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        pos = torch.sum(target, 1) != 0  # rows with at least one target box
        pos = pos.cpu().data.tolist()
        n_pos = sum(pos)
        # Probability of keeping each negative row.
        keep_prob = cfg.neg_ratio * n_pos * 1. / (len(pos) - n_pos)
        if keep_prob >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            keep = [0 if f == 0 and random() > keep_prob else 1 for f in pos]
            if sum(keep) == 0:
                # Nothing survived the draw: force one random row back in.
                keep[randint(0, len(keep) - 1)] = 1
            inds = np.nonzero(keep)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    """Build YOLOv3-style training targets for one feature map (CUDA only).

    Args:
        pred_boxes: predicted boxes for this scale; viewed below as
            (nB, nA, fh, fw, 4) in input-image pixel coordinates.
        target: (nB, 50*5) ground truth; each group of 5 is
            (cls, cx, cy, w, h) with coords normalized to [0, 1]; a zero
            width marks an empty slot.
        conf: objectness predictions; viewed below as (nB, nA, fh, fw).
        anchors: (nA, 2) anchor (w, h) in input pixels for this scale.
        num_anchors: nA, anchors per cell.
        feature_size: [fh, fw] of this feature map.
        input_size: (H, W) of the network input.
        ignore_thresh: anchors whose shape-IoU with a GT exceeds this are not
            penalized as background.

    Returns:
        (nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls,
         detected50, detected75) — per-cell masks/regression targets plus
        per-GT detection flags at IoU 0.5 / 0.75.
    """
    nB = target.size(0)
    nA = num_anchors
    # print('anchor_step: ', anchor_step)
    # obj_mask marks the single responsible anchor per GT; noobj_mask marks
    # cells penalized as background (starts all-on, carved out below).
    obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
    noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
    tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
    iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    # Flatten to one box per row and drop empty slots (width == 0).
    tboxes = target.view(-1, 5)
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
    # Batch index of each surviving box.
    # NOTE(review): the repeat(1, 50) assumes exactly 50 box slots per image
    # (i.e. target.size(1) == 250) — confirm against the dataset layout.
    ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
    ind_B = ind_B[nonzero_ind]
    # GT centers in feature-map cell units; GT sizes in input pixels.
    gx = (tboxes[:, 1] * feature_size[1]).float()
    gy = (tboxes[:, 2] * feature_size[0]).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)
    # Shape-only IoU (centers zeroed out) between every GT and every anchor.
    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
    best_ious, best_a = ious.max(1)
    gj = gy.long()
    gi = gx.long()
    # The best-matching anchor at the GT's cell is responsible for the box.
    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0

    # Any anchor whose shape-IoU with this GT exceeds the threshold is also
    # excluded from the background penalty.
    for i, iou in enumerate(ious):
        if (iou > ignore_thresh).sum():
            noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

    # Regression targets: cell-relative center offsets, log-scale sizes.
    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()
    pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
    conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
    # GT boxes in input-pixel coords for scoring the responsible predictions.
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0),
                              gh.unsqueeze(0)], 0)

    # IoU of each responsible prediction vs its GT; per-GT detection flags
    # require objectness > 0.5 as well.
    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75


class RegionLoss(nn.Module):
    """Single-scale YOLOv2-style region loss (legacy).

    NOTE(review): forward() below calls build_targets() with the original
    YOLOv2 argument list (anchors, nA, nC, nH, nW, noobject_scale,
    object_scale, thresh, seen) and unpacks 11 return values, which does NOT
    match the build_targets() defined above in this file (8 args, 12
    returns).  As written this forward() would fail at the call/unpacking
    step; RegionLossV2 is the loss actually returned to the trainer.
    This class appears to be dead/legacy code — confirm before use.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        # NOTE(review): true division — float under Python 3; used as a
        # view() size in forward(), which requires an int there.
        self.anchor_step = len(anchors) / num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        # import pdb; pdb.set_trace()
        # output : BxAs*(4+1+num_classes)*H*W

        # if target.dim() == 3:
        #     # target : B * n_cls * l
        #     l = target.size(-1)
        #     target = target.permute(1,0,2).contiguous().view(-1, l)
        # Flatten (B, n_cls, l) targets to (B*n_cls, l).
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        bef = target.size(0)
        # Randomly subsample negative rows to balance the batch.
        output, target = neg_filter(output, target)
        # print("{}/{}".format(target.size(0), bef))

        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        # Split the raw head output into x/y (sigmoid), w/h (raw), objectness
        # (sigmoid) and per-class scores.
        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

        t1 = time.time()

        # Decode predictions to absolute boxes (grid offset + anchor scaling).
        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        # NOTE(review): legacy YOLOv2 build_targets signature — see class
        # docstring; does not match the build_targets defined in this file.
        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
                                                                                                    target.data,
                                                                                                    self.anchors, nA,
                                                                                                    nC, \
                                                                                                    nH, nW,
                                                                                                    self.noobject_scale,
                                                                                                    self.object_scale,
                                                                                                    self.thresh,
                                                                                                    self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            # Meta-YOLO mode: collapse all classes to a single class.
            tcls.zero_()
        # NOTE(review): .data[0] on a 0-dim tensor is pre-0.5 PyTorch; newer
        # versions require .item() (same for the loss prints below).
        nProposals = int((conf > 0.25).float().sum().data[0])

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)

        t3 = time.time()

        # Masked sum-squared-error terms (size_average=False is deprecated in
        # newer PyTorch; reduction='sum' is the modern equivalent).
        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
            loss_conf.data[0], loss_cls.data[0], loss.data[0]))
        return loss


class RegionLossV2(nn.Module):
    """
    Yolo region loss + Softmax classification across meta-inputs

    Multi-scale (3 feature maps) YOLOv3-style loss where classification is a
    softmax across the `cs` meta/reweighting inputs: the same image is run
    once per meta class, and the class score of a cell is its objectness-head
    response under each meta input.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        # Flat anchor list: 6 values (3 anchors x (w, h)) per scale.
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        # Updated every batch by the trainer (region_loss.input_size = ...).
        self.input_size = input_size
        # Strides of the three YOLO heads (coarse to fine).
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        # output : (bs*cs, nA*(5+1), N)
        # target : (bs, cs, 50*5)
        # Get all classification prediction
        # pdb.set_trace()
        bs = target.size(0)
        cs = target.size(1)  # number of meta classes
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)  # total cells summed over the 3 scales
        # feature_size = [[26, 26], [52, 52], [104, 104]]
        # Class scores: for each (anchor, cell), softmax over the cs
        # meta-inputs.
        cls = output.view(output.size(0), nA, (5 + nC), N)
        cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_conf = F.softmax(cls, 1)
        _, cls_max_ids = torch.max(cls_conf, 1)
        cls_max_ids = cls_max_ids.data
        # One-hot mask of the argmax meta-class per (anchor, cell).
        pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()
        pre_cls_mask[torch.linspace(0, bs * nA * N - 1, bs * nA * N).long().cuda(), cls_max_ids] = 1
        pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N)

        # Rearrange target and perform filtering operation
        target = target.view(-1, target.size(-1))
        # bef = target.size(0)
        output, target, inds = neg_filter_v2(output, target, withids=True)
        # counts[i] = how many kept (image, meta-class) rows belong to
        # original batch image i.
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))
        # print("{}/{}".format(target.size(0), bef))
        pre_cls_mask = pre_cls_mask[inds]

        t0 = time.time()
        nB = output.data.size(0)  # rows that survived neg_filter_v2

        # Split the head output into x/y (sigmoid), w/h (raw), objectness.
        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        # cls  = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
        # cls  = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
        t1 = time.time()

        # Decode predictions to input-pixel boxes: per-scale cell grids,
        # stride factors, and anchor sizes, concatenated along the cell axis.
        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
        grid_x = []
        grid_y = []
        anchor_w = []
        anchor_h = []
        scale = []
        feature_size = []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            # print("feature_h:",feature_h)
            # print("feature_w:",feature_w)
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1) \
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t() \
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)
        for i in range(3):
            # Anchors for scale i: 6 flat values -> (nA, 2) of (w, h).
            aw = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([0])).cuda()
            ah = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
                .index_select(1, torch.LongTensor([1])).cuda()
            anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1]) \
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
            anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1]) \
                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)
        pred_boxes[0] = (x.data + grid_x) * scale
        pred_boxes[1] = (y.data + grid_y) * scale
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)
        t2 = time.time()
        # Build targets per scale and concatenate along the cell axis.
        nGT = 0
        iou_scores = []
        obj_mask = []
        noobj_mask = []
        tx = []
        ty = []
        tw = []
        th = []
        tconf = []
        tcls = []
        start_N = 0
        detected50 = torch.zeros(0)
        detected75 = torch.zeros(0)
        for imap in range(3):
            nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp, tconf_temp, \
            tcls_temp, detected50_temp, detected75_temp = build_targets(
                pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],
                target.data.cuda(),
                conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],
                torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                nA,
                feature_size[imap],
                self.input_size,
                self.thresh)
            # Per-GT detection flags are summed across scales and
            # re-thresholded below (a GT detected at any scale counts once).
            if not len(detected50):
                detected50 = torch.zeros(nGT).cuda()
            if not len(detected75):
                detected75 = torch.zeros(nGT).cuda()
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += feature_size[imap][0] * feature_size[imap][1]
            iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
            tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

        iou_scores = torch.cat(iou_scores, 2)
        obj_mask = torch.cat(obj_mask, 2)
        noobj_mask = torch.cat(noobj_mask, 2)
        tx = torch.cat(tx, 2)
        ty = torch.cat(ty, 2)
        tw = torch.cat(tw, 2)
        th = torch.cat(th, 2)
        tconf = torch.cat(tconf, 2)
        tcls = torch.cat(tcls, 2)

        # Take care of class mask
        # Aggregate the per-row (image, meta-class) object masks back to one
        # mask per original image: counts[i] consecutive filtered rows belong
        # to image i.
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for i in range(len(counts)):
            if counts[i] == 0:
                cur_mask = torch.zeros(nA, N).cuda()
                cur_tcls = torch.zeros(nA, N).cuda()
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += counts[i]
        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
        tcls = torch.stack(tcls_list)

        # Only cells claimed by exactly one object contribute to the
        # classification loss.
        cls_mask = (cls_mask == 1)
        # Metrics: precision/recall at IoU 0.5/0.75 and proposal count.
        conf50 = (conf > 0.5).float().data
        iou50 = (iou_scores > 0.5).float()
        detected_mask = conf50 * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        detected50 = (detected50 > 0).float()
        detected75 = (detected75 > 0).float()
        recall50 = detected50.sum() / (nGT + 1e-16)
        recall75 = detected75.sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())
        tx = Variable(tx)
        ty = Variable(ty)
        tw = Variable(tw)
        th = Variable(th)
        tconf = Variable(tconf)

        obj_mask = Variable(obj_mask.bool())
        noobj_mask = Variable(noobj_mask.bool())
        # cls_mask   = Variable(cls_mask.view(-1, 1).repeat(1,cs).cuda())
        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
        cls_max_ids = cls_max_ids[cls_mask.view(-1)]
        tcls = Variable(tcls[cls_mask].long())
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()

        t3 = time.time()

        # Coordinate losses on responsible cells only; objectness split into
        # object and (heavily weighted) no-object terms.
        loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
        loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
        loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
        loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
        loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            # No classifiable cells this batch: contribute a zero loss.
            loss_cls = Variable(torch.Tensor([0]).float().cuda())

        # # pdb.set_trace()
        # ids = [9,11,12,16]
        # new_cls, new_tcls = select_classes(cls, tcls, ids)
        # new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())
        # loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)
        # loss_cls_new *= 10
        # loss_cls += loss_cls_new

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        if False:
            print('-----------------------------------')
            print('        activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print('     build targets : %f' % (t3 - t2))
            print('       create loss : %f' % (t4 - t3))
            print('             total : %f' % (t4 - t0))
        # print(
        #     '%d: nGT %d, precision %f, recall50 %f, recall75 %f, cls_acc %f, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % \
        #     (self.seen, nGT, precision, recall50, recall75, cls_acc, loss_x.item(), loss_y.item(), \
        #      loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))
        # print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, cls_new %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0], loss_conf.data[0], loss_cls.data[0], loss_cls_new.data[0], loss.data[0]))
        # Returns the scalar loss plus detached metrics consumed by train().
        return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals


def select_classes(pred, tgt, ids):
    """Keep predictions whose target class is in `ids`, remapping the labels.

    Args:
        pred: (N, C) score tensor; rows are kept where the target class is in
            `ids`, and columns are restricted to `ids` (in order).
        tgt: length-N tensor of class indices (moved to numpy internally).
        ids: class indices to keep; ids[i] is remapped to new label i.

    Returns:
        (new_pred, new_tgt): the filtered (M, len(ids)) scores and the
        remapped numpy label array of length M.
    """
    # convert tgt to numpy
    tgt = tgt.cpu().data.numpy()
    # Map ids[i] -> i, everything else -> -1.
    # BUGFIX: the old code built labels with enumerate starting at 0 and then
    # filtered with `new_tgt > 0`, which silently dropped every sample whose
    # class was ids[0]; it also used argwhere(...).squeeze(), which collapses
    # to a 0-d index when only one sample matches.
    new_tgt = np.full(tgt.shape, -1, dtype=np.int64)
    for i, d in enumerate(ids):
        new_tgt[tgt == d] = i
    idxes = np.nonzero(new_tgt >= 0)[0]  # 1-D even for a single match
    new_pred = pred[idxes]
    new_pred = new_pred[:, ids]
    new_tgt = new_tgt[idxes]
    return new_pred, new_tgt

  

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章