mmdetection code notes: tools/test.py

tools/test.py

def main():
    args = parse_args()

    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    # switch the dataset to test mode
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))
    if args.gpus == 1:
        model = build_detector(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(model, args.checkpoint)
        model = MMDataParallel(model, device_ids=[0])

        data_loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(model, data_loader, args.show)
    else:
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            args.checkpoint,
            dataset,
            _data_func,
            range(args.gpus),
            workers_per_gpu=args.proc_per_gpu)

    if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = args.out
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_file = args.out + '.json'
                    results2json(dataset, outputs, result_file)
                    coco_eval(result_file, eval_types, dataset.coco)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = args.out + '.{}.json'.format(name)
                        results2json(dataset, outputs_, result_file)
                        coco_eval(result_file, eval_types, dataset.coco)


Let's unpack the obj_from_dict function.

def obj_from_dict(info, parent=None, default_args=None):
    """Initialize an object from dict.
    The dict must contain the key "type", which indicates the object type, it
    can be either a string or type, such as "list" or ``list``. Remaining
    fields are treated as the arguments for constructing the object.
    Args:
        info (dict): Object types and arguments.
        parent (:class:`module`): Module which may contain the expected
            object classes.
        default_args (dict, optional): Default arguments for initializing the
            object.
    Returns:
        any type: Object built from the dict.
    """
    assert isinstance(info, dict) and 'type' in info
    assert isinstance(default_args, dict) or default_args is None
    args = info.copy()
    obj_type = args.pop('type')
    if mmcv.is_str(obj_type):
        if parent is not None:
            obj_type = getattr(parent, obj_type)
        else:
            obj_type = sys.modules[obj_type]
    elif not isinstance(obj_type, type):
        raise TypeError('type must be a str or valid type, but got {}'.format(
            type(obj_type)))
    if default_args is not None:
        for name, value in default_args.items():
            args.setdefault(name, value)
    return obj_type(**args)

The input info is a dict taken from the cfg; here it is the test data config.
The second argument, parent, is the module that contains the expected object classes.
The third is a dict of default arguments used to initialize the object.
First, the function asserts that info is a dict and that it contains the key 'type'.
default_args is likewise checked to be a dict or None.
Then the value of the type key is popped into obj_type.
If obj_type is a string, the function checks whether the parent argument was given:

  • If parent is not None, the class is resolved via obj_type = getattr(parent, obj_type). At test time, parent is the datasets package and obj_type is the type field of the test dict in the cfg, so this amounts to loading the dataset class of that name from the datasets package.

If obj_type is neither a string nor a valid Python type, a TypeError is raised.

Then, if default_args is not None, its key/value pairs are merged into args (the remaining fields of the test dict) via setdefault, so explicit cfg values win over the defaults.
Finally the constructed object is returned: return obj_type(**args).
Here obj_type is the dataset class and args is the dict of its constructor arguments.
Given a cfg dict, a parent module, and default arguments, this same function can instantiate any object; loading the test dataset is just the example at hand.
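To see the mechanics in isolation, here is a runnable toy that reimplements the lookup with torch.nn as the parent module (obj_from_dict_demo is a hypothetical stand-in for illustration, not mmcv code):

import torch.nn as nn

def obj_from_dict_demo(info, parent, default_args=None):
    # same resolution logic as above, minus the error handling
    args = info.copy()
    obj_type = getattr(parent, args.pop('type'))
    for name, value in (default_args or {}).items():
        args.setdefault(name, value)   # explicit cfg values win over defaults
    return obj_type(**args)

layer = obj_from_dict_demo(dict(type='Linear', in_features=8), nn,
                           dict(out_features=4, bias=False))
print(layer)   # Linear(in_features=8, out_features=4, bias=False)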

Moving on: if args.gpus is 1, we take the single-GPU path.
It calls build_detector to construct the model.

def build_detector(cfg, train_cfg=None, test_cfg=None):
    from . import detectors
    return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))

As you can see, this is a thin wrapper around build() specialized for building detectors.

def build(cfg, parent=None, default_args=None):
    if isinstance(cfg, list):
        modules = [_build_module(cfg_, parent, default_args) for cfg_ in cfg]
        return nn.Sequential(*modules)
    else:
        return _build_module(cfg, parent, default_args)

Looking at build itself: it can handle a list of module configs, building each one and chaining them in an nn.Sequential; otherwise it delegates straight to _build_module.

def _build_module(cfg, parent=None, default_args=None):
    return cfg if isinstance(cfg, nn.Module) else obj_from_dict(
        cfg, parent, default_args)

So this is just the obj_from_dict function from before, returning a detector module object built from the cfg.
The only difference is the arguments it receives: cfg=cfg.model, parent=detectors, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg).
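As a quick sketch of the list branch, using torch.nn as the parent module purely for illustration (and assuming obj_from_dict is importable from mmcv.runner, as in mmcv of this era):

import torch.nn as nn
from mmcv.runner import obj_from_dict

cfgs = [dict(type='Linear', in_features=8, out_features=4),
        dict(type='ReLU')]
seq = nn.Sequential(*[obj_from_dict(c, nn) for c in cfgs])
print(seq)   # Sequential of a Linear followed by a ReLU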

Next, load_checkpoint loads the checkpoint weights into the model.
MMDataParallel then wraps the model and puts it on the GPU.
After that, build_dataloader is called to create the dataloader object.
Let's look at that function.

def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     **kwargs):

Its inputs are the dataset object, the number of images per GPU, the number of dataloader workers per GPU, the number of GPUs, and whether to load data in distributed mode.

  • The distributed case:

if dist:
    rank, world_size = get_dist_info()
    sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size,
                                      rank)
    batch_size = imgs_per_gpu
    num_workers = workers_per_gpu

It fetches the distributed info (rank and world size) and calls DistributedGroupSampler to partition the dataset, giving us the sampler.

  • The single-GPU (non-distributed) case:

else:
    if not kwargs.get('shuffle', True):
        sampler = None
    else:
        sampler = GroupSampler(dataset, imgs_per_gpu)
    batch_size = num_gpus * imgs_per_gpu
    num_workers = num_gpus * workers_per_gpu

Here GroupSampler is called to obtain the sampler (or, when shuffle=False, no sampler at all).

A note on kwargs.get():
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.
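A two-line illustration:

kwargs = dict(shuffle=False)
print(kwargs.get('shuffle', True))      # False: key present, default ignored
print(kwargs.get('pin_memory', True))   # True: key missing, default returned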

Then DataLoader from torch.utils.data is called to create the dataloader object.

data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
        pin_memory=False,
        **kwargs)

    return data_loader

doc:sampler (Sampler, optional) – defines the strategy to draw samples from the dataset. If specified, shuffle must be False.
The docs say that if sampler is specified, shuffle must be False. This matches the branch above: when shuffle is False, no sampler is needed, so sampler is set to None; when shuffle is True, the (grouped) shuffling is delegated to GroupSampler instead of the DataLoader's built-in shuffle.
doc:collate_fn (callable, optional) – merges a list of samples to form a mini-batch.
Now let's look at the collate function.

def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.
    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.
    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes
    """

    if not isinstance(batch, collections.Sequence):
        raise TypeError("{} is not supported.".format(batch.dtype))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        # case 1: cpu_only data, e.g. meta info
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        # case 2: stacked GPU tensors, e.g. images
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)
                # TODO: handle tensors other than 3d
                assert batch[i].dim() == 3
                c, h, w = batch[0].size()
                for sample in batch[i:i + samples_per_gpu]:
                    assert c == sample.size(0)
                    h = max(h, sample.size(1))
                    w = max(w, sample.size(2))
                padded_samples = [
                    F.pad(
                        sample.data,
                        (0, w - sample.size(2), 0, h - sample.size(1)),
                        value=sample.padding_value)
                    for sample in batch[i:i + samples_per_gpu]
                ]
                stacked.append(default_collate(padded_samples))
        # case 3: non-stacked GPU data, e.g. gt bboxes
        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], collections.Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], collections.Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
        #from torch.utils.data.dataloader import default_collate

The collate function handles four kinds of input: DataContainer, collections.Sequence, collections.Mapping, and everything else, which falls through to PyTorch's default_collate.
We will only walk through the DataContainer branch, since DataContainer is a custom type of mmcv's. (It is covered in detail in the data-loading post.)

The three cases:

  1. cpu_only = True, e.g. meta data
    The batch is split into groups of samples_per_gpu samples; each group's .data fields go into a list, and the n group lists go into one outer list (stacked).
    It returns a DataContainer: return DataContainer(stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
  2. cpu_only = False, stack = True, e.g. image tensors
    The same loop strides through the batch in steps of samples_per_gpu, splitting it into n mini-batches. It reads the size (c, h, w) of batch[0], takes the maximum h and w over each group's samples, and F.pads every sample in the group up to that common size. PyTorch's default_collate then stacks each padded group into a single image tensor.
    The n group tensors are appended to stacked, and a DataContainer is returned (see the sketch after this list).
  3. cpu_only = False, stack = False, e.g. gt bboxes
    Same grouping as case 1, except the return DataContainer(stacked, batch[0].stack, batch[0].padding_value) leaves cpu_only at its default of False,
    so the data (GT bboxes here) is kept as per-sample lists that will later be moved to GPU without being stacked.
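A minimal runnable sketch of case 2, assuming DataContainer and collate are importable from mmcv.parallel (true for mmcv of this era):

import torch
from mmcv.parallel import DataContainer, collate

a = DataContainer(torch.zeros(3, 4, 6), stack=True)
b = DataContainer(torch.zeros(3, 5, 5), stack=True)
out = collate([a, b], samples_per_gpu=2)
# both images are padded to the group max (h=5, w=6), then stacked
print(out.data[0].shape)   # torch.Size([2, 3, 5, 6])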

doc:pin_memory (bool, optional) – If True, the data loader will copy tensors into CUDA pinned memory before returning them.

On the CUDA side, host memory comes in two flavors: pageable memory and page-locked (pinned) memory. Pageable memory is what the OS allocator malloc() hands out on the host; pinned memory is allocated on the host by CUDA's cudaHostAlloc(). The defining property of pinned memory is that the host OS will never page it out or swap it, so it is guaranteed to stay resident in physical memory.
Because the GPU knows the physical address of pinned memory, data can be copied between host and GPU via Direct Memory Access (DMA), which is faster. The trade-off is that every pinned allocation consumes physical memory that cannot be swapped to disk, so pinned memory is more expensive than ordinary pageable memory from malloc().
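In PyTorch terms (a tiny illustration; the copy itself needs a CUDA device):

import torch

x = torch.randn(1024, 1024).pin_memory()   # page-locked host tensor
if torch.cuda.is_available():
    y = x.to('cuda', non_blocking=True)    # async DMA copy from pinned memory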

Everything above covers the single-GPU case: with the model and data_loader in hand, single_test is called to produce outputs.

OK, back to test.py. What follows is the case where the GPU count is greater than 1.
First the cfg.model parameters are copied into model_args, which is then updated with train_cfg=None and the test_cfg.
model_type = getattr(detectors, model_args.pop('type')) fetches the corresponding detector class.
The __init__ of models/detectors defines the following detectors:
__all__ = ['BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet']

Then it calls
outputs = parallel_test( model_type, model_args, args.checkpoint, dataset, _data_func, range(args.gpus), workers_per_gpu=args.proc_per_gpu)
Let's walk through the parallel_test function. It builds on Python's multiprocessing module; the explanation is inline in the comments below.

def parallel_test(model_cls,
                  model_kwargs,
                  checkpoint,
                  dataset,
                  data_func,
                  gpus,
                  workers_per_gpu=1):
    """Parallel testing on multiple GPUs.
    Args:
        model_cls (type): Model class type.
        model_kwargs (dict): Arguments to init the model.
        checkpoint (str): Checkpoint filepath.
        dataset (:obj:`Dataset`): The dataset to be tested.
        data_func (callable): The function that generates model inputs.
        gpus (list[int]): GPU ids to be used.
        workers_per_gpu (int): Number of processes on each GPU. It is possible
            to run multiple workers on each GPU.
    Returns:
        list: Test results.
    """
    # get a multiprocessing context; 'spawn' starts fresh worker processes
    ctx = multiprocessing.get_context('spawn')
    # two inter-process queues: one feeds sample indices in,
    # the other carries (index, result) pairs back
    idx_queue = ctx.Queue()
    result_queue = ctx.Queue()
    # total number of worker processes across all GPUs
    num_workers = len(gpus) * workers_per_gpu
    # create num_workers processes, each running worker_func;
    # args must be an iterable of the arguments passed to worker_func
    workers = [
        ctx.Process(
            target=worker_func,
            args=(model_cls, model_kwargs, checkpoint, dataset, data_func,
                  gpus[i % len(gpus)], idx_queue, result_queue))
        for i in range(num_workers)
    ]
    # start the worker processes
    for w in workers:
        # daemon=True: workers are killed automatically if the main
        # process exits, so they can never outlive it
        w.daemon = True
        w.start()
    # feed every dataset index into the index queue
    for i in range(len(dataset)):
        idx_queue.put(i)
    # results list pre-filled with None, one slot per sample
    results = [None for _ in range(len(dataset))]
    # progress bar over the whole dataset
    prog_bar = mmcv.ProgressBar(task_num=len(dataset))
    
    for _ in range(len(dataset)):
        # fetch one (index, result) pair from the result queue
        idx, res = result_queue.get()
        # store the result back at its original position
        results[idx] = res
        # advance the progress bar
        prog_bar.update()
    print('\n')
    # kill the workers (their while-loops never exit on their own)
    for worker in workers:
        worker.terminate()

    return results

OK, so the crucial piece of the function above is worker_func, the target function each worker process runs.

def worker_func(model_cls, model_kwargs, checkpoint, dataset, data_func,
                gpu_id, idx_queue, result_queue):
    # instantiate the model (the detector class selected earlier)
    model = model_cls(**model_kwargs)
    # load the checkpoint weights (onto CPU first)
    load_checkpoint(model, checkpoint, map_location='cpu')
    # bind this worker to its assigned GPU
    torch.cuda.set_device(gpu_id)
    model.cuda()
    model.eval()
    # context manager that disables gradient tracking
    with torch.no_grad():
        # loop forever: get() blocks while the index queue is empty;
        # the parent terminates the worker once all results are collected
        while True:
            # take the next sample index off the queue
            idx = idx_queue.get()
            # fetch the idx-th sample
            data = dataset[idx]
            # run the model on it
            result = model(**data_func(data, gpu_id))
            # push (idx, result) onto the result queue
            result_queue.put((idx, result))
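Before moving on to data_func, here is a stripped-down, self-contained version of this producer/consumer pattern (squaring numbers instead of running a detector), which you can run to see the index and result queues in action:

import multiprocessing

def square_worker(idx_queue, result_queue):
    while True:                      # blocks on get(); parent terminates us
        idx = idx_queue.get()
        result_queue.put((idx, idx * idx))

if __name__ == '__main__':
    ctx = multiprocessing.get_context('spawn')
    idx_q, res_q = ctx.Queue(), ctx.Queue()
    workers = [ctx.Process(target=square_worker, args=(idx_q, res_q))
               for _ in range(2)]
    for w in workers:
        w.daemon = True
        w.start()
    for i in range(8):
        idx_q.put(i)
    results = [None] * 8
    for _ in range(8):
        idx, res = res_q.get()
        results[idx] = res
    for w in workers:
        w.terminate()
    print(results)   # [0, 1, 4, 9, 16, 25, 36, 49]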

What does data_func look like?

def _data_func(data, device_id):
    data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
    return dict(return_loss=False, rescale=True, **data)

_data_func collates the single sample (samples_per_gpu=1), scatters it onto GPU device_id, and adds the test-time arguments return_loss=False and rescale=True.
Let's look at the scatter function.

def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs.
    """
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, list):
        chunk_size = (len(input) - 1) // len(devices) + 1
        # list input: recursively scatter each item, mapping chunks
        # of the list onto the devices
        outputs = [
            scatter(input[i], [devices[i // chunk_size]],
                    [streams[i // chunk_size]]) for i in range(len(input))
        ]
        return outputs
    elif isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        stream = streams[0] if output.numel() > 0 else None
        with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
            output = output.cuda(devices[0], non_blocking=True)
        return output
    else:
        raise Exception('Unknown type {}.'.format(type(input)))
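One detail worth noting: the chunk_size line is just integer ceiling division, so consecutive chunks of the input list are mapped onto the available devices:

# (n - 1) // d + 1 == ceil(n / d) for positive n, d
for n, d in [(8, 4), (9, 4), (1, 4)]:
    print((n - 1) // d + 1)   # -> 2, 3, 1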

The last part:
writing the results to the output file.

    if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = args.out
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_file = args.out + '.json'
                    results2json(dataset, outputs, result_file)
                    coco_eval(result_file, eval_types, dataset.coco)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = args.out + '.{}.json'.format(name)
                        results2json(dataset, outputs_, result_file)
                        coco_eval(result_file, eval_types, dataset.coco)

The outputs obtained earlier are serialized to the output file by mmcv's dump function (mmcv/io.py).
Most of what follows is evaluation: results2json converts the outputs into COCO-format JSON and coco_eval runs the COCO API on it. We won't go into the details here.
