測試

tools/test.py

tools/test.py

def main():
    """Run inference with a trained detector and optionally evaluate it.

    Parses the command line with ``parse_args()``, loads the config file,
    builds the test dataset and then runs ``single_test`` (``args.gpus == 1``)
    or ``parallel_test`` (otherwise). If ``args.out`` is set the raw outputs
    are dumped to that file, and if ``args.eval`` is set the results are
    evaluated with ``coco_eval``.

    Raises:
        ValueError: If ``args.out`` is given but does not end with ``.pkl``
            or ``.pickle``.
    """
    args = parse_args()

    out_file = args.out
    if out_file is not None and not out_file.endswith(('.pkl', '.pickle')):
        raise ValueError('The output file must be a pkl file.')

    cfg = mmcv.Config.fromfile(args.config)
    # Turn on cudnn benchmark mode only when the config requests it.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # Clear the pretrained setting; weights come from args.checkpoint below.
    cfg.model.pretrained = None
    # Switch the test dataset config to test mode.
    cfg.data.test.test_mode = True

    dataset = obj_from_dict(cfg.data.test, datasets, dict(test_mode=True))

    if args.gpus != 1:
        # Multi-GPU: each worker process builds its own model from the class
        # and keyword arguments prepared here.
        model_args = cfg.model.copy()
        model_args.update(train_cfg=None, test_cfg=cfg.test_cfg)
        model_type = getattr(detectors, model_args.pop('type'))
        outputs = parallel_test(
            model_type,
            model_args,
            args.checkpoint,
            dataset,
            _data_func,
            range(args.gpus),
            workers_per_gpu=args.proc_per_gpu)
    else:
        # Single GPU: build the detector, load weights, wrap it for device 0.
        detector = build_detector(
            cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
        load_checkpoint(detector, args.checkpoint)
        detector = MMDataParallel(detector, device_ids=[0])

        loader = build_dataloader(
            dataset,
            imgs_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            num_gpus=1,
            dist=False,
            shuffle=False)
        outputs = single_test(detector, loader, args.show)

    # Nothing is written or evaluated without an output path.
    if not out_file:
        return

    print('writing results to {}'.format(out_file))
    mmcv.dump(outputs, out_file)

    eval_types = args.eval
    if not eval_types:
        return

    print('Starting evaluate {}'.format(' and '.join(eval_types)))
    if eval_types == ['proposal_fast']:
        # This mode evaluates the dumped pickle file directly.
        coco_eval(out_file, eval_types, dataset.coco)
        return

    if not isinstance(outputs[0], dict):
        json_file = out_file + '.json'
        results2json(dataset, outputs, json_file)
        coco_eval(json_file, eval_types, dataset.coco)
        return

    # Dict outputs: convert and evaluate every named entry separately.
    for key in outputs[0]:
        print('\nEvaluating {}'.format(key))
        per_key_outputs = [item[key] for item in outputs]
        json_file = out_file + '.{}.json'.format(key)
        results2json(dataset, per_key_outputs, json_file)
        coco_eval(json_file, eval_types, dataset.coco)

解釋一下obj_from_dict方法

def obj_from_dict(info, parrent=None, default_args=None):
    """Build an object from a config dict.

    ``info`` must contain the key "type". Its value is either a type, used
    directly, or a string. A string is resolved with
    ``getattr(parrent, name)`` when ``parrent`` is given and looked up in
    ``sys.modules`` otherwise. All remaining entries of ``info`` are passed
    as keyword arguments to the resolved object.

    Args:
        info (dict): Object type and constructor arguments.
        parrent (:class:`module`, optional): Namespace that string type names
            are resolved against.
        default_args (dict, optional): Extra arguments; each one is used only
            when ``info`` does not already provide that key.

    Returns:
        any type: Object built from the dict.

    Raises:
        TypeError: If "type" is neither a string nor a type.
    """
    assert isinstance(info, dict) and 'type' in info
    assert isinstance(default_args, dict) or default_args is None

    # Work on a copy so the caller's config is left untouched.
    kwargs = info.copy()
    target = kwargs.pop('type')

    if mmcv.is_str(target):
        target = (sys.modules[target]
                  if parrent is None else getattr(parrent, target))
    elif not isinstance(target, type):
        raise TypeError('type must be a str or valid type, but got {}'.format(
            type(target)))

    # Values given explicitly in ``info`` win over the defaults.
    for key, value in (default_args or {}).items():
        kwargs.setdefault(key, value)
    return target(**kwargs)

輸入參數info，是一個cfg裏的字典，比如這裏的是test配置。
第二個參數是module，包含了期望的對象類別。
第三個是默認參數，用來初始化對象的
首先，判斷info是不是字典，而且裏面必須包含type關鍵字
默認參數也要檢查是字典或者爲None
然後，從字典中pop出type鍵對應的值，即obj_type，
如果是字符串類型，再判斷module(parrent)參數是否爲空

不爲空，就執行obj_type = getattr(parrent, obj_type)。測試時，parrent是datasets模塊（文件夾），obj_type是cfg中的test字典的type參數，所以相當於是從datasets模塊裏，按名稱取出相應的數據集類dataset。

如果obj_type既不是字符串，也不是一個python類型（type），就報錯（TypeError）。

然後，如果默認參數不爲空，迭代讀取每個默認參數的key:value，用setdefault把它們添加到cfg中test字典的參數中（也就是args）；如果args裏已經有同名的key，則保留原值，不會被默認參數覆蓋。
最後返回一個數據集類的實例 return obj_type(**args)
obj_type是dataset類，args是配置參數，是一個字典。
同理，這個函數也可以給定cfg,module,其他參數三個輸入來初始化任何一個對象，以上是以加載測試dataset爲例

接着往下，如果gpu參數爲1，進入單卡程序。
調用build_detector新建一個模型。

def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build a detector from ``cfg``.

    Thin wrapper around ``build`` that resolves the type name against the
    ``detectors`` package and passes ``train_cfg`` / ``test_cfg`` as default
    arguments.
    """
    from . import detectors

    extra_args = dict(train_cfg=train_cfg, test_cfg=test_cfg)
    return build(cfg, detectors, extra_args)

可以看到是針對建立detector的一個封裝

def build(cfg, parrent=None, default_args=None):
    """Build one module, or an ``nn.Sequential`` from a list of configs.

    A non-list ``cfg`` is handed to ``_build_module`` as is. For a list,
    every entry is built in order with the same ``parrent`` and
    ``default_args`` and the results are wrapped in ``nn.Sequential``.
    """
    if not isinstance(cfg, list):
        return _build_module(cfg, parrent, default_args)

    layers = []
    for item_cfg in cfg:
        layers.append(_build_module(item_cfg, parrent, default_args))
    return nn.Sequential(*layers)

再找到build函數，build函數可以處理放在list中的多個module的建立，再調用_build_module

def _build_module(cfg, parrent=None, default_args=None):
    """Return ``cfg`` itself if it is already a module, else build it.

    Anything that is not an ``nn.Module`` instance is treated as a config
    and passed to ``obj_from_dict`` together with ``parrent`` and
    ``default_args``.
    """
    if isinstance(cfg, nn.Module):
        return cfg
    return obj_from_dict(cfg, parrent, default_args)

這裏其實就是又用到了之前使用的obj_from_dict函數，按照cfg配置返回一個detector的模塊對象。
只不過obj_from_dict函數傳入的參數變成了 cfg=cfg.model , parrent=detectors , default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg) ，

下一步，加載參數ckp
接着調用MMDataParallel 將model放到GPU上
然後調用build_dataloader ，建立dataloader對象。
我們看一下這個函數

# Signature of build_dataloader; its body is shown piece by piece in the
# excerpts that follow. ``dist`` selects the distributed branch, and any extra
# keyword arguments (e.g. ``shuffle``) are forwarded to the DataLoader.
def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     **kwargs):

輸入是dataset對象，每個gpu的圖片數，每個gpu的worker數，多少個gpu，是否分佈式。

如果是分佈式讀取數據

 # Distributed branch: build a DistributedGroupSampler from this process's
 # rank and the world size; batch size and worker count are the per-GPU values.
 if dist:
        rank, world_size = get_dist_info()
        sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size,
                                          rank)
        batch_size = imgs_per_gpu
        num_workers = workers_per_gpu

獲取分佈式的信息，然後調用sampler中的DistributedGroupSampler劃分數據集，得到sampler。

如果是單卡

 # Non-distributed branch: no sampler when the caller passes shuffle=False,
 # otherwise a GroupSampler (shuffle counts as True when it is not given).
 # Batch size and worker count are scaled by the number of GPUs.
 else:
       if not kwargs.get('shuffle', True):
           sampler = None
       else:
           sampler = GroupSampler(dataset, imgs_per_gpu)
       batch_size = num_gpus * imgs_per_gpu
       num_workers = num_gpus * workers_per_gpu

調用GroupSampler 得到sampler。

解釋一下kwargs.get()：
get(key[, default])
Return the value for key if key is in the dictionary, else default. If default is not given, it defaults to None, so that this method never raises a KeyError.

然後，調用pytorch.utils.data中的DataLoader生成一個dataloader對象。

# Create the DataLoader. Samples are merged by ``collate`` with
# ``samples_per_gpu`` bound to ``imgs_per_gpu``; the remaining keyword
# arguments are forwarded unchanged.
data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
        pin_memory=False,
        **kwargs)

    return data_loader

doc：sampler (Sampler, optional) – defines the strategy to draw samples from the dataset. If specified, shuffle must be False.
文檔中說，sampler參數如果指定了，就必須設置shuffle=False。對應到前面的代碼：當shuffle爲False時，sampler被設置爲None；只有shuffle爲True（默認）時才使用GroupSampler。
doc：collate_fn (callable, optional) – merges a list of samples to form a mini-batch.
我們來看一下collate function ，

def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. A batch of DataContainers is split
    into consecutive groups of ``samples_per_gpu`` samples and handled in one
    of 3 ways:

    1. cpu_only = True, e.g., meta data: each group is kept as a plain list.
    2. cpu_only = False, stack = True, e.g., images tensors: each group is
       padded to a common height/width and stacked into one tensor.
    3. cpu_only = False, stack = False, e.g., gt bboxes: each group is kept
       as a plain list.

    Sequences and mappings of samples are collated recursively, field by
    field; anything else is passed to ``default_collate``.

    Args:
        batch (Sequence): Samples to collate.
        samples_per_gpu (int): Group size. ``len(batch)`` must be a multiple
            of it when the samples are DataContainers.

    Returns:
        A DataContainer, list or dict, or the result of ``default_collate``,
        depending on the type of ``batch[0]``.

    Raises:
        TypeError: If ``batch`` is not a sequence.
    """
    # The ABC aliases on ``collections`` itself were removed in Python 3.10,
    # so they are taken from ``collections.abc``.
    from collections.abc import Mapping, Sequence

    if not isinstance(batch, Sequence):
        # Most non-sequences have no ``dtype``; fall back to the type so the
        # intended TypeError is raised rather than an AttributeError.
        raise TypeError("{} is not supported.".format(
            getattr(batch, 'dtype', type(batch))))

    if isinstance(batch[0], DataContainer):
        assert len(batch) % samples_per_gpu == 0
        stacked = []
        # Case 1: CPU-only data is only regrouped, never stacked.
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        # Case 2: pad every sample of a group to the group's max h/w, stack.
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)
                # TODO: handle tensors other than 3d
                assert batch[i].dim() == 3
                # Seed the target size from this group's own first sample, so
                # a group is not padded up to the size of batch[0].
                c, h, w = batch[i].size()
                for sample in batch[i:i + samples_per_gpu]:
                    assert c == sample.size(0)
                    h = max(h, sample.size(1))
                    w = max(w, sample.size(2))
                # F.pad takes (left, right, top, bottom) for the last two
                # dims: only the right and bottom edges are extended.
                padded_samples = [
                    F.pad(
                        sample.data,
                        (0, w - sample.size(2), 0, h - sample.size(1)),
                        value=sample.padding_value)
                    for sample in batch[i:i + samples_per_gpu]
                ]
                stacked.append(default_collate(padded_samples))
        # Case 3: non-stackable data is only regrouped.
        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], Sequence):
        # Each sample is a sequence of fields: collate field by field.
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], Mapping):
        # Each sample is a mapping of fields: collate key by key.
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
        #from torch.utils.data.dataloader import default_collate

collate函數定義了四種情況處理。分別是DataContainer collections.Sequence collections.Mapping 其他。
我們只看一下最關鍵的DataContainer的情況，因爲這個數據類型是我們自定義的一個類型。（會在dataloading部分進行專門講解）

三種情況

cpu_only = True, e.g., meta data
將一個batch的數據按照samples_per_gpu的大小劃分成n個，每個都存在一個列表中，然後n個list放進一個大的list中（即stacked）
返回一個DataContainer對象return DataContainer( stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
cpu_only = False, stack = True, e.g., images tensors
同樣的循環間隔samples_per_gpu，爲了將batch劃分成n個minibatch。先取一個數據的大小c,h,w作爲初始值，然後在每個[i:i+samples_per_gpu]的minibatch中求出最大的h和w，再把其中每個數據都padding到這個最大尺寸（在右側和下側填充padding_value）。最後調用torch中默認的default_collate將一個minibatch大小的list數據堆疊成一個image tensor。
最後得到n個這樣的image tensors，都加入到大的list stacked中，返回DataContainer對象
cpu_only = False, stack = False, e.g., gt bboxes
操作與第一種情況一樣，不同在於返回的return DataContainer(stacked, batch[0].stack, batch[0].padding_value),cpu_only參數==False
所以這種情況返回的是GT bboxes

doc:pin_memory (bool, optional) – If True, the data loader will copy tensors into CUDA pinned memory before returning them.

對CUDA架構而言，主機端的內存被分爲兩種，一種是可分頁內存（pageable memory），另一種是頁鎖定內存（page-locked或pinned memory）。可分頁內存是由操作系統API malloc()在主機上分配的，頁鎖定內存是由CUDA函數cudaHostAlloc()在主機內存上分配的，頁鎖定內存的重要屬性是主機的操作系統將不會對這塊內存進行分頁和交換操作，確保該內存始終駐留在物理內存中。
GPU知道頁鎖定內存的物理地址，可以通過“直接內存訪問（Direct Memory Access，DMA）”技術直接在主機和GPU之間複製數據，速率更快。由於每個頁鎖定內存都需要分配物理內存，並且這些內存不能交換到磁盤上，所以頁鎖定內存比使用標準malloc()分配的可分頁內存更消耗內存空間。

以上所有內容解決了gpu只有一個的情況，得到了model和data_loader，然後調用single_test函數得到outputs。

ok.現在回到test.py文件，下面是gpu num大於1的情況
先將cfg.model的參數複製到model_args，並更新train_cfg=None和test_cfg配置
model_type = getattr(detectors, model_args.pop('type'))這一步獲取對應的detector類。
在model/detector文件init中，共定義了以下幾種detector：
__all__ = [ 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet' ]

然後調用
outputs = parallel_test( model_type, model_args, args.checkpoint, dataset, _data_func, range(args.gpus), workers_per_gpu=args.proc_per_gpu)
我們看一下parallel_test函數
多進程基礎教程
直接在代碼中解釋

def parallel_test(model_cls,
                  model_kwargs,
                  checkpoint,
                  dataset,
                  data_func,
                  gpus,
                  workers_per_gpu=1):
    """Parallel testing on multiple GPUs.

    Spawns ``len(gpus) * workers_per_gpu`` worker processes running
    ``worker_func``. Sample indices are handed out through one queue and
    ``(index, result)`` pairs are collected from another, so the returned
    list follows dataset order whatever order the workers finish in.

    Args:
        model_cls (type): Model class type.
        model_kwargs (dict): Arguments to init the model.
        checkpoint (str): Checkpoint filepath.
        dataset (:obj:`Dataset`): The dataset to be tested.
        data_func (callable): The function that generates model inputs.
        gpus (list[int]): GPU ids to be used.
        workers_per_gpu (int): Number of processes on each GPU. It is possible
            to run multiple workers on each GPU.

    Returns:
        list: Test results.
    """
    # Multiprocessing context that uses the 'spawn' start method.
    ctx = multiprocessing.get_context('spawn')
    # One queue hands out sample indices, the other collects the results.
    idx_queue = ctx.Queue()
    result_queue = ctx.Queue()

    # Create the worker processes, assigning GPUs round-robin.
    num_gpus = len(gpus)
    workers = []
    for rank in range(num_gpus * workers_per_gpu):
        worker_args = (model_cls, model_kwargs, checkpoint, dataset,
                       data_func, gpus[rank % num_gpus], idx_queue,
                       result_queue)
        workers.append(ctx.Process(target=worker_func, args=worker_args))

    for proc in workers:
        # Daemonic processes are terminated when the parent process exits.
        proc.daemon = True
        proc.start()

    # Enqueue every sample index for the workers to consume.
    num_samples = len(dataset)
    for sample_idx in range(num_samples):
        idx_queue.put(sample_idx)

    # Result slots are filled by index as the workers report back.
    results = [None] * num_samples
    prog_bar = mmcv.ProgressBar(task_num=num_samples)
    for _ in range(num_samples):
        sample_idx, sample_result = result_queue.get()
        results[sample_idx] = sample_result
        prog_bar.update()
    print('\n')

    # The workers loop forever, so stop them explicitly once all is collected.
    for proc in workers:
        proc.terminate()

    return results

ok那麼上面函數中最關鍵的是創建進程時傳入的worker_func函數。

def worker_func(model_cls, model_kwargs, checkpoint, dataset, data_func,
                gpu_id, idx_queue, result_queue):
    """Body of one test worker process.

    Builds the model, loads the checkpoint, moves the model to ``gpu_id``
    and then keeps taking sample indices from ``idx_queue``, running the
    model on each sample and putting ``(idx, result)`` on ``result_queue``.
    """
    # Instantiate the model and load the weights onto the CPU first.
    model = model_cls(**model_kwargs)
    load_checkpoint(model, checkpoint, map_location='cpu')
    # Make gpu_id the current CUDA device, then move the model onto it.
    torch.cuda.set_device(gpu_id)
    model.cuda()
    model.eval()
    # Inference only: run without gradient tracking.
    with torch.no_grad():
        # The loop has no exit of its own: get() is called without a timeout.
        # parallel_test terminates the worker processes when it is done.
        while True:
            idx = idx_queue.get()
            inputs = data_func(dataset[idx], gpu_id)
            result_queue.put((idx, model(**inputs)))

data_func函數是怎麼樣的？

def _data_func(data, device_id):
    """Turn one dataset sample into keyword arguments for the model.

    The sample is collated as a batch of one, scattered to ``device_id``,
    and returned together with ``return_loss=False`` and ``rescale=True``.
    """
    single_batch = collate([data], samples_per_gpu=1)
    on_device = scatter(single_batch, [device_id])[0]
    return dict(return_loss=False, rescale=True, **on_device)

_data_func函數先用collate把單個data打包成一個batch，再用scatter把它放到gpu_id對應的GPU上。
看一下scatter函數
#如果輸入是list類型，那麼對list中每一項都遞歸地做scatter。

def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs.

    A list is split into contiguous chunks, one per device, and every item
    is scattered recursively to the device of its chunk. A tensor is copied
    to ``devices[0]``. Any other input type raises an ``Exception``.
    """
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, list):
        # Ceiling division: items per device.
        chunk_size = (len(input) - 1) // len(devices) + 1
        outputs = []
        for pos, item in enumerate(input):
            slot = pos // chunk_size
            outputs.append(scatter(item, [devices[slot]], [streams[slot]]))
        return outputs

    if isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        # An empty tensor is copied without a dedicated stream.
        if output.numel() > 0:
            stream = streams[0]
        else:
            stream = None
        with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
            output = output.cuda(devices[0], non_blocking=True)
        return output

    raise Exception('Unknown type {}.'.format(type(input)))

最後一部分
結果寫入out文件

# Final part of main(): dump the raw outputs to args.out and, if evaluation
# types were requested, evaluate them with coco_eval. Outputs other than
# 'proposal_fast' are first converted to json by results2json; dict outputs
# are converted and evaluated once per key.
if args.out:
        print('writing results to {}'.format(args.out))
        mmcv.dump(outputs, args.out)
        eval_types = args.eval
        if eval_types:
            print('Starting evaluate {}'.format(' and '.join(eval_types)))
            if eval_types == ['proposal_fast']:
                result_file = args.out
                coco_eval(result_file, eval_types, dataset.coco)
            else:
                if not isinstance(outputs[0], dict):
                    result_file = args.out + '.json'
                    results2json(dataset, outputs, result_file)
                    coco_eval(result_file, eval_types, dataset.coco)
                else:
                    for name in outputs[0]:
                        print('\nEvaluating {}'.format(name))
                        outputs_ = [out[name] for out in outputs]
                        result_file = args.out + '.{}.json'.format(name)
                        results2json(dataset, outputs_, result_file)
                        coco_eval(result_file, eval_types, dataset.coco)

將之前得到的outputs通過mmcv/io.py中的dump函數寫入輸出文件。
接着，下面的大部分是evaluation，調用了coco API做eval，暫時不詳細解釋。

mmdetection代碼-tool-test

測試

tools/test.py

[轉帖]使用NMT和pmap解決JVM資源泄漏問題原創

Python實現大麥網搶票的四大關鍵技術點解析

salesforce零基礎學習（一百三十八）零碎知識點小總結（十）

一款開源的.NET程序集反編譯、編輯和調試神器

關於接口協議，你必須要知道這些！

【2024-05-21】以茶會友

leetcode

RPNhead類代碼解釋

mmdetection代碼-tool-test

論文筆記DJRDER

[圖像處理]圖像處理中不適定問題（ill posedproblem）或稱爲反問題（inverseProblem）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結