Training YOLOv3 on the COCO2017 Dataset with Darknet (Personally Tested and Working)

Before anything else, you need to install pycocotools. Many people online say a plain sudo pip install will not work and that you have to download the COCO API from GitHub and build the Python interface yourself, but that route failed for me. What actually worked was installing cython first and then installing pycocotools via sudo pip install (I have heard Windows is not supported, but I have not tried it myself, so I cannot say). Look up a pycocotools installation method that suits your setup, since everyone's environment differs; for reference, the two commands that worked for me were sudo pip install cython followed by sudo pip install pycocotools.

A disclaimer: the steps below may be somewhat tedious to run through, but they definitely work. The environment is Python 2; if you want to use Python 3, you will have to adapt some of the library calls in the code.

Step 1:

First, download the COCO2017 dataset. Since I am using the darknet framework, I only used train2017 (118,287 images) and val2017 (5,000 images). You can also merge the test and validation sets and treat them as a single test set (I recommend using the Karpathy split here, which re-partitions train, val, and test); evaluation should work a bit better that way.

Step 2:

Convert the COCO instances_train2017.json / instances_val2017.json labels into VOC-style (.xml) labels.

First, here is the code that converts COCO JSON labels into VOC XML labels.

from pycocotools.coco import COCO
import os
import shutil
from tqdm import tqdm
import skimage.io as io
import matplotlib.pyplot as plt
import cv2
from PIL import Image, ImageDraw
savepath="/home/test/darknet/VOC2020/"
datasets_list=['val2017']    ## after this run finishes, change to 'train2017' and run again
img_dir=savepath+'images/'         # processed images are copied here; this pass only handles val2017, so rename the generated images folder to val2017 afterwards
anno_dir=savepath+'annotations/'       # an annotations folder is created here to hold the xml files; rename it after the run
classes_names =['person','bicycle', 'car','motorcycle','airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog','horse', 'sheep','cow','elephant','bear', 'zebra', 'giraffe','backpack','umbrella', 'handbag','tie', 'suitcase', 'frisbee', 'skis', 'snowboard','sports ball', 'kite', 'baseball bat', 'baseball glove','skateboard', 'surfboard', 'tennis racket','bottle', 'wine glass', 'cup', 'fork','knife', 'spoon', 'bowl', 'banana','apple', 'sandwich', 'orange','broccoli', 'carrot', 'hot dog', 'pizza','donut', 'cake', 'chair', 'couch', 'potted plant', 'bed','dining table', 'toilet','tv','laptop', 'mouse','remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
dataDir= '/home/test/darknet/coco2017' ####### path to your COCO dataset
headstr = """\
<annotation>
    <folder>VOC</folder>
    <filename>%s</filename>
    <source>
        <database>My Database</database>
        <annotation>COCO</annotation>
        <image>flickr</image>
        <flickrid>NULL</flickrid>
    </source>
    <owner>
        <flickrid>NULL</flickrid>
        <name>company</name>
    </owner>
    <size>
        <width>%d</width>
        <height>%d</height>
        <depth>%d</depth>
    </size>
    <segmented>0</segmented>
"""
objstr = """\
    <object>
        <name>%s</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>%d</xmin>
            <ymin>%d</ymin>
            <xmax>%d</xmax>
            <ymax>%d</ymax>
        </bndbox>
    </object>
"""
 
tailstr = '''\
</annotation>
'''
def mkr(path):
    # recreate the directory from scratch
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)
mkr(img_dir)
mkr(anno_dir)
def id2name(coco):
    classes=dict()
    for cls in coco.dataset['categories']:
        classes[cls['id']]=cls['name']
    return classes
 
def write_xml(anno_path,head, objs, tail):
    f = open(anno_path, "w")
    f.write(head)
    for obj in objs:
        f.write(objstr%(obj[0],obj[1],obj[2],obj[3],obj[4]))
    f.write(tail)
    f.close()
 
 
def save_annotations_and_imgs(coco,dataset,filename,objs):
    anno_path=anno_dir+filename[:-3]+'xml'
    print('anno_path:%s'%anno_path)
    #img_path=dataDir+'/'+'images'+'/'+dataset+'/'+filename
    img_path=dataDir+'/'+dataset+'/'+filename
    print('img_path:%s'%img_path)
    print('step3-image-path-OK')
    dst_imgpath=img_dir+filename
 
    img=cv2.imread(img_path)
    if img is None:
        # guard: cv2.imread returns None when the image cannot be read
        print(filename + " could not be read, skipping")
        return
    print('img_path:%s'%img_path)
    print('dst_imgpath:%s'%dst_imgpath)
    shutil.copy(img_path, dst_imgpath)
 
    head=headstr % (filename, img.shape[1], img.shape[0], img.shape[2])
    tail = tailstr
    write_xml(anno_path,head, objs, tail)
 
 
def showimg(coco,dataset,img,classes,cls_id,show=True):
    global dataDir
    #I=Image.open('%s/%s/%s/%s'%(dataDir,'images',dataset,img['file_name']))
    I=Image.open('%s/%s/%s'%(dataDir,dataset,img['file_name']))  ### adjust this path to match your directory layout
    annIds = coco.getAnnIds(imgIds=img['id'], catIds=cls_id, iscrowd=None)
    anns = coco.loadAnns(annIds)
    objs = []
    for ann in anns:
        class_name=classes[ann['category_id']]
        if class_name in classes_names:
            print(class_name)
            if 'bbox' in ann:
                bbox=ann['bbox']
                xmin = int(bbox[0])
                ymin = int(bbox[1])
                xmax = int(bbox[2] + bbox[0])
                ymax = int(bbox[3] + bbox[1])
                obj = [class_name, xmin, ymin, xmax, ymax]
                objs.append(obj)
                #draw = ImageDraw.Draw(I)
                #draw.rectangle([xmin, ymin, xmax, ymax])
    # if show:
        # plt.figure()
        # plt.axis('off')
        # plt.imshow(I)
        # plt.show()
    return objs
 
for dataset in datasets_list:
    annFile='{}/annotations_1/instances_{}.json'.format(dataDir,dataset) # path to wherever you keep the json files
    print('annFile:%s'%annFile)  
    coco = COCO(annFile)         
    '''
    loading annotations into memory...
    Done (t=0.81s)
    creating index...
    index created!
    '''
    classes = id2name(coco)                     
    print("classes:%s"%classes)
    classes_ids = coco.getCatIds(catNms=classes_names)
    print(classes_ids)
    for cls in classes_names:
        cls_id=coco.getCatIds(catNms=[cls])
        img_ids=coco.getImgIds(catIds=cls_id)
        print(cls,len(img_ids))
        # imgIds=img_ids[0:10]
        for imgId in tqdm(img_ids):
            img = coco.loadImgs(imgId)[0]
            filename = img['file_name']
            #print(filename)
            objs=showimg(coco, dataset, img, classes,classes_ids,show=False)  
            #print(objs)
            save_annotations_and_imgs(coco, dataset, filename, objs)
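
After each pass, it is worth a quick sanity check that the number of generated XMLs matches the number of copied images. A minimal sketch, assuming the savepath layout used above:

import glob

# one xml should have been written per copied image
xmls = glob.glob('/home/test/darknet/VOC2020/annotations/*.xml')
imgs = glob.glob('/home/test/darknet/VOC2020/images/*.jpg')
print('xml: %d  img: %d' % (len(xmls), len(imgs)))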

There are plenty of demos for this online, but I spent a whole afternoon debugging them one by one before finding that this one works best. Some blogs just copy other people's code wholesale, full of bugs, with messy, overlapping paths they never bother to fix, and the worst part is that they claim it works without ever having run it; it makes me want to slap them. But I am not here just to vent: if you post code, please either test it first or do not post it at all, and add some comments when you do. Carelessly pasted code misleads people, wastes their time, and puts real pressure on whoever tries to use it.

Step 3:

After obtaining both the train and val XMLs, we go one step further and use scripts to convert the XMLs into txt files. This comes in two parts.

3.1 Part 1

Take every xml file, derive the corresponding image path, and write those paths into train_all.txt.

I originally planned to read every image path in train2017 into train_all.txt, but there is a catch: the images yield 118,287 paths, while the XMLs yield only 117,266, roughly a thousand fewer. The same happens for val2017: 5,000 images, but only 4,852 XMLs generated. The most likely explanation is that some COCO images carry no annotations at all, so the conversion script above never writes an XML for them; you can check this with the sketch below.
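
A minimal verification sketch with pycocotools, assuming the same annotation path as in step 2:

from pycocotools.coco import COCO

coco = COCO('/home/test/darknet/coco2017/annotations_1/instances_train2017.json')
# images whose annotation list is empty never produce an xml
no_anno = [i for i in coco.getImgIds() if len(coco.getAnnIds(imgIds=[i])) == 0]
print('images without annotations: %d' % len(no_anno))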

This assumes you have already placed train2017 and val2017 under the JPEGImages folder.

import os
from os import getcwd
wd =getcwd()   # run this script from the VOC2020 root so the folders below resolve
mulu=['/'+'annotations_train_xml','/'+'annotations_val_xml']
count=0
for i in mulu:
    count+=1
    dir =wd+i
    print(dir)
    filenames=os.listdir(dir)
    if count==1:
        f=open('train_all.txt','w')
        count_1=0
        for filename in filenames:
            count_1+=1
            out_path=dir+'/'+filename.replace('xml','jpg')
            out_path=out_path.replace('annotations_train_xml','JPEGImages/train2017')
            f.write(out_path+'\n')
        f.close()
        print('done!,total:%s'%count_1)
    elif count==2:
        f=open('val_all.txt','w')
        count_1=0
        for filename in filenames:
            count_1+=1
            out_path=dir+'/'+filename.replace('xml','jpg')
            out_path=out_path.replace('annotations_val_xml','JPEGImages/val2017')
            f.write(out_path+'\n')
        f.close()
        print('done!,total:%s'%count_1)
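
Running it should print done!,total:117266 for train and done!,total:4852 for val, matching the xml counts noted above.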


3.2 Part 2

Then, with the files organized, generate the txt label corresponding to each xml.

Here, too, run it for train first, then change train to val and run it again; the three places to edit are the paths marked with ###.

import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
#20190227@new-only 2007 data
#sets=[('2007', 'train'), ('2007', 'val'), ('2007_test', 'test')]
sets =['train']
#classes = ['1', '2', '3','4','5','6','7','8','9','10','11', '12', '13','14','15','16','17','18','19','20']
classes = ['person','bicycle', 'car','motorcycle','airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog','horse', 'sheep','cow','elephant','bear', 'zebra', 'giraffe','backpack','umbrella', 'handbag','tie', 'suitcase', 'frisbee', 'skis', 'snowboard','sports ball', 'kite', 'baseball bat', 'baseball glove','skateboard', 'surfboard', 'tennis racket','bottle', 'wine glass', 'cup', 'fork','knife', 'spoon', 'bowl', 'banana','apple', 'sandwich', 'orange','broccoli', 'carrot', 'hot dog', 'pizza','donut', 'cake', 'chair', 'couch', 'potted plant', 'bed','dining table', 'toilet','tv','laptop', 'mouse','remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 
 
def convert(size, box):
    # convert a VOC box (xmin, xmax, ymin, ymax) in pixels into YOLO format
    # (x_center, y_center, width, height), normalized by the image size
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x,y,w,h)
 
def convert_annotation(image_id):
    #print("2-open annotations")
    #print('image_id:%s'%image_id)
    #image_id_1 = image_id.split('/')[-1]
    #print('image_id:%s'%image_id)
    #imade_id = image_id_1.replace("jpg","xml")
    #print('image_id:%s'%image_id)
    #in_file = open('/home/test/darknet/VOC2020/annotations_val_xml/%s.xml'%(image_id))
    #print('infile:','/home/test/darknet/VOC2020/annotations_val_xml/%s'%(image_id))
    in_file = open('/home/test/darknet/VOC2020/annotations_train_xml/%s.xml'%(image_id))    ########## change to annotations_val_xml for the val run
    #print("3-convert to txt")
    out_file = open('/home/test/darknet/VOC2020/annotations_train_txt/%s.txt'%(image_id), 'w') ####### change to annotations_val_txt for the val run
    tree=ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
 
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w,h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
        #print("write ok")
    in_file.close()
    out_file.close()
 
#wd = getcwd()
wd = " "
 
for image_set in sets:

    image_ids = open('train_all.txt').read().strip().split()      ###### change to val_all.txt for the val run
    # image_ids = open('%s.txt'%(image_set)).read().strip().split()
    print("start ")
    #list_file = open('%s.txt'%(image_set), 'w')
    for image_id in image_ids:
        #print("again write")
        #print('image_id:%s'%image_id)
        #list_file.write('%s/%s.jpg\n'%(wd, image_id))
        id = image_id.split('/')[-1].replace('jpg','xml')
        id =id.split('.')[0]
        print('id:%s'%id)
        convert_annotation(id)
    #list_file.close()
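
Each line of a generated txt file has the form class_id x_center y_center width height, with the four numbers normalized to [0, 1] by convert() above. For example, a person box (class_id 0) roughly centered in the image might come out as (values purely illustrative):

0 0.508 0.487 0.402 0.673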

This script could actually generate the train_all.txt from part 1 directly, but I found regenerating that file on every debug run too cumbersome, so I changed it.

Finally, put the generated txt labels under VOC2020/labels/train2017 and VOC2020/labels/val2017, and you can start training darknet.
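
Darknet finds the label for each image by substituting labels for JPEGImages (or images) in the image path, so the final layout should look roughly like this:

VOC2020/
    JPEGImages/
        train2017/    # jpg images
        val2017/
    labels/
        train2017/    # txt labels generated above
        val2017/
    train_all.txt
    val_all.txt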

Remember to change voc.names to coco.names. Also update the class count and the number of filters in the convolutional layer immediately before each yolo layer: COCO likewise has 80 classes, so that convolution needs 3 × (80 + 5) = 255 filters.
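
Concretely, the edit appears three times in yolov3.cfg, once before each [yolo] layer:

[convolutional]
# 3 * (80 + 5) = 255
filters=255
...
[yolo]
classes=80

A coco.data along these lines ties everything together (the paths are assumptions matching the layout above):

classes = 80
train  = /home/test/darknet/VOC2020/train_all.txt
valid  = /home/test/darknet/VOC2020/val_all.txt
names  = data/coco.names
backup = backup/

Training then starts with the usual darknet command, e.g. ./darknet detector train cfg/coco.data cfg/yolov3.cfg darknet53.conv.74.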

This blog post is also worth reading alongside this one: https://blog.csdn.net/weixin_42731241/article/details/81352013

 
