一、數據目錄結構
# 數據集目錄結構
└── VOCdevkit
└── VOC2020
├── Annotations #自己拷貝進來 !
│ ├── 20190615163323424.xml
│ ├── .......還有很多xml
├── ImageSets
│ └── Main #標籤的訓練和驗證集文本
│ ├── 1_train.txt
│ ├── 1_val.txt
│ ├── 2_train.txt
│ ├── 2_val.txt
│ ├── 3_train.txt
│ ├── 3_val.txt
│ ├── 4_train.txt
│ └── 4_val.txt
├── JPEGImages #自己拷貝進來 !
│ ├── 20190615163323424.jpg
│ ├── .......還有很多圖片
│
│
└── label
├── 20190615163323424.txt
├── .......還有很多txt
二、生成 Main - 標籤的訓練和驗證集文本
import os
from os import getcwd
# wd = getcwd()
# Source dataset root (contains the per-set *_xml annotation folders).
wd = 'D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/my_data_set'
# Destination folder for the generated image-list text files.
out_file = 'D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/ImageSets/Main'
# (list-file / image-folder name, subfolder holding the matching .xml files)
mulu = [
    ('coco2017_train_images', '/' + 'coco2017_train_images_xml'),
    ('coco2017_val_images', '/' + 'coco2017_val_images_xml'),
]
for i in mulu:
    # Avoid shadowing the builtin `dir`.
    xml_dir = wd + i[1]
    print(xml_dir)
    filenames = os.listdir(xml_dir)
    out_file_path = os.path.join(out_file, i[0])
    count_1 = 0
    # `with` guarantees the list file is closed even if an error occurs mid-loop.
    with open(out_file_path + '.txt', 'w') as f:
        for filename in filenames:
            count_1 += 1
            # Map <...>_xml/<name>.xml -> <...>/<name>.jpg.
            # splitext only touches the extension (the old
            # filename.replace('xml', 'jpg') would also mangle a filename
            # that happened to contain 'xml').
            jpg_name = os.path.splitext(filename)[0] + '.jpg'
            # BUGFIX: replace the '/..._xml' segment with '/<image folder>'.
            # The original replaced i[1] (leading '/') with i[0] (no leading
            # '/'), which dropped the separator and produced paths like
            # '.../my_data_setcoco2017_train_images/x.jpg'.
            out_path = (xml_dir + '/' + jpg_name).replace(i[1], '/' + i[0])
            f.write(out_path + '\n')
    print('done!,total:%s' % count_1, i[0])
三、生成對應 JPEGImages 的訓練和驗證集文本
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join
# (year, image-set name) pairs; each names a list file under ImageSets/Main.
sets = [
('2020', 'train_coco2017_images'),
('2020', 'val_coco2017_images'),
]
# Object classes to keep; the index in this list becomes the darknet class id.
classes = ['car', 'bus', 'truck']
def convert(size, box):
    """Convert a VOC box (xmin, xmax, ymin, ymax), in pixels, to YOLO
    format (x_center, y_center, width, height) normalized by the image
    size (width, height)."""
    img_w, img_h = size
    xmin, xmax, ymin, ymax = box
    # Reciprocals kept (instead of dividing) so the float results match
    # darknet's original voc_label.py bit-for-bit.
    dw = 1.0 / img_w
    dh = 1.0 / img_h
    # Box center; the "- 1" offset is inherited from the original script.
    cx = (xmin + xmax) / 2.0 - 1
    cy = (ymin + ymax) / 2.0 - 1
    bw = xmax - xmin
    bh = ymax - ymin
    return (cx * dw, cy * dh, bw * dw, bh * dh)
def convert_annotation(wd, year, image_id):
    """Convert one VOC xml annotation into a darknet label file.

    Reads  <wd>/VOCdevkit/VOC<year>/Annotations/<image_id>.xml and writes
    <wd>/VOCdevkit/VOC<year>/labels/<image_id>.txt, one line per object:
    "<class_id> <x> <y> <w> <h>" (normalized via convert()).
    Objects whose class is not in `classes`, or that are marked
    difficult, are skipped.
    """
    print('%s' % (image_id))
    in_path = '%s/VOCdevkit/VOC%s/Annotations/%s.xml' % (wd, year, image_id)
    out_path = '%s/VOCdevkit/VOC%s/labels/%s.txt' % (wd, year, image_id)
    # `with` closes both files even when parsing raises (the original
    # leaked both handles).
    with open(in_path, encoding='UTF-8') as in_file, \
            open(out_path, 'w') as out_file:
        root = ET.parse(in_file).getroot()
        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)
        for obj in root.iter('object'):
            # Some annotation tools omit <difficult>; treat a missing tag
            # as "not difficult" instead of crashing on None.
            difficult_node = obj.find('difficult')
            difficult = difficult_node.text if difficult_node is not None else '0'
            cls = obj.find('name').text
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)
            xmlbox = obj.find('bndbox')
            b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
                 float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
if __name__ == '__main__':
    # wd = getcwd()
    wd = 'D:/openvino_test/darknet-train-dataset'
    for year, image_set in sets:
        # Label folder to be generated.
        labels_dir = '%s/VOCdevkit/VOC%s/labels/' % (wd, year)
        if not os.path.exists(labels_dir):
            os.makedirs(labels_dir)
        # Image-set list produced in step 2 (one image path per line).
        # `with` closes the handle (the original never closed it).
        ids_path = '%s/VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (wd, year, image_set)
        with open(ids_path) as ids_file:
            image_ids = ids_file.read().strip().split()
        with open('%s/%s_%s.txt' % (wd, year, image_set), 'w') as list_file:
            for image_id in image_ids:
                filename = os.path.splitext(image_id)[0]
                # Skip the +1/-1 presence flags some Main files contain.
                if filename == '1' or filename == '-1':
                    continue
                filename = filename.split('/')[-1]
                list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'
                                % (wd, year, filename))
                print(image_id)
                convert_annotation(wd, year, filename)
    # Merge the per-set lists into train.txt / val.txt.
    # BUGFIX: the original `os.system("cat train_*.txt > train.txt")` was
    # non-portable, ran in the CWD rather than `wd`, and its pattern did
    # not match the '<year>_<set>.txt' names actually written above.
    import glob
    for tag in ('train', 'val'):
        parts = sorted(glob.glob('%s/*_%s_*.txt' % (wd, tag)))
        with open('%s/%s.txt' % (wd, tag), 'w') as merged:
            for part in parts:
                with open(part) as pf:
                    merged.write(pf.read())
四、我的 最終目錄和路徑
五、新建 voc.data (我是 car.data)
classes= 3
train = D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/train.txt
valid = D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/val.txt
names = D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/car.names
backup = D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/backup
六、新建 coco.names (我是 car.names)
car
bus
truck
七、改寫配置
配置文件需要注意的幾個參數[net].batch
[net].subdivisions
[yolo].anchors
[yolo].classes
[yolo].num
還有就是每一個[yolo]上面的[convolutional].filters
這個參數!
所有的參數都已經在下面配置文件裏做好註釋
計算錨點:
./darknet detector calc_anchors data/obj.data -num_of_clusters 6(這個6對應下面的num) -width 416 -height 416
filters= ([yolo].classes +5)*3
[net]
# Testing
batch=128 # 一批訓練樣本的樣本數量,每batch個樣本更新一次參數
subdivisions=6 # batch/subdivisions作爲一次性送入訓練器的樣本數量,如果內存不夠大,將batch分割爲subdivisions個子batch
# Training
# batch=64
# subdivisions=2
width=416 # Input圖像的寬
height=416 # Input圖像的高
channels=3 # Input圖像的通道數
momentum=0.9 # 動量
decay=0.0005 # 權重衰減正則項,防止過擬合
angle=0 # 通過旋轉角度來生成更多訓練樣本
saturation = 1.5 # 通過調整飽和度來生成更多訓練樣本
exposure = 1.5 # 通過調整曝光量來生成更多訓練樣本
hue=.1 # 通過調整色調來生成更多訓練樣本
learning_rate=0.001 # 初始學習率
burn_in=1000 # 在迭代次數小於burn_in時,其學習率的更新有一種方式,大於burn_in時,才採用policy的更新方式
#max_batches = 50200 # 訓練達到max_batches後停止學習
max_batches = 301200
policy=steps # 調整學習率的policy,有如下policy:CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
#steps=40000,45000 # 根據batch_num調整學習率
steps=240000,270000
scales=.1,.1 # 學習率變化的比例,累計相乘
[convolutional]
batch_normalize=1 # 是否做BN
filters=16 # 輸出多少個特徵圖
size=3 # 卷積核的尺寸
stride=1 # 做卷積運算的步長
pad=1 # 如果pad爲0,padding由 padding參數指定。如果pad爲1,padding大小爲size/2
activation=leaky # 激活函數:logistic,loggy,relu,elu,relie,plse,hardtan,lhtan,linear,ramp,leaky,tanh,stair
[maxpool]
size=2 # 池化層尺寸
stride=2 # 池化步進
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24 # 計算公式 ([yolo].classes +5)*3
activation=linear
[yolo]
mask = 3,4,5
anchors = 15, 15, 37, 39, 88, 68, 105,156, 241,157, 318,310 # 計算錨點: ./darknet detector calc_anchors data/obj.data -num_of_clusters 6(這個6對應下面的num) -width 416 -height 416
classes=3 # 這裏填寫真實的標籤個數
num=6
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24 # 計算公式 ([yolo].classes +5)*3
activation=linear
[yolo]
mask = 0,1,2 #這一層預測第0,1,2個 anchor boxes ,每個yolo層實際上只預測3個由mask定義的anchors
anchors = 15, 15, 37, 39, 88, 68, 105,156, 241,157, 318,310 # 計算錨點: ./darknet detector calc_anchors data/obj.data -num_of_clusters 6(這個6對應下面的num) -width 416 -height 416
classes=3
num=6
jitter=.3 # 通過抖動增加噪聲來抑制過擬合
ignore_thresh = .7 #決定是否需要計算IOU誤差的參數,大於thresh,IOU誤差不會加在cost function中
truth_thresh = 1
random=1 # 如果爲1,每次迭代圖片大小隨機從320到608,步長爲32,如果爲0,每次訓練大小與輸入大小一致
八、下載卷積層的預訓練權重
yolov3(darknet53.conv.74):下載
tiny-yolov3:(yolov3-tiny.conv.15)
- 下載 yolov3-tiny 的 默認權重文件(yolov3-tiny.weights):下載
- 使用命令獲取預先訓練的權重:
darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15
cfg/yolov3-tiny.cfg 是 darknet 原來的那個cfg,不是上面自定義的噢!
九、開始訓練
# cd 到對應目錄
darknet detector train D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/car.data D:/openvino_test/darknet-train-dataset/VOCdevkit/VOC2020/yolov3-tiny.cfg yolov3-tiny.conv.15 -gpus 0