先做一个测试:把0-99这100个数字随机分配给a集、b集、c集,三者的数量比例为1:2:7(即a集10个、b集20个、c集70个),每个数字被随机分到其中一个集合。
import random
# Demo: randomly partition the integers 0 .. totalnum-1 into three disjoint
# sets a, b and c that hold 10%, 20% and the remaining 70% of the values.
totalnum = 100

# Target sizes as exact integers. Ceiling division reproduces the rounding-up
# that the previous `len(s) < totalnum * fraction` loops performed.
t1 = (totalnum + 9) // 10
t2 = (2 * totalnum + 9) // 10
t3 = totalnum - t1 - t2

# Shuffle every valid value once and slice the result.  The previous code drew
# with random.randint(0, totalnum), whose upper bound is inclusive, so the
# out-of-range value `totalnum` could end up in a or b and leave c one element
# too large.  Slicing a permutation makes the sets disjoint and exactly sized
# by construction.
order = list(range(totalnum))
random.shuffle(order)
a = set(order[:t1])
b = set(order[t1:t1 + t2])
c = set(order[t1 + t2:])

# One pre-formatted string per line: prints the same text the Python 2
# statement `print "a = ", a, len(a)` produced, and is also valid Python 3.
print("a =  %s %d" % (a, len(a)))
print("b =  %s %d" % (b, len(b)))
print("c =  %s %d" % (c, len(c)))
wu@wu-X555LF:~$ python random_test.py
a = set([44, 42, 12, 46, 15, 49, 19, 20, 14, 59]) 10
b = set([35, 36, 86, 39, 8, 9, 75, 76, 13, 77, 29, 24, 78, 3, 53, 22, 88, 68, 93, 7]) 20
c = set([0, 1, 2, 4, 5, 6, 10, 11, 16, 17, 18, 21, 23, 25, 26, 27, 28, 30, 31, 32, 33, 34, 37, 38, 40, 41, 43, 45, 47, 48, 50, 51, 52, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 74, 79, 80, 81, 82, 83, 84, 85, 87, 89, 90, 91, 92, 94, 95, 96, 97, 98, 99]) 70
wu@wu-X555LF:~$
成功分配数据集。
接下来把图像标注文件,按样本7:2:1数量比例分配给train集,val集,test集,用于yolov3训练验证测试,其中单个样本随机分配。
其中一条样本记录为:data\JPEGImages\Air_conditioner\1.jpg 2 10 135 417 788
import random
import os
annotation_path = u"C:\\Users\\Administrator\\Desktop\\compete\\Annotations.txt"
train_path = u"C:\\Users\\Administrator\\Desktop\\compete\\train.txt"
val_path = u"C:\\Users\\Administrator\\Desktop\\compete\\val.txt"
test_path = u"C:\\Users\\Administrator\\Desktop\\compete\\test.txt"


def split_indices(total):
    """Randomly partition range(total) into train / val / test index sets.

    The test set gets 10% and the val set 20% of the indices (both rounded
    up); the train set gets whatever remains.

    Args:
        total: number of samples; indices run from 0 to total - 1.

    Returns:
        A tuple (train_set, val_set, test_set) of pairwise-disjoint sets
        whose union is set(range(total)).
    """
    test_num = (total + 9) // 10
    val_num = (2 * total + 9) // 10
    # Slice a single permutation instead of drawing with
    # random.randint(0, total): randint's upper bound is inclusive, so the
    # non-existent index `total` could be picked and leave a split short.
    order = list(range(total))
    random.shuffle(order)
    test_set = set(order[:test_num])
    val_set = set(order[test_num:test_num + val_num])
    train_set = set(order[test_num + val_num:])
    return train_set, val_set, test_set


def split_annotations(annotation_path, train_path, val_path, test_path):
    """Split an annotation file into train / val / test files (7:2:1).

    Every line of the annotation file is one sample.  Each sample is written
    to exactly one output file, prefixed with its 0-based line number in the
    annotation file and a single space.

    Args:
        annotation_path: path of the annotation file to read.
        train_path: output path for the training samples.
        val_path: output path for the validation samples.
        test_path: output path for the test samples.

    Returns:
        A tuple (train_count, val_count, test_count) of lines written.

    Raises:
        IOError / OSError: if a file cannot be opened.
    """
    with open(annotation_path, "r") as anno:
        lines = anno.readlines()

    train_set, val_set, test_set = split_indices(len(lines))

    # Mode "w" rather than "a+": appending a fresh random split to the output
    # of an earlier run would duplicate samples and could place the same image
    # in more than one split.  The `with` block also guarantees the files are
    # flushed and closed (the old `train_file.close` lacked the call
    # parentheses and never ran).
    with open(train_path, "w") as train_file, \
            open(val_path, "w") as val_file, \
            open(test_path, "w") as test_file:
        for index, line in enumerate(lines):
            # A final line without a newline would otherwise run into the
            # next record written to the same output file.
            if not line.endswith("\n"):
                line += "\n"
            record = str(index) + " " + line
            if index in train_set:
                train_file.write(record)
            elif index in val_set:
                val_file.write(record)
            else:
                test_file.write(record)

    return len(train_set), len(val_set), len(test_set)


if __name__ == "__main__":
    split_annotations(annotation_path, train_path, val_path, test_path)
Annotations.txt
data\JPEGImages\Air_conditioner\1.jpg 2 10 135 417 788
data\JPEGImages\Air_conditioner\10.jpg 3 23 267 472 570
data\JPEGImages\Air_conditioner\100.jpg 3 240 105 565 365
data\JPEGImages\Air_conditioner\1000.jpg 3 4 7 327 662
data\JPEGImages\Air_conditioner\1001.jpg 2 3 8 470 791
data\JPEGImages\Air_conditioner\1002.jpg 3 74 6 627 311
data\JPEGImages\Air_conditioner\1004.jpg 3 164 112 657 316
data\JPEGImages\Air_conditioner\1005.jpg 2 58 161 442 792
data\JPEGImages\Air_conditioner\1007.jpg 2 57 5 473 795
data\JPEGImages\Air_conditioner\1008.jpg 2 350 184 525 473