#!/usr/bin/env python
# vim:set fileencoding=utf-8:
#讀取xml文件存入xml_list
#隨機讀取xml_list的內容,按照比例存儲在對應的txt中
#保存txt文件
import os, random
import copy#use copy.deepcopy to copy list
# Directory holding the annotation XML files, and the directory that receives
# the generated split lists (trainval.txt, train.txt, val.txt, test.txt).
xml_path = '/home/lys/py-faster-rcnn/data/mydata/image_with_object/Annotations/'
txt_path = '/home/lys/py-faster-rcnn/data/mydata/image_with_object/Main/'
test_percent = 0.2  # fraction of ALL samples held out as the test set
val_percent = 0.2  # fraction of the trainval samples (all minus test) used for validation


def _list_annotation_names(directory):
    """Return the sorted base names of the ``.xml`` files in *directory*.

    Only the final extension is stripped, so ``img.001.xml`` yields
    ``img.001``. Entries that do not end in ``.xml`` (hidden files,
    sub-directories, stray text files) are skipped so they cannot end up as
    sample IDs. The result is sorted because ``os.listdir`` returns entries in
    arbitrary order.
    """
    names = []
    for filename in os.listdir(directory):
        stem, ext = os.path.splitext(filename)
        if ext.lower() == '.xml':
            names.append(stem)
    names.sort()
    return names


def _take_random(items, fraction, rng):
    """Randomly pull ``int(len(items) * fraction)`` elements out of *items*.

    Returns ``(picked, remaining)``: *picked* is in random order, *remaining*
    keeps the original relative order of *items*. *items* is not modified.
    """
    # Sample positions rather than values so duplicate entries are handled
    # correctly and the remainder is built in a single pass.
    picked_positions = rng.sample(range(len(items)), int(len(items) * fraction))
    position_set = set(picked_positions)
    picked = [items[pos] for pos in picked_positions]
    remaining = [item for pos, item in enumerate(items) if pos not in position_set]
    return picked, remaining


def _write_names(path, names):
    """Write *names* to *path*, one per line, replacing any existing file."""
    with open(path, 'w') as handle:
        handle.writelines(name + '\n' for name in names)


def main(xml_dir=xml_path, txt_dir=txt_path, test_fraction=test_percent,
         val_fraction=val_percent, rng=None):
    """Split the annotation files into train/val/test lists and write them out.

    Args:
        xml_dir: directory containing the ``.xml`` annotation files.
        txt_dir: directory that receives ``trainval.txt``, ``train.txt``,
            ``val.txt`` and ``test.txt``; created if it does not exist.
        test_fraction: share of all samples assigned to the test set.
        val_fraction: share of the trainval samples (everything that is not
            in the test set) assigned to the validation set.
        rng: object providing ``sample`` (e.g. ``random.Random(seed)``) for a
            reproducible split; defaults to the ``random`` module.

    Raises:
        ValueError: if a fraction lies outside ``[0, 1]``.
    """
    if rng is None:
        rng = random
    for label, fraction in (('test_fraction', test_fraction),
                            ('val_fraction', val_fraction)):
        if not 0 <= fraction <= 1:
            raise ValueError('%s must be between 0 and 1, got %r' % (label, fraction))

    names = _list_annotation_names(xml_dir)
    test, trainval = _take_random(names, test_fraction, rng)
    val, train = _take_random(trainval, val_fraction, rng)

    if not os.path.isdir(txt_dir):
        os.makedirs(txt_dir)
    for split_name, members in (('trainval', trainval), ('train', train),
                                ('val', val), ('test', test)):
        _write_names(os.path.join(txt_dir, split_name + '.txt'), members)


if __name__ == '__main__':
    main()
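# Problems encountered:
# - Removing the extension: os.listdir returns a list of file names, so the
#   extension has to be stripped from each name individually.
# - Copying a list: plain assignment (a = b) does not create a new list; both
#   names refer to the same object, so a change made through one is visible
#   through the other. Make a real copy instead, e.g. list(b), b[:] or
#   copy.deepcopy(b).
# - Line breaks: writelines() writes the list elements back to back without
#   adding newlines; to get one name per line, append '\n' to each element.
# - Creating files: opening with 'w+' creates the file if it does not exist and
#   truncates it if it does, so any previous content is overwritten.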
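# Purpose: splits the samples into validation, test and training sets. The
# generated txt files are stored in a folder named Main, which replaces the
# Main folder under '/home/lys/py-faster-rcnn/data/VOCdevkit2007/VOC2007/ImageSets/'.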