Part I: kmeans 聚類
1. kmeans 聚類代碼包的下載地址:
https://github.com/lars76/kmeans-anchor-boxes
2.使用方法:
'''
1. 打開 example.py 文件,修改 ANNOTATIONS_PATH。
2. 爲解決 box has no area 問題,修改 example.py 文件,過濾掉 xmax-xmin=0 或 ymax-ymin=0(即寬或高爲 0)的標註框。
3. 在 PyCharm 中運行,或在該文件所在目錄的終端中執行 python example.py
'''
# 修改後的example.py
import glob
import xml.etree.ElementTree as ET
import numpy as np
from kmeans import kmeans, avg_iou
# Directory whose XML annotation files are read by load_dataset().
ANNOTATIONS_PATH = "train2017_xml"
# Alternative annotation directory (VOC2007 layout), kept for reference.
#ANNOTATIONS_PATH = "VOCdevkit/VOC2007/Annotations"
# Number of clusters requested from kmeans (passed as k).
CLUSTERS = 9
def load_dataset(path):
    """Load normalized box sizes from the XML annotation files in ``path``.

    Every file matched by ``<path>/*xml`` is parsed. For each ``object``
    element the bounding-box corners are divided by the image width/height
    read from ``./size``, and the resulting ``[box_width, box_height]`` pair
    is collected.

    Files whose declared image width or height is not positive are skipped:
    their boxes cannot be normalized, and dividing by zero would otherwise
    abort the whole run.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A float numpy array of shape ``(n_boxes, 2)``; the shape is ``(0, 2)``
        when no box was found, so column indexing still works.
    """
    dataset = []
    for xml_file in glob.glob("{}/*xml".format(path)):
        tree = ET.parse(xml_file)

        height = int(tree.findtext("./size/height"))
        width = int(tree.findtext("./size/width"))
        if width <= 0 or height <= 0:
            # Cannot normalize against an empty image size.
            continue

        for obj in tree.iter("object"):
            xmin = int(obj.findtext("bndbox/xmin")) / width
            ymin = int(obj.findtext("bndbox/ymin")) / height
            xmax = int(obj.findtext("bndbox/xmax")) / width
            ymax = int(obj.findtext("bndbox/ymax")) / height

            dataset.append([xmax - xmin, ymax - ymin])

    # reshape keeps the result two-dimensional even when dataset is empty.
    return np.array(dataset, dtype=float).reshape(-1, 2)
# Load the normalized (width, height) pair of every annotated box.
data = load_dataset(ANNOTATIONS_PATH)

# Drop boxes whose width or height is exactly zero; this is the workaround
# for the "box has no area" problem described in the usage notes.
data_nonzero = np.array(
    [box for box in data if not (box[0] == 0 or box[1] == 0)]
)

# Cluster the remaining boxes and report how well the clusters fit them.
out = kmeans(data_nonzero, k=CLUSTERS)
accuracy = avg_iou(data_nonzero, out) * 100
print("Accuracy: {:.2f}%".format(accuracy))
print("Boxes:\n {}".format(out))

# Width / height ratio of each cluster box, printed in ascending order.
width_over_height = out[:, 0] / out[:, 1]
ratios = np.around(width_over_height, decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))
Part II: 還原yolov3 anchors
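1. 粘貼論文中關於 anchors 的一段話(YOLOv3: An Incremental Improvement,2.3 節):
"We still use k-means clustering to determine our bounding box priors. We just sort of chose 9 clusters and 3 scales arbitrarily and then divide up the clusters evenly across scales. On the COCO dataset the 9 clusters were: (10×13), (16×30), (33×23), (30×61), (62×45), (59×119), (116×90), (156×198), (373×326)."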
2. coco有2014 和 2017 兩個年份的數據集。
coco數據集的標籤是json格式,並且它包含三個任務的標註文件:人體關鍵點檢測(person_keypoints)、目標實例檢測(instances)、看圖說話(captions)。
更多關於coco標註詳解,可以參考 https://blog.csdn.net/qq_29631521/article/details/97130811
我們用到的是instance 就是目標檢測實例。
3. 把 json 轉換成voc 需要的 xml
參考自 https://blog.csdn.net/Dreaming_of_you/article/details/105414664
# Convert COCO JSON annotations to VOC-style XML files.
# Before running, only the `root` and `dataType` settings below need to be changed.
import os
import time
import json
import pandas as pd
from tqdm import tqdm
from pycocotools.coco import COCO
def trans_id(category_id):
    """Return the position of ``category_id`` in the module-level ``cats`` list.

    ``cats`` is a list of category records, each a mapping with an ``'id'``
    entry. The first record whose id equals ``category_id`` determines the
    result.

    Args:
        category_id: The category id to look up.

    Returns:
        The zero-based index of the matching record in ``cats``.

    Raises:
        ValueError: If no record in ``cats`` has this id.
    """
    # Scan once and stop at the first match; no throwaway lists are built.
    for index, cat in enumerate(cats):
        if cat['id'] == category_id:
            return index
    raise ValueError("category id {!r} not found in cats".format(category_id))
#/home/studieren/PycharmProjects/kmeans-anchor-boxes-master/annotations/instances_train2017.json
# Directory of the downloaded COCO dataset. The annotation JSON is read from
# <root>/annotations and the XML files are written to <root>/<dataType>_xml.
root = '/home/studieren/PycharmProjects/kmeans-anchor-boxes-master/'
dataType = 'train2014'
anno = '{}/annotations/instances_{}.json'.format(root, dataType)
xml_dir = '{}/{}_xml'.format(root, dataType)

coco = COCO(anno)  # load the annotation file through the COCO API
cats = coco.loadCats(coco.getCatIds())  # category records; trans_id() reads this global

# Create the output directory. A directory left over from an earlier run is
# first renamed with a timestamp suffix, so its contents are not overwritten.
dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
if os.path.exists(xml_dir):
    os.rename(xml_dir, xml_dir + dttm)
os.mkdir(xml_dir)

with open(anno, 'r') as load_f:
    f = json.load(load_f)

imgs = f['images']

# Category names ordered by ascending category id.
df_cate = pd.DataFrame(f['categories'])
df_cate_sort = df_cate.sort_values(["id"], ascending=True)
categories = list(df_cate_sort['name'])
print('categories = ',categories)

df_anno = pd.DataFrame(f['annotations'])
# Group the annotations by image once, instead of scanning the whole table
# again for every image. Row order inside each group is unchanged.
annos_by_image = {
    image_id: group for image_id, group in df_anno.groupby("image_id")
}

for img in tqdm(imgs):
    file_name = img['file_name']
    height = img['height']
    img_id = img['id']
    width = img['width']

    xml_content = [
        "<annotation>",
        " <folder>VOC2007</folder>",
        " <filename>" + file_name + "</filename>",
        " <size>",
        " <width>" + str(width) + "</width>",
        " <height>" + str(height) + "</height>",
        " </size>",
        " <segmented>0</segmented>",
    ]

    # Annotations of this image; images without any annotation get no <object>.
    annos = annos_by_image.get(img_id)
    if annos is not None:
        for index, row in annos.iterrows():
            bbox = row["bbox"]
            category_id = row["category_id"]
            cate_name = categories[trans_id(category_id)]

            # bbox is used as [x, y, width, height]; int() drops any
            # fractional part, so a very thin box can end up with
            # xmin == xmax (or ymin == ymax).
            xml_content.extend([
                " <object>",
                " <name>" + cate_name + "</name>",
                " <pose>Unspecified</pose>",
                " <truncated>0</truncated>",
                " <difficult>0</difficult>",
                " <bndbox>",
                " <xmin>" + str(int(bbox[0])) + "</xmin>",
                " <ymin>" + str(int(bbox[1])) + "</ymin>",
                " <xmax>" + str(int(bbox[0] + bbox[2])) + "</xmax>",
                " <ymax>" + str(int(bbox[1] + bbox[3])) + "</ymax>",
                " </bndbox>",
                " </object>",
            ])
    xml_content.append("</annotation>")

    # Write one XML file per image, named after the image with its extension
    # swapped for .xml (whatever the image extension is).
    xml_path = os.path.join(xml_dir, os.path.splitext(file_name)[0] + '.xml')
    with open(xml_path, 'w+', encoding="utf8") as xml_file:
        xml_file.write('\n'.join(xml_content))
4. 獲得聚類結果
將 example.py 中的 ANNOTATIONS_PATH 修改爲上一步轉換得到的 xml 目錄,然後執行 example.py。
2014 結果如下:
Accuracy: 60.53%
Boxes:
[[0.103125 0.08541667]
[0.1 0.2625 ]
[0.2109375 0.16393443]
[0.046875 0.12708333]
[0.271875 0.42857143]
[0.0234375 0.06088993]
[0.053125 0.03958333]
[0.690625 0.709375 ]
[0.0140625 0.022 ]]
Ratios:
[0.37, 0.38, 0.38, 0.63, 0.64, 0.97, 1.21, 1.29, 1.34]
2017結果如下:
Accuracy: 60.42%
Boxes:
[[0.2640625 0.408 ]
[0.196875 0.15 ]
[0.09375 0.075 ]
[0.046875 0.03541667]
[0.021875 0.06041667]
[0.0125 0.02107728]
[0.0453125 0.12266667]
[0.096875 0.25 ]
[0.6765625 0.70352941]]
Ratios:
[0.36, 0.37, 0.39, 0.59, 0.65, 0.96, 1.25, 1.31, 1.32]
將兩個結果分別×416 和 608
# 416
# 2014
[[ 42. 35.]
[ 41. 109.]
[ 87. 68.]
[ 19. 52.]
[ 113. 178.]
[ 9. 25.]
[ 22. 16.]
[ 287. 295.]
[ 5. 9.]]
# 416
# 2017
[[ 109. 169.]
[ 81. 62.]
[ 39. 31.]
[ 19. 14.]
[ 9. 25.]
[ 5. 8.]
[ 18. 51.]
[ 40. 104.]
[ 281. 292.]]
# yolov3
[[ 10 13]
[ 16 30]
[ 33 23]
[ 30 61]
[ 62 45]
[ 59 119]
[116 90]
[156 198]
[373 326]]
# 608
# 2014
[[ 62. 51.] /
[ 60. 159.] /
[ 128. 99.] /
[ 28. 77.] /
[ 165. 260.] /
[ 14. 37.] /
[ 32. 24.] /
[ 419. 431.] /
[ 8. 13.]] /
# 608
# 2017
[[ 160. 248.]
[ 119. 91.]
[ 57. 45.]
[ 28. 21.]
[ 13. 36.]
[ 7. 12.]
[ 27. 74.]
[ 58. 152.]
[ 411. 427.]]
# yolov3 # 2014
[[ 10 13] [ 8. 13.]
[ 16 30] [ 14. 37.]
[ 33 23] [ 32. 24.]
[ 30 61] [ 28. 77.]
[ 62 45] [ 62. 51.]
[ 59 119] [ 60. 159.]
[116 90] [ 128. 99.]
[156 198] [ 165. 260.]
[373 326]] [ 419. 431.]
對比分析發現基本是 608 大小的 2014 的數據集擬合出來的
結論:通過對比發現
[10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]
這個yolov3 anchors 聚類結果 大概率是由 2014 的 608 大小的尺寸 聚類出來的。
補充一點:聚類自己的數據集時,9個anchors 的排列順序依據是面積大小,也就是每個框兩個數據的乘積,面積小在前,面積大在後。
[10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]
對應面積
[130,480,759,1830,2790,7021,10440,30888,121598]