1、原理
1.1 前言:
anchor box其實就是從訓練集的所有ground truth box中統計(使用k-means)出來的在訓練集中最經常出現的幾個box形狀和尺寸。可以預先將這些統計上的先驗(或來自人類的)經驗加入到模型中,這樣模型在學習的時候,瞎找的可能性就更小了些(玄學思想)。
1.2 公式:
1.3 效果:
只能加快模型收斂的速度,對模型的其他性能無影響
2、重置
2.1 模型下載:
使用GitHub原有模型,在此基礎上修改:
https://github.com/lars76/kmeans-anchor-boxes
2.2 模型修改:
將下載後的模型刪去不需要的部分:
將test文件刪除,留下example.py和kmeans.py,在根目錄下創建plt_anchor.py:
2.3 文件內容:
- example.py:
import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np
from kmeans import kmeans, avg_iou
ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
#以正斜槓/這種形式可以防止反斜槓帶來的轉義錯誤
CLUSTERS = 9
def load_dataset(path):
dataset = []
for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
print(xml_file)
tree = ET.parse(xml_file)
height = int(tree.findtext("./size/height"))
width = int(tree.findtext("./size/width"))
for obj in tree.iter("object"):
xmin = int(float(obj.findtext("bndbox/xmin"))) / width
ymin = int(float(obj.findtext("bndbox/ymin"))) / height
xmax = int(float(obj.findtext("bndbox/xmax"))) / width
ymax = int(float(obj.findtext("bndbox/ymax"))) / height
dataset.append([xmax - xmin, ymax - ymin])
return np.array(dataset)
data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}-{}".format(out[:, 0]*416, out[:, 1]*416))
ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))
將 ANNOTATIONS_PATH =’’ 路徑改成自己的.
運行後:
- plt_anchor.py:
import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np
import matplotlib.pyplot as plt
from kmeans import kmeans, avg_iou
ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
#以正斜槓/這種形式可以防止反斜槓帶來的轉義錯誤
CLUSTERS = 9
BBOX_NORMALIZE = True
def show_cluster(cluster, max_points=2000):
if len(data) > max_points:
idx = np.random.choice(len(data),max_points)
data = data[idx]
plt.scatter(data[:, 0], data[:, 1], s=5, c='lavender')
plt.scatter(cluster[:, 0], cluster[:, 1], c='red', s=100, marker="^")
plt.xlabel("Width")
plt.ylabel("Height")
plt.title("Bounding and anchor distribution")
plt.savefig("cluster.png")
plt.show()
def show_w_h(data,cluster,bins=50):
if data.dtype != np.float32:
data = data.astype(np.float32)
width = data[:, 0]
height = data[:, 1]
ratio = height / width
plt.figure(1, figsize=(20, 6))
plt.subplot(131)
plt.hist(width, bins=bins, color='blue', rwidth=0.8)
plt.xlabel('Width')
plt.ylabel('Number')
plt.grid(True, linestyle='-.')
plt.title('Distribution of Width')
plt.subplot(132)
plt.hist(height, bins=bins, color='green', rwidth=0.8)
plt.xlabel('Height')
plt.ylabel('Number')
plt.grid(True, linestyle='-.')
plt.title('Distribution of Height')
plt.subplot(133)
plt.hist(ratio, bins=bins, color='magenta', rwidth=0.8)
plt.xlabel('Height / Width')
plt.ylabel('Number')
plt.grid(True, linestyle='-.')
plt.title('Distribution of aspect ratio[Height / Width]')
plt.savefig("shape-distribution.png")
plt.show()
def sort_cluster(cluster):
if cluster.dtype != np.float32:
cluster = cluster.astype(np.float32)
area = cluster[:, 0] * cluster[:, 1]
ratio = cluster[:, 1:2] / cluster[:, 0:1]
return np.concatenate([cluster, ratio], axis=-1)
def load_dataset(path):
dataset = []
for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
print(xml_file)
tree = ET.parse(xml_file)
height = int(tree.findtext("./size/height"))
width = int(tree.findtext("./size/width"))
for obj in tree.iter("object"):
xmin = int(float(obj.findtext("bndbox/xmin"))) / width
ymin = int(float(obj.findtext("bndbox/ymin"))) / height
xmax = int(float(obj.findtext("bndbox/xmax"))) / width
ymax = int(float(obj.findtext("bndbox/ymax"))) / height
dataset.append([xmax - xmin, ymax - ymin])
return np.array(dataset)
data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)
out = kmeans(data, k=CLUSTERS)
out_sorted = sort_cluster(out)
if out.dtype != np.float32:
cluster = out.astype(np.float32)
show_cluster(data, out, max_points=2000)
show_w_h(data, out, bins=50)
運行後:
簇類圖:
長寬圖: