yolov3使用聚類重置anchor

1、原理

1.1 前言:

anchor box其實就是從訓練集的所有ground truth box中統計(使用k-means)出來的在訓練集中最經常出現的幾個box形狀和尺寸。可以預先將這些統計上的先驗(或來自人類的)經驗加入到模型中,這樣模型在學習的時候,瞎找的可能性就更小了些(玄學思想)。

1.2 公式:
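k-means 聚類時不使用歐氏距離,而是以 IoU 定義標註框與聚類中心之間的距離:$d(\text{box}, \text{centroid}) = 1 - \mathrm{IoU}(\text{box}, \text{centroid})$(原文此處為公式圖片)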
1.3 效果:

只能加快模型收斂的速度,對模型的其他性能無影響

2、重置

2.1 模型下載:
使用 GitHub 上現有的 k-means anchor 聚類代碼,在此基礎上修改:
https://github.com/lars76/kmeans-anchor-boxes

2.2 模型修改:
將下載後的模型刪去不需要的部分:
在這裏插入圖片描述
將test文件刪除,留下example.py和kmeans.py,在根目錄下創建plt_anchor.py:
在這裏插入圖片描述
2.3 文件內容:

  • example.py:
import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np

from kmeans import kmeans, avg_iou

# Directory that load_dataset() scans for XML annotation files.
ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
# Writing the path with forward slashes (/) avoids the escape-sequence errors
# that backslashes would cause inside a string literal.
# Number of clusters requested from kmeans(), i.e. how many anchors to compute.
CLUSTERS = 9

def load_dataset(path):
    """Collect the normalised (width, height) of every annotated box under *path*.

    Every file matching ``<path>/*xml`` is parsed. For each ``object`` element
    the ``bndbox`` corners are divided by the image size found in ``size`` so
    that the resulting widths and heights are fractions of the image.

    Annotations whose image size is missing or not positive are skipped (they
    would otherwise raise TypeError / ZeroDivisionError), and so are boxes
    whose width or height is not positive. A message is printed for each skip.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A float array of shape (n_boxes, 2) holding [width, height] rows;
        shape (0, 2) when no usable box was found.
    """
    dataset = []
    for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
        print(xml_file)
        tree = ET.parse(xml_file)

        # findtext() returns None for a missing tag and "" for an empty one;
        # map both to 0 so they are caught by the size check below.
        height = int(float(tree.findtext("./size/height") or 0))
        width = int(float(tree.findtext("./size/width") or 0))
        if height <= 0 or width <= 0:
            print("Skipping {}: missing or zero image size".format(xml_file))
            continue

        for obj in tree.iter("object"):
            xmin = int(float(obj.findtext("bndbox/xmin"))) / width
            ymin = int(float(obj.findtext("bndbox/ymin"))) / height
            xmax = int(float(obj.findtext("bndbox/xmax"))) / width
            ymax = int(float(obj.findtext("bndbox/ymax"))) / height

            box_w = xmax - xmin
            box_h = ymax - ymin
            # NOTE(review): zero-area boxes are assumed to break the IoU-based
            # clustering downstream; confirm against kmeans.py.
            if box_w <= 0 or box_h <= 0:
                print("Skipping degenerate box in {}".format(xml_file))
                continue

            dataset.append([box_w, box_h])

    # reshape keeps the (n, 2) layout even when the list is empty.
    return np.array(dataset, dtype=float).reshape(-1, 2)


# Load the normalised box shapes, then echo the directory they came from.
data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)

# Cluster the shapes and report the average IoU as a percentage.
out = kmeans(data, k=CLUSTERS)
mean_iou_percent = avg_iou(data, out) * 100
print("Accuracy: {:.2f}%".format(mean_iou_percent))

# Scale the normalised centres by 416 before printing widths and heights.
anchor_widths = out[:, 0] * 416
anchor_heights = out[:, 1] * 416
print("Boxes:\n {}-{}".format(anchor_widths, anchor_heights))

# Width / height ratio of every centre, rounded to two decimals.
ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))

將 ANNOTATIONS_PATH = "" 中的路徑改成自己的標註文件(XML)所在目錄。
運行後:
在這裏插入圖片描述

  • plt_anchor.py:
import glob
import xml.etree.ElementTree as ET
import tqdm
import numpy as np
import matplotlib.pyplot as plt
from kmeans import kmeans, avg_iou

# Directory that load_dataset() scans for XML annotation files.
ANNOTATIONS_PATH = "E:/pytorch/ultralytics-yolov3/yolov3-sheep/data/Annotations/"
# Writing the path with forward slashes (/) avoids the escape-sequence errors
# that backslashes would cause inside a string literal.
# Number of clusters requested from kmeans(), i.e. how many anchors to compute.
CLUSTERS = 9
# NOTE(review): not referenced anywhere in the visible code.
BBOX_NORMALIZE = True

def show_cluster(data, cluster, max_points=2000):
    """Scatter-plot the box shapes together with the cluster centres.

    The previous signature omitted ``data`` although the body reads and
    reassigns it, so every call failed; ``data`` is now the first parameter,
    matching how the script calls this function.

    Args:
        data: 2-D array of boxes; column 0 is drawn on the "Width" axis and
            column 1 on the "Height" axis.
        cluster: 2-D array of cluster centres with the same column layout.
        max_points: Upper bound on how many boxes are drawn. When ``data`` has
            more rows, a random subset of this size is drawn instead.

    The figure is saved to "cluster.png" and then shown.
    """
    if len(data) > max_points:
        # Sample without replacement so no box is drawn twice.
        idx = np.random.choice(len(data), max_points, replace=False)
        data = data[idx]
    plt.scatter(data[:, 0], data[:, 1], s=5, c='lavender')
    plt.scatter(cluster[:, 0], cluster[:, 1], c='red', s=100, marker="^")
    plt.xlabel("Width")
    plt.ylabel("Height")
    plt.title("Bounding and anchor distribution")
    plt.savefig("cluster.png")
    plt.show()

def show_w_h(data, cluster, bins=50):
    """Draw histograms of box width, box height and height / width ratio.

    Args:
        data: 2-D array of boxes; column 0 is treated as width and column 1
            as height. It is converted to float32 when it has another dtype.
        cluster: Accepted but not used by this function.
        bins: Number of histogram bins used for each of the three panels.

    The three panels share one figure, which is saved to
    "shape-distribution.png" and then shown.
    """
    boxes = data if data.dtype == np.float32 else data.astype(np.float32)
    box_w, box_h = boxes[:, 0], boxes[:, 1]

    # One row per panel:
    # (subplot position, values, bar colour, x-axis label, panel title)
    panels = (
        (131, box_w, 'blue', 'Width', 'Distribution of Width'),
        (132, box_h, 'green', 'Height', 'Distribution of Height'),
        (133, box_h / box_w, 'magenta', 'Height / Width',
         'Distribution of aspect ratio[Height / Width]'),
    )

    plt.figure(1, figsize=(20, 6))
    for position, values, colour, x_label, panel_title in panels:
        plt.subplot(position)
        plt.hist(values, bins=bins, color=colour, rwidth=0.8)
        plt.xlabel(x_label)
        plt.ylabel('Number')
        plt.grid(True, linestyle='-.')
        plt.title(panel_title)
    plt.savefig("shape-distribution.png")
    plt.show()

def sort_cluster(cluster):
    """Return the cluster centres ordered by area, with a ratio column appended.

    The previous version computed each centre's area but never used it, so the
    rows came back in their input order despite the function's name. The rows
    are now ordered by ascending area (width * height).

    Args:
        cluster: 2-D array with width in column 0 and height in column 1.
            It is converted to float32 when it has another dtype; the caller's
            array is not modified.

    Returns:
        A float32 array of shape (k, 3) whose rows are
        [width, height, height / width], smallest area first.
    """
    if cluster.dtype != np.float32:
        cluster = cluster.astype(np.float32)
    area = cluster[:, 0] * cluster[:, 1]
    # A stable sort keeps the input order of centres that have equal area.
    cluster = cluster[np.argsort(area, kind="stable")]
    ratio = cluster[:, 1:2] / cluster[:, 0:1]
    return np.concatenate([cluster, ratio], axis=-1)

def load_dataset(path):
    """Collect the normalised (width, height) of every annotated box under *path*.

    Every file matching ``<path>/*xml`` is parsed. For each ``object`` element
    the ``bndbox`` corners are divided by the image size found in ``size`` so
    that the resulting widths and heights are fractions of the image.

    Annotations whose image size is missing or not positive are skipped (they
    would otherwise raise TypeError / ZeroDivisionError), and so are boxes
    whose width or height is not positive. A message is printed for each skip.

    Args:
        path: Directory containing the XML annotation files.

    Returns:
        A float array of shape (n_boxes, 2) holding [width, height] rows;
        shape (0, 2) when no usable box was found.
    """
    dataset = []
    for xml_file in tqdm.tqdm(glob.glob("{}/*xml".format(path))):
        print(xml_file)
        tree = ET.parse(xml_file)

        # findtext() returns None for a missing tag and "" for an empty one;
        # map both to 0 so they are caught by the size check below.
        height = int(float(tree.findtext("./size/height") or 0))
        width = int(float(tree.findtext("./size/width") or 0))
        if height <= 0 or width <= 0:
            print("Skipping {}: missing or zero image size".format(xml_file))
            continue

        for obj in tree.iter("object"):
            xmin = int(float(obj.findtext("bndbox/xmin"))) / width
            ymin = int(float(obj.findtext("bndbox/ymin"))) / height
            xmax = int(float(obj.findtext("bndbox/xmax"))) / width
            ymax = int(float(obj.findtext("bndbox/ymax"))) / height

            box_w = xmax - xmin
            box_h = ymax - ymin
            # NOTE(review): zero-area boxes are assumed to break the IoU-based
            # clustering downstream; confirm against kmeans.py.
            if box_w <= 0 or box_h <= 0:
                print("Skipping degenerate box in {}".format(xml_file))
                continue

            dataset.append([box_w, box_h])

    # reshape keeps the (n, 2) layout even when the list is empty.
    return np.array(dataset, dtype=float).reshape(-1, 2)


# Load the normalised box shapes, then echo the directory they came from.
data = load_dataset(ANNOTATIONS_PATH)
print(ANNOTATIONS_PATH)

# Cluster the box shapes into CLUSTERS centres.
out = kmeans(data, k=CLUSTERS)
# The float32 copy used to be bound to an unused name, so the cast never took
# effect. Rebind `out` so the converted array is what gets sorted and plotted.
if out.dtype != np.float32:
    out = out.astype(np.float32)
out_sorted = sort_cluster(out)

# show_cluster is expected to take the boxes first and the centres second.
show_cluster(data, out, max_points=2000)

show_w_h(data, out, bins=50)

運行後:

簇類圖:在這裏插入圖片描述

長寬圖:在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章