數據集製作
基於MSCNN的人羣密度估計:
生成數據集
- 人羣密度數據標記
- 在labelme中選擇“points”對圖片中的人頭進行標記,標籤隨便填, 我記得是"crow";
- 解析第一步生成的json文件,生成所需的數據集;
json文件格式如下:
{
"version": "3.21.1",
"flags": {},
"shapes": [
{
"label": "crow",
"line_color": null,
"fill_color": null,
"points": [
[
59.74025974025974,
65.97402597402598
]
],
"shape_type": "point",
"flags": {}
},
{
"label": "crow",
"line_color": null,
"fill_color": null,
"points": [
[
42.42424242424242,
77.22943722943722
]
],
"shape_type": "point",
"flags": {}
},
……
"imagePath": "people7.jpg",
"imageData": "/9j/4AAQSkZJRgA",
"imageHeight": 189,
"imageWidth": 343
}
把其中包含的points數據解析出來即可,生成的數據集包含4項,分別是:圖片名,人數,密度等級,座標列表,存成*.mat格式
def create_crowLabel(self):
    """Build the crowd-count ground-truth file ``crow_gt.mat``.

    Parses every labelme JSON file found in ``json_dir`` (module-level
    path), and for each image collects four aligned fields: image name,
    head count, density level (fixed at 1 here: this routine only labels
    images with 1-99 heads), and the list of head coordinates.  The
    samples are shuffled and saved to ``../dataset/denselevel/crow_gt.mat``.
    """
    res_dic = {}
    img_list = []
    count_list = []
    dense_list = []
    points_list = []
    # Only images containing 1-99 heads are labelled by this routine,
    # hence the constant density level 1.
    json_files = glob.glob(json_dir + r'\*.json')
    for json_file in tqdm.tqdm(json_files):
        img_name, pnts = self.json_parese(json_file)
        img_list.append(img_name)
        count_list.append(len(pnts))
        dense_list.append(1)
        points_list.append(pnts)
    # Shuffle all four lists with the SAME permutation so per-sample
    # fields stay aligned.  BUGFIX: the original saved the unshuffled
    # 'img' and 'dense' lists next to the shuffled 'count' and 'points',
    # silently misaligning image names with their counts/coordinates.
    index_list = list(range(len(img_list)))
    random.shuffle(index_list)
    img_temp = [img_list[i] for i in index_list]
    count_temp = [count_list[i] for i in index_list]
    dense_temp = [dense_list[i] for i in index_list]
    point_temp = [points_list[i] for i in index_list]
    res_dic['img'] = np.array(img_temp)
    res_dic['count'] = np.array(count_temp)
    res_dic['dense'] = np.array(dense_temp)
    res_dic['points'] = np.array(point_temp)
    savemat(os.path.join('../dataset/denselevel', 'crow_gt.mat'), res_dic)
- 圖片密度等級標記
只需要將圖片標記爲0,1,2即可,我花了半天時間將之前開發的一個複雜打標工具修改成了這個簡單的打標工具,源碼請參見:https://github.com/zzubqh/CrowdCount/tree/master/CrawDenseTool
保存的文件格式爲:圖片名,密度等級。大致如下:
對這個文件進行解析,生成類似於人羣密度數據的格式。
# Build the density-level labels: three coarse classes — 0 heads,
# 1-99 heads, and 100+ heads.
def create_denselevelLabel(self):
    """Parse the density-level label file and save it as ``dense_gt.mat``.

    Each line of the module-level ``dense_label`` file is expected to be
    "image_name,density_level".  The parsed names and integer levels are
    stored under the keys 'img' and 'dense' in
    ``../dataset/denselevel/dense_gt.mat``.
    """
    names = []
    levels = []
    with open(dense_label, 'r') as rf:
        for line in rf:
            fields = line.strip().split(',')
            names.append(fields[0])
            levels.append(int(fields[1]))
    res_dic = {'img': np.array(names), 'dense': np.array(levels)}
    savemat(os.path.join('../dataset/denselevel', 'dense_gt.mat'), res_dic)
用於訓練的數據生成器
在訓練的時候採用自動生成訓練/驗證數據的方式,按keras要求編寫數據生成器即可,具體參見 https://github.com/zzubqh/CrowdCount/blob/master/src/data.py 文件
def gen_train(self, batch_size, size):
    """Yield ``(batch_x, batch_y)`` training batches forever.

    Uses the first 80% of ``self.filenames`` as the training split
    (the remaining 20% is the validation split, see ``gen_valid``).

    :param batch_size: number of samples per yielded batch
    :param size: target size, forwarded unchanged to ``self.get_img_data``
    :return: infinite generator of ``(np.ndarray, np.ndarray)`` pairs
    :raises Exception: if ``batch_size`` exceeds the split size
    """
    # Indices of the training split: by convention the first 80% of the data.
    index_all = list(range(int(len(self.filenames) * 0.8)))
    i, n = 0, len(index_all)
    if batch_size > n:
        raise Exception('Batch size {} is larger than the number of dataset {}!'.format(batch_size, n))
    while True:
        # Reshuffle and restart once fewer than batch_size samples remain.
        # BUGFIX: the original used '>=', which discarded the final batch
        # even when it fit exactly, and spun forever when batch_size == n.
        if i + batch_size > n:
            np.random.shuffle(index_all)
            i = 0
        batch_x, batch_y = [], []
        for j in range(i, i + batch_size):
            x, y = self.get_img_data(index_all[j], size)
            batch_x.append(x)
            batch_y.append(y)
        i += batch_size
        yield np.array(batch_x), np.array(batch_y)
def gen_valid(self, batch_size, size):
    """Yield ``(batch_x, batch_y)`` validation batches forever.

    Uses the last 20% of ``self.filenames`` as the validation split
    (the first 80% is the training split, see ``gen_train``).

    :param batch_size: number of samples per yielded batch
    :param size: target size, forwarded unchanged to ``self.get_img_data``
    :return: infinite generator of ``(np.ndarray, np.ndarray)`` pairs
    :raises Exception: if ``batch_size`` exceeds the split size
    """
    # Indices of the validation split: the last 20% of the data.
    index_all = list(range(int(len(self.filenames) * 0.8), len(self.filenames)))
    i, n = 0, len(index_all)
    if batch_size > n:
        raise Exception('Batch size {} is larger than the number of dataset {}!'.format(batch_size, n))
    while True:
        # Reshuffle and restart once fewer than batch_size samples remain.
        # BUGFIX: the original used '>=', which discarded the final batch
        # even when it fit exactly, and spun forever when batch_size == n.
        if i + batch_size > n:
            np.random.shuffle(index_all)
            i = 0
        batch_x, batch_y = [], []
        for j in range(i, i + batch_size):
            x, y = self.get_img_data(index_all[j], size)
            batch_x.append(x)
            batch_y.append(y)
        i += batch_size
        yield np.array(batch_x), np.array(batch_y)