目錄
讀取標籤文件
目標檢測的標籤文件一般採用 XML 格式(Pascal VOC 風格),可使用 labelImg 標註工具進行標註。
將全部標籤文件中的目標框信息與對應的源圖片路徑讀取出來:圖片路徑保存到列表中,目標框保存到形狀爲 [圖片數, 最大目標數, 5] 的數組中,一併返回,以供後續處理使用。
讀取文件代碼:
文件名:xml_parse.py
# -*- coding: utf-8 -*-
import os, glob
import numpy as np
import xml.etree.ElementTree as ET
def _read_object_box(node, labels):
    """Convert one ``<object>``/``<part>`` element into ``[x1, y1, x2, y2, label]``.

    Fields that are missing from the element keep their default of 0.
    Tag names are matched by substring, as in the rest of this module.

    :param node: the XML element describing one annotated target
    :param labels: sequence of class names; the class id is ``index + 1``
    :return: list of five ints ``[xmin, ymin, xmax, ymax, class_id]``
    :raises ValueError: if the object's name is not in ``labels`` or a
        coordinate is not numeric
    """
    box = [0, 0, 0, 0, 0]
    corner_slots = (('xmin', 0), ('ymin', 1), ('xmax', 2), ('ymax', 3))
    for child in node:
        if 'name' in child.tag:
            # +1 because class id 0 is reserved for background / padding rows.
            box[4] = labels.index(child.text) + 1
        if 'bndbox' in child.tag:
            for coord in child:
                for key, slot in corner_slots:
                    if key in coord.tag:
                        # Some tools write "12.0"; go through float first so
                        # those values do not make int() raise.
                        box[slot] = int(float(coord.text))
    return box


def paras_annotation(img_dir, ann_dir, labels):
    """Parse every XML annotation file in ``ann_dir``.

    Only directory entries whose name ends in ``.xml`` (case-insensitive)
    are parsed, and they are processed in sorted name order so the result
    is reproducible across runs and platforms.

    :param img_dir: directory holding the images; joined with each
        annotation's ``<filename>`` to build the image path
    :param ann_dir: directory holding the annotation XML files
    :param labels: ("class1", "class2", ...); background is implicit, id 0
    :return: ``(imgs, boxes)`` where ``imgs`` is the list of image paths and
        ``boxes`` is a float array of shape ``[len(imgs), max_boxes, 5]``
        with rows ``x1, y1, x2, y2, class_id``. Images with fewer than
        ``max_boxes`` targets are zero-padded.
    :raises ValueError: if an object name is not listed in ``labels``
    :raises KeyError: if an annotation has no ``<filename>`` element
    """
    imgs_info = []  # one dict per annotation file
    max_boxes = 0   # largest number of targets found in a single image

    # os.listdir returns every entry in arbitrary order: keep only the XML
    # files and sort them so stray files do not crash the parser.
    xml_names = sorted(
        name for name in os.listdir(ann_dir) if name.lower().endswith('.xml')
    )

    for ann in xml_names:
        tree = ET.parse(os.path.join(ann_dir, ann))
        img_info = {'object': []}  # an image may contain several targets

        for elem in tree.iter():
            if 'filename' in elem.tag:
                img_info['filename'] = os.path.join(img_dir, elem.text)
            # Channel count is not read from the annotation.
            if 'width' in elem.tag:
                img_info['width'] = int(float(elem.text))
            if 'height' in elem.tag:
                img_info['height'] = int(float(elem.text))
            if 'object' in elem.tag or 'part' in elem.tag:
                img_info['object'].append(_read_object_box(elem, labels))

        imgs_info.append(img_info)  # filename, width/height, box list
        max_boxes = max(max_boxes, len(img_info['object']))

    # Pack the per-image box lists into one dense, zero-padded array.
    boxes = np.zeros([len(imgs_info), max_boxes, 5])
    print(boxes.shape)

    imgs = []  # image paths, aligned with the first axis of `boxes`
    for i, img_info in enumerate(imgs_info):
        if img_info['object']:
            img_boxes = np.array(img_info['object'])  # shape [n, 5]
            boxes[i, :img_boxes.shape[0]] = img_boxes
        # An image without targets keeps its all-zero rows; assigning the
        # empty (0,) array would fail to broadcast into a (0, 5) slice.
        imgs.append(img_info['filename'])

    return imgs, boxes
測試代碼:
# Smoke test: parse the validation split and print the first sample.
if __name__ == "__main__":
    # Build the paths with os.path.join: hard-coded backslashes only resolve
    # on Windows, while this yields the same strings there and valid paths
    # on POSIX systems.
    img_path = os.path.join("data", "val", "image")  # image directory
    annotation_path = os.path.join("data", "val", "annotation")  # XML directory
    label = ("sugarbeet", "weed")  # class names; background is implicit (id 0)
    img, box = paras_annotation(img_path, annotation_path, label)
    print(img[0])
    print(box.shape)
    print(box[0])
未完待續……