efficientdet
dataset.py
CocoDataset
init
def __init__(self, root_dir, set='train2017', transform=None):
    """Open the COCO annotation file for one split and index its images.

    Args:
        root_dir: Dataset root directory. The annotation file is read from
            ``<root_dir>/annotations/instances_<set>.json``.
        set: Name of the split; stored as ``self.set_name``.
        transform: Stored as ``self.transform``; not used by this method.
    """
    self.root_dir, self.set_name, self.transform = root_dir, set, transform

    annotation_file = 'instances_' + self.set_name + '.json'
    annotation_path = os.path.join(self.root_dir, 'annotations', annotation_file)
    self.coco = COCO(annotation_path)
    self.image_ids = self.coco.getImgIds()

    # Build the category-id <-> label <-> name lookup tables.
    self.load_classes()
load_classes
def load_classes(self):
    """Build the lookup tables between COCO category ids, labels and names.

    Categories are visited in ascending ``id`` order; each one receives the
    number of names registered so far as its label. Populates:

        self.classes:             category name -> label
        self.coco_labels:         label -> COCO category id
        self.coco_labels_inverse: COCO category id -> label
        self.labels:              label -> category name
    """
    cats = self.coco.loadCats(self.coco.getCatIds())
    cats.sort(key=lambda cat: cat['id'])

    self.classes = {}
    self.coco_labels = {}
    self.coco_labels_inverse = {}
    for cat in cats:
        # Next free label == how many names have been registered so far.
        label = len(self.classes)
        self.coco_labels[label] = cat['id']
        self.coco_labels_inverse[cat['id']] = label
        self.classes[cat['name']] = label

    # Reverse view of self.classes.
    self.labels = {label: name for name, label in self.classes.items()}
load_image
def load_image(self, image_index):
    """Load one image as an RGB ``float32`` array divided by 255.

    Args:
        image_index: Position in ``self.image_ids`` of the image to load.

    Returns:
        The image read by OpenCV, converted from BGR to RGB channel order,
        cast to ``np.float32`` and divided by 255.

    Raises:
        FileNotFoundError: If OpenCV could not read the image file.
    """
    image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
    path = os.path.join(self.root_dir, self.set_name, image_info['file_name'])

    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of an opaque cvtColor error.
        raise FileNotFoundError('Could not read image: {}'.format(path))

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img.astype(np.float32) / 255.
load_annotations
def load_annotations(self, image_index):
    """Return the non-crowd boxes of one image as an ``(N, 5)`` array.

    Args:
        image_index: Position in ``self.image_ids`` of the image.

    Returns:
        ``np.float64`` array of shape ``(N, 5)`` whose rows are
        ``[x1, y1, x2, y2, label]``. COCO stores boxes as
        ``[x, y, width, height]``; they are converted to corner form here.
        Boxes whose width or height is below 1 are dropped. ``N`` is 0 when
        the image has no usable annotation.
    """
    annotation_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
    if len(annotation_ids) == 0:
        return np.zeros((0, 5))

    # Collect rows in a list and build the array once; appending to the
    # array inside the loop would copy it for every box.
    rows = []
    for ann in self.coco.loadAnns(annotation_ids):
        x, y, width, height = ann['bbox']
        if width < 1 or height < 1:
            continue  # skip degenerate boxes
        rows.append([x, y, width, height, self.coco_label_to_label(ann['category_id'])])

    if not rows:
        return np.zeros((0, 5))

    annotations = np.array(rows, dtype=np.float64)
    # [x, y, w, h] -> [x1, y1, x2, y2]
    annotations[:, 2] += annotations[:, 0]
    annotations[:, 3] += annotations[:, 1]
    return annotations
collater
def collater(data):
    """Merge a list of per-image samples into one batch dict.

    Args:
        data: Sequence of dicts with keys ``'img'``, ``'annot'`` and
            ``'scale'``. All ``'img'`` entries must stack along a new first
            axis; each ``'annot'`` has shape ``(n_i, 5)``.

    Returns:
        dict with
            ``'img'``:   stacked images with the last axis moved to
                         position 1 (``permute(0, 3, 1, 2)``),
            ``'annot'``: tensor of shape ``(batch, max(max_i n_i, 1), 5)``
                         where unused rows are filled with ``-1``,
            ``'scale'``: list of the per-sample scales, in input order.
    """
    imgs = [s['img'] for s in data]
    annots = [s['annot'] for s in data]
    scales = [s['scale'] for s in data]

    imgs = torch.from_numpy(np.stack(imgs, axis=0))

    # Pad every sample to the largest annotation count in the batch. At
    # least one row is always kept so the tensor is never empty; padding
    # rows are marked with -1.
    max_num_annots = max(annot.shape[0] for annot in annots)
    annot_padded = torch.full((len(annots), max(max_num_annots, 1), 5), -1.0)
    for idx, annot in enumerate(annots):
        num_annots = annot.shape[0]
        if num_annots > 0:
            annot_padded[idx, :num_annots, :] = annot

    imgs = imgs.permute(0, 3, 1, 2)
    return {'img': imgs, 'annot': annot_padded, 'scale': scales}
Resizer
class Resizer(object):
    """Resize a sample onto a square canvas and convert it to tensors.

    1. Scale the image so that its longer side equals ``img_size`` while
       keeping the aspect ratio.
    2. Zero-pad the result (bottom/right) to ``img_size`` x ``img_size``.
    3. Scale the first four annotation columns by the same factor.
    """

    def __init__(self, img_size=512):
        """Store the side length of the square output canvas."""
        self.img_size = img_size

    def __call__(self, sample):
        """Resize ``sample['img']`` and rescale ``sample['annot']``.

        Args:
            sample: dict with ``'img'`` (H x W x 3 ndarray) and ``'annot'``
                (ndarray whose first four columns are box coordinates).

        Returns:
            dict with ``'img'`` (float32 tensor of shape
            ``(img_size, img_size, 3)``), ``'annot'`` (tensor of the scaled
            annotations) and ``'scale'`` (the factor that was applied).
            The input arrays are left unmodified.
        """
        image, annots = sample['img'], sample['annot']
        height, width, _ = image.shape

        # Fit the longer side to img_size; the shorter side follows.
        if height > width:
            scale = self.img_size / height
            resized_height = self.img_size
            resized_width = int(width * scale)
        else:
            scale = self.img_size / width
            resized_height = int(height * scale)
            resized_width = self.img_size

        # Extreme aspect ratios can truncate the short side to 0, which
        # cv2.resize rejects; keep at least one pixel.
        resized_height = max(resized_height, 1)
        resized_width = max(resized_width, 1)

        image = cv2.resize(image, (resized_width, resized_height), interpolation=cv2.INTER_LINEAR)

        # Allocate the canvas directly as float32 (the output dtype) rather
        # than as float64 followed by a cast.
        new_image = np.zeros((self.img_size, self.img_size, 3), dtype=np.float32)
        new_image[0:resized_height, 0:resized_width] = image

        # Work on a copy so the caller's annotation array is not mutated.
        annots = annots.copy()
        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}
Augmenter
class Augmenter(object):
    """Randomly mirror a sample left-to-right.

    1. Data augmentation: reverse the image along its second axis.
    2. Mirror the annotation x-coordinates to match.
    """

    def __call__(self, sample, flip_x=0.5):
        """Flip the sample horizontally with probability ``flip_x``.

        Args:
            sample: dict with ``'img'`` (3-D ndarray, second axis is the one
                reversed) and ``'annot'`` (ndarray whose columns 0 and 2 are
                the x1 / x2 box coordinates).
            flip_x: Probability of flipping; a draw from
                ``np.random.rand()`` below this value triggers the flip.

        Returns:
            ``sample`` itself when no flip happens. Otherwise a new dict
            with the flipped ``'img'`` (a reversed view, not a copy) and a
            mirrored copy of ``'annot'``; any other keys of ``sample`` are
            carried over unchanged and the input annotations are not
            modified.
        """
        if not np.random.rand() < flip_x:
            return sample

        image = sample['img'][:, ::-1, :]
        cols = image.shape[1]

        # Mirror on a copy so the caller's array stays intact. The old x1
        # is saved first because column 0 is overwritten before it is used.
        annots = sample['annot'].copy()
        x1 = annots[:, 0].copy()
        annots[:, 0] = cols - annots[:, 2]
        annots[:, 2] = cols - x1

        # Keep any additional keys so both branches return the same schema.
        return {**sample, 'img': image, 'annot': annots}
Normalizer