一 数据准备
在object_detection下建立文件夹my_mask_rcnn,把下载下来的数据放进去。
不想自己label的直接下载相关文档;链接主要包含原始图片,标注后的json格式数据,Abyssinian_label_map.pbtxt(类别映射表)。
二 生成train.record val.record
create_tf_record.py用大佬修改的:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 26 10:57:09 2018
@author: shirhe-lyh
"""
"""Convert raw dataset to TFRecord for object_detection.
Please note that this tool only applies to labelme's annotations(json file).
Example usage:
python3 create_tf_record.py \
--images_dir=your absolute path to read images.
--annotations_json_dir=your path to annotaion json files.
--label_map_path=your path to label_map.pbtxt
--output_path=your path to write .record.
"""
import cv2
import glob
import hashlib
import io
import json
import numpy as np
import os
import PIL.Image
import tensorflow as tf
import read_pbtxt_file
flags = tf.app.flags
flags.DEFINE_string('images_dir', None, 'Path to images directory.')
flags.DEFINE_string('annotations_json_dir', 'datasets/annotations/train',
'Path to annotations directory.')
flags.DEFINE_string('label_map_path', None, 'Path to label map proto.')
flags.DEFINE_string('output_path', None, 'Path to the output tfrecord.')
FLAGS = flags.FLAGS
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def int64_list_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def bytes_list_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
def float_list_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def create_tf_example(annotation_dict, label_map_dict=None):
"""Converts image and annotations to a tf.Example proto.
Args:
annotation_dict: A dictionary containing the following keys:
['height', 'width', 'filename', 'sha256_key', 'encoded_jpg',
'format', 'xmins', 'xmaxs', 'ymins', 'ymaxs', 'masks',
'class_names'].
label_map_dict: A dictionary maping class_names to indices.
Returns:
example: The converted tf.Example.
Raises:
ValueError: If label_map_dict is None or is not containing a class_name.
"""
if annotation_dict is None:
return None
if label_map_dict is None:
raise ValueError('`label_map_dict` is None')
height = annotation_dict.get('height', None)
width = annotation_dict.get('width', None)
filename = annotation_dict.get('filename', None)
sha256_key = annotation_dict.get('sha256_key', None)
encoded_jpg = annotation_dict.get('encoded_jpg', None)
image_format = annotation_dict.get('format', None)
xmins = annotation_dict.get('xmins', None)
xmaxs = annotation_dict.get('xmaxs', None)
ymins = annotation_dict.get('ymins', None)
ymaxs = annotation_dict.get('ymaxs', None)
masks = annotation_dict.get('masks', None)
class_names = annotation_dict.get('class_names', None)
labels = []
for class_name in class_names:
label = label_map_dict.get(class_name, 'None')
if label is None:
raise ValueError('`label_map_dict` is not containing {}.'.format(
class_name))
labels.append(label)
encoded_masks = []
for mask in masks:
pil_image = PIL.Image.fromarray(mask.astype(np.uint8))
output_io = io.BytesIO()
pil_image.save(output_io, format='PNG')
encoded_masks.append(output_io.getvalue())
feature_dict = {
'image/height': int64_feature(height),
'image/width': int64_feature(width),
'image/filename': bytes_feature(filename.encode('utf8')),
'image/source_id': bytes_feature(filename.encode('utf8')),
'image/key/sha256': bytes_feature(sha256_key.encode('utf8')),
'image/encoded': bytes_feature(encoded_jpg),
'image/format': bytes_feature(image_format.encode('utf8')),
'image/object/bbox/xmin': float_list_feature(xmins),
'image/object/bbox/xmax': float_list_feature(xmaxs),
'image/object/bbox/ymin': float_list_feature(ymins),
'image/object/bbox/ymax': float_list_feature(ymaxs),
'image/object/mask': bytes_list_feature(encoded_masks),
'image/object/class/label': int64_list_feature(labels)}
example = tf.train.Example(features=tf.train.Features(
feature=feature_dict))
return example
def _get_annotation_dict(images_dir, annotation_json_path):
"""Get boundingboxes and masks.
Args:
images_dir: Path to images directory.
annotation_json_path: Path to annotated json file corresponding to
the image. The json file annotated by labelme with keys:
['lineColor', 'imageData', 'fillColor', 'imagePath', 'shapes',
'flags'].
Returns:
annotation_dict: A dictionary containing the following keys:
['height', 'width', 'filename', 'sha256_key', 'encoded_jpg',
'format', 'xmins', 'xmaxs', 'ymins', 'ymaxs', 'masks',
'class_names'].
#
# Raises:
# ValueError: If images_dir or annotation_json_path is not exist.
"""
# if not os.path.exists(images_dir):
# raise ValueError('`images_dir` is not exist.')
#
# if not os.path.exists(annotation_json_path):
# raise ValueError('`annotation_json_path` is not exist.')
if (not os.path.exists(images_dir) or
not os.path.exists(annotation_json_path)):
return None
with open(annotation_json_path, 'r') as f:
json_text = json.load(f)
shapes = json_text.get('shapes', None)
if shapes is None:
return None
image_relative_path = json_text.get('imagePath', None)
if image_relative_path is None:
return None
image_name = image_relative_path.split('/')[-1]
image_path = os.path.join(images_dir, image_name)
image_format = image_name.split('.')[-1].replace('jpg', 'jpeg')
if not os.path.exists(image_path):
return None
with tf.gfile.GFile(image_path, 'rb') as fid:
encoded_jpg = fid.read()
image = cv2.imread(image_path)
height = image.shape[0]
width = image.shape[1]
key = hashlib.sha256(encoded_jpg).hexdigest()
xmins = []
xmaxs = []
ymins = []
ymaxs = []
masks = []
class_names = []
hole_polygons = []
for mark in shapes:
class_name = mark.get('label')
class_names.append(class_name)
polygon = mark.get('points')
polygon = np.array(polygon)
if class_name == 'hole':
hole_polygons.append(polygon)
else:
mask = np.zeros(image.shape[:2])
cv2.fillPoly(mask, [polygon], 1)
masks.append(mask)
# Boundingbox
x = polygon[:, 0]
y = polygon[:, 1]
xmin = np.min(x)
xmax = np.max(x)
ymin = np.min(y)
ymax = np.max(y)
xmins.append(float(xmin) / width)
xmaxs.append(float(xmax) / width)
ymins.append(float(ymin) / height)
ymaxs.append(float(ymax) / height)
# Remove holes in mask
for mask in masks:
mask = cv2.fillPoly(mask, hole_polygons, 0)
annotation_dict = {'height': height,
'width': width,
'filename': image_name,
'sha256_key': key,
'encoded_jpg': encoded_jpg,
'format': image_format,
'xmins': xmins,
'xmaxs': xmaxs,
'ymins': ymins,
'ymaxs': ymaxs,
'masks': masks,
'class_names': class_names}
return annotation_dict
def main(_):
if not os.path.exists(FLAGS.images_dir):
raise ValueError('`images_dir` is not exist.')
if not os.path.exists(FLAGS.annotations_json_dir):
raise ValueError('`annotations_json_dir` is not exist.')
if not os.path.exists(FLAGS.label_map_path):
raise ValueError('`label_map_path` is not exist.')
label_map = read_pbtxt_file.get_label_map_dict(FLAGS.label_map_path)
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
num_annotations_skiped = 0
annotations_json_path = os.path.join(FLAGS.annotations_json_dir, '*.json')
for i, annotation_file in enumerate(glob.glob(annotations_json_path)):
if i % 100 == 0:
print('On image %d', i)
annotation_dict = _get_annotation_dict(
FLAGS.images_dir, annotation_file)
if annotation_dict is None:
num_annotations_skiped += 1
continue
tf_example = create_tf_example(annotation_dict, label_map)
writer.write(tf_example.SerializeToString())
print('Successfully created TFRecord to {}.'.format(FLAGS.output_path))
if __name__ == '__main__':
tf.app.run()
可以把下载下来的json文件前8张作为train,后两张作为val;
也就是将下载下来的数据中datasets/annotation 文件夹分为train和val
假设你的所有原始图像的路径为 path_to_images_dir,使用 labelme 标注产生的所有用于 训练 的 json 文件的路径为 path_to_train_annotations_json_dir(train),所有用于 验证 的 json 文件的路径为 path_to_val_annotaions_json_dir(val),进入my_mask_rcnn在终端先后执行如下指令:
$ python3 create_tf_record.py \
--images_dir=datasets/images \
--annotations_json_dir=datasets/train\
--label_map_path=Abyssinian_label_map.pbtxt\
--output_path=my_train.record
$ python3 create_tf_record.py \
--images_dir=datasets/images \
--annotations_json_dir=datasets/val\
--label_map_path=Abyssinian_label_map.pbtxt \
--output_path=my_val.record
报错没有cv2就装一个 pip3 install opencv-python
三 模型下载
模型下载链接
我下载的模型是mask_rcnn_inception_v2_coco_2018_01_28.tar.gz
存放地址为my_mask_rcnn/
并且解压到当前目录
四 修改配置文件
将其中的 num_classes : 90 改为你要检测的目标总类目数,比如,因为这里我们只检测 阿比西尼亚猫 一个类,所以改为 1。另外,还需要将该文件中 5 个 PATH_TO_BE_CONFIGURED 改为相应文件的路径;可从下载下来解压后的配置文件修改。‘
# Mask R-CNN with Inception V2
# Configured for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
model {
faster_rcnn {
num_classes: 1
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 800
max_dimension: 1365
}
}
number_of_stages: 3
feature_extractor {
type: 'faster_rcnn_inception_v2'
first_stage_features_stride: 16
}
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
use_dropout: false
dropout_keep_probability: 1.0
predict_instance_masks: true
mask_height: 15
mask_width: 15
mask_prediction_conv_depth: 0
mask_prediction_num_conv_layers: 2
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
}
}
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
second_stage_mask_prediction_loss_weight: 4.0
}
}
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0002
schedule {
step: 900000
learning_rate: .00002
}
schedule {
step: 1200000
learning_rate: .000002
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
fine_tune_checkpoint: "my_mask_rcnn/mask_rcnn_inception_v2_coco/model.ckpt"
from_detection_checkpoint: true
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient enough to train the pets dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 2000
data_augmentation_options {
random_horizontal_flip {
}
}
}
train_input_reader: {
tf_record_input_reader {
input_path: "my_mask_rcnn/my_train.record"
}
label_map_path: "my_mask_rcnn/Abyssinian_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
}
eval_config: {
num_examples: 10
# Note: The below line limits the evaluation process to 10 evaluations.
# Remove the below line to evaluate indefinitely.
max_evals: 10
}
eval_input_reader: {
tf_record_input_reader {
input_path: "my_mask_rcnn/my_val.record"
}
label_map_path: "my_mask_rcnn/Abyssinian_label_map.pbtxt"
load_instance_masks: true
mask_type: PNG_MASKS
shuffle: false
num_readers: 1
}
五训练
model_dir 用来存放训练过程中的数据 老版本是train_dir
$ python3 model_main.py \
--model_dir=my_mask_rcnn/ model_dir\
--pipeline_config_path=my_mask_rcnn/mask_rcnn_inception_v2_coco.config
运行到第70次~
还有训练的时候老是报错超内存
应该如何解决呢?
六 导出结果并且测试
1导出冻结文件
虽然超出内存但是还是会保存训练到一定迭代步数的数据(200步)。
model_dir生成数据情况如下:
在object_detection下执行
在my_mask_rcnn下建立文件夹trained,储放导出文件
python3 export_inference_graph.py
--input_tpye_image_tensor
--pipeline_config_path my_mask_rcnn/mask_rcnn_inception_v2_coco.config
--trained_checkpoint_prefix my_mask_rcnn/model_dir/model.ckpt200
--output_directory my_mask_rcnn/trained/
其中会有一个冻结文件。
2测试
首先复制两张猫的图片到object_detection/test_images 修改名字为image+序号.jpg
其次再research/ 右击打开终端
jupyter-notebook
点击object_detection
打开object_detection_tutorial.ipynb
修改文档
修改范围;图片序号为5则改为(1,6):1,2,3,4,5,
因为提前下好了模型,则不需要download,则把下载模型单元删除了。
效果如下,因为只用了8张训练,并且迭代次数为200次,效果很粗糙,只是熟悉过程。
总结 -温习过程
1.训练需要准备的数据有,原图(3通道.jpg格式),label后的数据(.json),类别标签映射表(Abyssinian_label_map.pbtxt)
2转换为tfrecord格式。(需要create_tf_record.py,原图,.json数据,类别标签映射表)
python3 create_tf_record.py
–images_dir=datasets/images \
–annotations_json_dir=datasets/train\
–label_map_path=Abyssinian_label_map.pbtxt
–output_path=my_train.record
3.下载模型并修改其中自带的配置表
4.导出冻结文件并且测试!