版權所有,翻版必究。https://mp.csdn.net/console/editor/html/106094147
運行環境:WIN10,pycharm,相應的CUDA,CUDNN,tensorflow1.15.0,tensorflow-gpu-1.14.0,Anaconda3
準備工作:Mask RCNN開源項目:https://github.com/matterport/Mask_RCNN
第一步:數據準備
首先安裝labelme進行數據的標註。安裝方法pip install labelme。根據後面程序中的有關問題,有些大佬說安裝labelme3.2版本。(網上很多使用方法,不會用可以查一查)
有關mask-rcnn算法可以參考:https://blog.csdn.net/linolzhang/article/details/71774168 (可以去看看論文比較好,網上容易搜索到)
我使用的是新版的labelme,對自己的數據進行標註。目前我做的是一類,加上背景就是兩類。
類似於這種的一張圖對應一個json文件。
然後對json文件進行解析,使用的程序爲json_datasets.py:
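'''
This script processes a folder of labelme JSON files and generates five
output files for every annotated image.
json_file: input folder containing the JSON files
json_file1: output folder for the generated data
'''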
# -*- coding: UTF-8 -*-
import argparse
import json
import os
import os.path as osp
import warnings
import os.path
import subprocess
import numpy as np
import PIL.Image
import cv2
import yaml
from labelme import utils
import draw_label
def main():
    """Convert every labelme JSON file in a folder into a per-image dataset folder.

    For each ``*.json`` file in ``json_file`` a folder ``<name>_json`` is
    created under ``json_file1`` containing five files: ``img.png``,
    ``label.png`` (8-bit label map), ``label_viz.png``, ``label_names.txt``
    and ``info.yaml``.
    """
    # Change these to your own folders: the annotated JSON files and the
    # root folder that receives the generated data.
    # json_file = 'C:/Users/QJ/Desktop/hh/total'
    json_file = 'D:/lingyun/Mask_RCNN-master/img'
    json_file1 = 'D:/lingyun/Mask_RCNN-master/data/labelme_json'

    for file_name in os.listdir(json_file):
        path = os.path.join(json_file, file_name)
        print(path)
        # Skip sub-folders and anything that is not a labelme JSON file so a
        # stray image in the folder does not abort the whole run.
        if not os.path.isfile(path) or not file_name.lower().endswith('.json'):
            continue

        with open(path, encoding='utf-8') as json_fp:
            data = json.load(json_fp)

        img = utils.img_b64_to_arr(data['imageData'])
        lbl, lbl_names = utils.labelme_shapes_to_label(img.shape, data['shapes'])
        captions = ['%d: %s' % (value, label)
                    for value, label in enumerate(lbl_names)]
        # utils.draw_label is missing in some labelme versions, so the local
        # draw_label module is used instead.
        lbl_viz = draw_label.draw_label(lbl, img, captions)

        # "foo.json" -> "<json_file1>/foo_json"
        out_dir = json_file1 + "/" + file_name.replace('.', '_')
        os.makedirs(out_dir, exist_ok=True)

        PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))

        # Store the label map as 8-bit; one vectorised cast replaces the
        # per-pixel copy loop.
        labelpath = osp.join(out_dir, 'label.png')
        PIL.Image.fromarray(lbl.astype(np.uint8)).save(labelpath)

        PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

        with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
            for lbl_name in lbl_names:
                f.write(lbl_name + '\n')

        warnings.warn('info.yaml is being replaced by label_names.txt')
        # info.yaml is written by hand as a "label_names" key followed by one
        # " <name>:" line per label. The earlier yaml.dump of the same file
        # was dropped because this write always overwrote it.
        with open(osp.join(out_dir, 'info.yaml'), 'w') as fov:
            fov.write('label_names')
            fov.write(':\n')
            for label in lbl_names:
                fov.write(' ')
                fov.write(label)
                fov.write(':\n')

        print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()
如果utils中沒有draw_label函數,就尋找Anaconda3\Lib\site-packages\labelme\utils這個路徑下面是否有draw.py文件。如果也沒有,就使用下面這個代碼(即draw.py的內容)。我把這個文件命名爲draw_label.py,然後在上一個程序中調用draw_label.py中的draw_label函數。
'''該文件我的命名爲draw_label,目的是爲了上一個程序調用該程序中的draw_label函數'''
import io
import numpy as np
import PIL.Image
import PIL.ImageDraw
def label_colormap(N=256):
    """Build the PASCAL-VOC style colour table for ``N`` label values.

    Args:
        N: number of label values (rows) to generate.

    Returns:
        A ``float32`` array of shape ``(N, 3)`` with RGB values in ``[0, 1]``.
        Row ``i`` depends only on ``i``, not on ``N``.
    """
    def bitget(byteval, idx):
        return (byteval & (1 << idx)) != 0

    cmap = np.zeros((N, 3))
    for i in range(N):
        value = i
        r = g = b = 0
        # Spread the bits of the label over the three channels, three bits
        # per round, filling each channel from its most significant bit down.
        for j in range(8):
            shift = 7 - j
            r |= bitget(value, 0) << shift
            g |= bitget(value, 1) << shift
            b |= bitget(value, 2) << shift
            value >>= 3
        cmap[i] = (r, g, b)
    return cmap.astype(np.float32) / 255
# Similar to skimage.color.label2rgb.
def label2rgb(lbl, img=None, n_labels=None, alpha=0.5, thresh_suppress=0):
    """Colour a label map, optionally blended over a grayscale copy of ``img``.

    Args:
        lbl: integer label array; ``-1`` marks unlabeled pixels (drawn black).
        img: optional image array to blend the colours over.
        n_labels: size of the colour table. When omitted it is derived from
            ``lbl`` and is always large enough to index the largest label.
        alpha: weight of the label colours in the blend.
        thresh_suppress: accepted for signature compatibility; not used.

    Returns:
        A ``uint8`` RGB array with the spatial shape of ``lbl``.
    """
    if n_labels is None:
        n_labels = len(np.unique(lbl))
        # Counting distinct values is too small when labels are not
        # contiguous (e.g. {0, 2}); make sure the largest label fits.
        if np.size(lbl):
            n_labels = max(n_labels, int(np.max(lbl)) + 1)

    cmap = (label_colormap(n_labels) * 255).astype(np.uint8)
    lbl_viz = cmap[lbl]
    lbl_viz[lbl == -1] = (0, 0, 0)  # unlabeled

    if img is not None:
        # Go through 'LA' and back to 'RGB' to get a 3-channel grayscale image.
        gray = PIL.Image.fromarray(img).convert('LA').convert('RGB')
        lbl_viz = alpha * lbl_viz + (1 - alpha) * np.asarray(gray)
        lbl_viz = lbl_viz.astype(np.uint8)
    return lbl_viz
def draw_label(label, img=None, label_names=None, colormap=None):
    """Render a label map with a legend and return it as an RGB array.

    Args:
        label: integer label array.
        img: optional image array to blend the label colours over.
        label_names: legend text per label value; defaults to the label
            values ``0 .. label.max()`` as strings. Names starting with
            ``'_'`` are left out of the legend.
        colormap: colour per label value; defaults to ``label_colormap``.

    Returns:
        A ``uint8`` RGB array resized to the spatial size of ``label``.
    """
    import matplotlib.pyplot as plt

    backend_org = plt.rcParams['backend']
    plt.switch_backend('agg')
    try:
        plt.subplots_adjust(left=0, right=1, top=1, bottom=0,
                            wspace=0, hspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        if label_names is None:
            label_names = [str(l) for l in range(label.max() + 1)]
        if colormap is None:
            colormap = label_colormap(len(label_names))

        label_viz = label2rgb(label, img, n_labels=len(label_names))
        plt.imshow(label_viz)
        plt.axis('off')

        plt_handlers = []
        plt_titles = []
        for label_value, label_name in enumerate(label_names):
            # Only labels that actually occur in the map get a legend entry.
            if label_value not in label:
                continue
            if label_name.startswith('_'):
                continue
            fc = colormap[label_value]
            plt_handlers.append(plt.Rectangle((0, 0), 1, 1, fc=fc))
            plt_titles.append('{value}: {name}'
                              .format(value=label_value, name=label_name))
        plt.legend(plt_handlers, plt_titles, loc='lower right', framealpha=.5)

        f = io.BytesIO()
        plt.savefig(f, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure and restore the caller's backend, even
        # when rendering fails part-way.
        plt.cla()
        plt.close()
        plt.switch_backend(backend_org)

    out_size = (label_viz.shape[1], label_viz.shape[0])
    out = PIL.Image.open(f).resize(out_size, PIL.Image.BILINEAR).convert('RGB')
    return np.asarray(out)
運行json_datasets.py(其中img中存放的僅僅是json文件)
生成路徑:
每一個裏面五個數據。
擴展:https://blog.csdn.net/xjtdw/article/details/94741984 這位大佬總結的好!
還有一種辦法,不過針對數據類別比較少的。
'''目前的是兩分類這麼寫'''
import argparse
import json
import os
import os.path as osp
import warnings
import copy
import shutil
import numpy as np
import PIL.Image
from skimage import io
import yaml
from labelme import utils
# Maps each labelme class name to the integer value written into the mask PNG.
# main() below looks up every label name found in a JSON file in this dict,
# so add one entry per annotated class.
NAME_LABEL_MAP = {
    '_background_': 0,
    '你的類別': 1,
    # 'Rock': 2,
    # 'coal': 3,
}
def main():
    """Convert labelme JSON files into label PNGs with globally fixed class values.

    Every ``*.json`` file in ``json_file`` is rasterised, its per-file label
    values are remapped to the values in ``NAME_LABEL_MAP``, and the result is
    saved as ``<out_dir>/<name>.png``.

    Raises:
        KeyError: if a JSON file uses a label name missing from
            ``NAME_LABEL_MAP``.
    """
    json_file = '你的數據位置'
    out_dir = '數據的輸出位置'
    os.makedirs(out_dir, exist_ok=True)
    print(out_dir)

    for file_name in os.listdir(json_file):
        if file_name.split(".")[-1] != "json":
            continue
        path = os.path.join(json_file, file_name)
        filename = file_name[:-5]  # strip ".json"
        print(filename)
        if not os.path.isfile(path):
            continue

        # utf-8 matches how the other converter script in this file reads
        # the same labelme output.
        with open(path, encoding='utf-8') as json_fp:
            data = json.load(json_fp)

        img = utils.image.img_b64_to_arr(data['imageData'])
        lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])

        unknown = [key for key in lbl_names if key not in NAME_LABEL_MAP]
        if unknown:
            raise KeyError('labels %r in %s are missing from NAME_LABEL_MAP'
                           % (unknown, path))

        # Remap per-file label values to the global ones. The masks are taken
        # from the untouched ``lbl`` so one remap cannot feed into the next.
        remapped = copy.copy(lbl)
        for key_name, old_lbl_val in lbl_names.items():
            remapped[lbl == old_lbl_val] = NAME_LABEL_MAP[key_name]
        lbl = np.array(remapped, dtype=np.int8)

        utils.lblsave(osp.join(out_dir, '{}.png'.format(filename)), lbl)
        print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()
這段代碼是對json文件的解析,會生成對應的mask圖,但不是單通道的,還需要進一步處理。(補充:上面的數據位置中需要同時存放圖像數據和json文件,兩者均要)
然後使用下面代碼,將其轉化爲灰度圖:
import glob
import os.path
import numpy as np
from PIL import Image
import tensorflow as tf
# Command-line flags. The default values for the two folders are placeholders
# to be replaced with real paths.
FLAGS = tf.compat.v1.flags.FLAGS
# Folder that holds the colour-mapped mask images to convert.
tf.compat.v1.flags.DEFINE_string('original_gt_folder',
                                 '你的mask數據',
                                 'Original ground truth annotations.')
# File extension used both to find the inputs and to name the outputs.
tf.compat.v1.flags.DEFINE_string('segmentation_format', 'png', 'Segmentation format.')
# Folder that receives the converted masks.
tf.compat.v1.flags.DEFINE_string('output_dir',
                                 '生成的單通道的mask數據',
                                 'folder to save modified ground truth annotations.')
def _remove_colormap(filename):
    """Removes the color map from the annotation.

    Args:
      filename: Ground truth annotation filename.

    Returns:
      Annotation without color map, as a numpy array of the raw pixel values.
    """
    # Close the file as soon as the pixels are copied out instead of leaving
    # the handle to the garbage collector.
    with Image.open(filename) as annotation_image:
        return np.array(annotation_image)
def _save_annotation(annotation, filename):
    """Write an annotation array to disk as an 8-bit PNG.

    Args:
      annotation: Segmentation annotation array; it is cast to uint8.
      filename: Output filename.
    """
    as_uint8 = annotation.astype(dtype=np.uint8)
    png_image = Image.fromarray(as_uint8)
    with tf.io.gfile.GFile(filename, mode='w') as out_file:
        png_image.save(out_file, 'PNG')
def main(unused_argv):
    """Strip the colour map from every annotation in the input folder.

    Reads all ``*.<segmentation_format>`` files from
    ``FLAGS.original_gt_folder`` and writes each one, under the same file
    name, to ``FLAGS.output_dir``.

    Args:
      unused_argv: Positional arguments passed by the app runner; ignored.
    """
    # Create the output directory if not exists.
    if not tf.io.gfile.isdir(FLAGS.output_dir):
        tf.io.gfile.makedirs(FLAGS.output_dir)

    pattern = os.path.join(FLAGS.original_gt_folder,
                           '*.' + FLAGS.segmentation_format)
    for annotation in glob.glob(pattern):
        raw_annotation = _remove_colormap(annotation)
        # The output keeps the input's base name. Reusing it avoids the old
        # "[:-4]" slice, which only worked for three-letter extensions.
        output_path = os.path.join(FLAGS.output_dir,
                                   os.path.basename(annotation))
        _save_annotation(raw_annotation, output_path)


if __name__ == '__main__':
    tf.compat.v1.app.run()
至此獲取了對應的灰度掩模圖。圖像看起來是全黑的,好像什麼都沒有,其實是有數據的(像素值是很小的類別編號)。
第二步:訓練數據
將你的圖片和對應的掩模圖放在對應的位置中。上代碼:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
print(ROOT_DIR)
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
from PIL import Image
# %matplotlib inline
# Directory to save logs and trained model.
# The folder name below is a placeholder to replace with your own.
MODEL_DIR = os.path.join(ROOT_DIR, "模型保存位置")
print(MODEL_DIR)
# Local path to the pre-trained weights file (mask_rcnn_coco.h5).
# The file name below is a placeholder to replace with your own.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "預訓練模型位置,mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
class ShapesConfig(Config):
    """Training configuration for the custom dataset.

    Derives from the base Config class and overrides the values below.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"
    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8
    # Number of classes (including background)
    # NUM_CLASSES = 1 + 3 # background + 3 shapes
    NUM_CLASSES = 1 + 1  # background + 1 class; change this for your dataset
    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128
    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels
    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32
    # Use a small epoch since the data is simple
    # STEPS_PER_EPOCH = 100
    STEPS_PER_EPOCH = 20
    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5
# Instantiate the training configuration and print its values.
config = ShapesConfig()
config.display()
def get_ax(rows=1, cols=1, size=8):
    """Return the Matplotlib axes for a ``rows`` x ``cols`` grid of plots.

    Gives every visualization one central place to control figure size:
    each grid cell is ``size`` units wide and ``size`` units tall.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes
class ShapesDataset(utils.Dataset):
    """Dataset that pairs every image with a single-channel label PNG.

    In the label PNG a pixel value of ``k`` (``k >= 1``) marks object ``k``;
    ``0`` is background.
    """

    def load_shapes_process(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        """Register the classes and the first ``count`` images of ``imglist``.

        Args:
            count: number of images to register, taken from the start of
                ``imglist``.
            img_floder: folder containing the original images.
            mask_floder: folder containing one ``<name>.png`` mask per image.
            imglist: image file names inside ``img_floder``.
            dataset_root_path: root folder (with trailing separator) that
                contains ``labelme_json/<name>_json/img.png``.

        Raises:
            FileNotFoundError: if the ``img.png`` used to read the image size
                cannot be loaded.
        """
        # Register your own classes here; change this for your dataset.
        self.add_class("shapes", 1, "你的類別")
        for i in range(count):
            filestr = imglist[i].split(".")[0]
            mask_path = mask_floder + "/" + filestr + ".png"
            # img.png is read only to get the image width and height; any
            # other way of obtaining the size would do.
            size_ref_path = dataset_root_path + "labelme_json/" + filestr + "_json/img.png"
            cv_img = cv2.imread(size_ref_path)
            if cv_img is None:
                # cv2.imread returns None instead of raising on failure.
                raise FileNotFoundError("cannot read image: " + size_ref_path)
            self.add_image("shapes", image_id=i, path=img_floder + "/" + imglist[i],
                           width=cv_img.shape[1], height=cv_img.shape[0], mask_path=mask_path)

    def load_mask(self, image_id):
        """Build the instance masks for the given image id.

        Overrides ``utils.Dataset.load_mask`` so that our own mask PNGs are
        loaded.

        Returns:
            mask: ``uint8`` array of shape ``[height, width, num_obj]``, where
                ``num_obj`` is the largest pixel value in the mask PNG.
            class_ids: ``int32`` array of class ids.
        """
        info = self.image_info[image_id]
        with Image.open(info['mask_path']) as img:
            num_obj = int(np.max(img))  # largest value in the mask image
            mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
            if num_obj > 0:
                mask = self.draw_shape_process(num_obj, mask, img, image_id)
        if num_obj == 0:
            # No object in this image: return empty results rather than
            # failing on the missing last channel.
            return mask, np.zeros([0], dtype=np.int32)

        # The former occlusion pass was removed: it looped over
        # range(count - 2, -1, -1) with count fixed at 1, so it never ran.

        # NOTE(review): these names are placeholders. They must match the
        # names passed to add_class(), and there should be one entry per mask
        # channel, otherwise mask and class_ids disagree in length.
        labels = ["類別名字1", "類別名字2"]
        known_names = ("類別名字1", "類別名字2")
        labels_form = []
        for label in labels:
            for known in known_names:
                if known in label:
                    labels_form.append(known)
                    break
        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)

    def draw_shape_process(self, num_obj, mask, image, image_id):
        """Fill channel ``k`` of ``mask`` with 1 where ``image`` equals ``k + 1``.

        Args:
            num_obj: number of channels to fill.
            mask: ``[height, width, num_obj]`` array, modified in place.
            image: single-channel mask image (PIL image or array).
            image_id: id used to look up the registered height and width.

        Returns:
            The same ``mask`` array.
        """
        info = self.image_info[image_id]
        height, width = info['height'], info['width']
        pixels = np.asarray(image)
        if pixels.ndim != 2:
            # Multi-channel pixels are tuples and never equal an integer
            # label, so nothing is drawn (same as the per-pixel comparison).
            return mask
        region = pixels[:height, :width]
        # One vectorised comparison per object replaces the per-pixel
        # getpixel() loops.
        for index in range(num_obj):
            mask[:height, :width, index][region == index + 1] = 1
        return mask
# Basic settings. The two folder strings are placeholders to replace.
dataset_root_path = "data/"
img_floder = "存放原始圖像的位置"
mask_floder = "存放掩膜的位置"
# yaml_floder = dataset_root_path
imglist = os.listdir(img_floder)
count = len(imglist)

# Training dataset
dataset_train = ShapesDataset()
dataset_train.load_shapes_process(count, img_floder, mask_floder, imglist, dataset_root_path)
dataset_train.prepare()

# Validation dataset: the first images of the same list, at most 7.
# Capping at ``count`` avoids an IndexError when fewer than 7 images exist.
# NOTE(review): these images are also part of the training set.
val_count = min(7, count)
dataset_val = ShapesDataset()
dataset_val.load_shapes_process(val_count, img_floder, mask_floder, imglist, dataset_root_path)
dataset_val.prepare()

# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last
if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Skip the layers whose shape depends on the number of classes.
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)

# model.train() treats ``epochs`` as the total number of epochs reached, not
# the number to run in this call, so the second stage must ask for
# HEAD_EPOCHS + FINE_TUNE_EPOCHS or it would not train at all.
HEAD_EPOCHS = 10
FINE_TUNE_EPOCHS = 5

# Stage 1: train only the head branches. layers="heads" freezes all other
# layers. A regular expression can also be passed to select layers by name.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=HEAD_EPOCHS,
            layers='heads')

# Stage 2: fine-tune all layers with a lower learning rate.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=HEAD_EPOCHS + FINE_TUNE_EPOCHS,
            layers="all")
需要更改的地方在代碼中給出,需要根據自己的實際情況進行修改。此代碼也需要加入預訓練模型,和我上一篇博客一樣,可以參考。
預訓練位置在:https://github.com/matterport/Mask_RCNN/releases
結果如下所示:
其中utils.py中的load_mask()一定需要重寫(在子類中覆蓋),目的就是加載我們自己的數據mask。否則就會一直報錯:
然後測試自己的數據就是使用你自己訓練得到的.h5文件來替代預訓練模型,來對自己的數據進行檢測。
第三步:測試
# -*- coding: utf-8 -*-
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
import time
from mrcnn.config import Config
from datetime import datetime
# Root directory of the project
ROOT_DIR = os.getcwd()
# Import Mask RCNN
sys.path.append(ROOT_DIR) # To find local version of the library
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
# Import COCO config
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/")) # To find local version
from samples.coco import coco
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs_process")
# Local path to the trained weights file: point this at your own .h5 file.
# The path is joined component by component so it contains no "\m" escape
# sequence and also works on non-Windows systems.
COCO_MODEL_PATH = os.path.join(MODEL_DIR, "shapes20200512T1706",
                               "mask_rcnn_shapes_0005.h5")
# Download COCO trained weights from Releases if needed.
# NOTE(review): if the trained file is missing this stores COCO weights under
# its name; confirm that is intended.
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
# Progress marker printed to the console.
print("cuiwei***********************")
# Directory of images to run detection on (location of your data).
# NOTE(review): not referenced again in the visible code, which reads images
# from file_names_path instead.
IMAGE_DIR = os.path.join(ROOT_DIR, "data/pic")
class ShapesConfig(Config):
    """Configuration used when running detection with the trained model.

    Derives from the base Config class and overrides the values below.
    NOTE(review): the training script in this file uses 128x128 images and
    anchor scales (8, 16, 32, 64, 128); the image size and anchors here
    differ. Confirm the weights were trained with a matching configuration.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"
    # Run on 1 GPU with 1 image per GPU. Batch size is 1 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Number of classes (including background); change this for your dataset
    NUM_CLASSES = 1 + 1  # background + 1 class
    # Limits of the small side and the large side; together they determine
    # the image shape.
    IMAGE_MIN_DIM = 320
    IMAGE_MAX_DIM = 384
    # Anchor side lengths in pixels (six times the 8..128 base scales)
    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)  # anchor side in pixels
    # Number of ROIs per image used during training
    TRAIN_ROIS_PER_IMAGE =100
    # Training steps per epoch
    STEPS_PER_EPOCH = 100
    # Validation steps per epoch
    VALIDATION_STEPS = 50
class InferenceConfig(ShapesConfig):
    """Inference variant of ShapesConfig with a batch size of one."""
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()
# Create model object in inference mode.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
# Load the trained weights
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Class names indexed by class id, as display_instances expects; index 0 is
# the background. This list was never defined before, so the visualization
# call raised NameError.
# NOTE(review): replace the placeholder with the class name(s) registered via
# add_class() during training.
class_names = ["BG", "你的類別"]

# IMAGE_DIR = os.path.join(ROOT_DIR, "data/pic")
file_names_path = r"D:\lingyun\new_data_program\data\no_light_red"
file_names_img = os.listdir(file_names_path)
for file_names_img_i in file_names_img:
    img_path = os.path.join(file_names_path, file_names_img_i)
    image = skimage.io.imread(img_path)

    a = datetime.now()
    # Run detection
    results = model.detect([image], verbose=1)
    b = datetime.now()
    # total_seconds() keeps the fractional part; .seconds truncated every
    # sub-second detection time to 0.
    print("shijian", (b - a).total_seconds())

    # Visualize results
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                class_names, r['scores'])