Training Mask R-CNN on Your Own Data on Windows 10


Environment: Windows 10, PyCharm, matching CUDA and cuDNN, tensorflow 1.15.0, tensorflow-gpu 1.14.0, Anaconda3

Mask R-CNN open-source project: https://github.com/matterport/Mask_RCNN

Step 1: Data preparation

First install labelme to annotate the data: pip install labelme. Because of issues that come up in the scripts below, some people recommend installing labelme 3.2 instead. (There are plenty of usage guides online if you get stuck.)
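To confirm which labelme you actually have, a minimal check (labelme exposes __version__):

import labelme
print(labelme.__version__)  # e.g. a 3.2.x release if you pinned the older version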

For background on the Mask R-CNN algorithm, see: https://blog.csdn.net/linolzhang/article/details/71774168 (reading the original paper is even better; it is easy to find online).

I annotated my data with a recent version of labelme. For now I have a single class, which makes two classes once the background is included.

Annotation produces one JSON file per image, like this.
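For reference, these are the fields of the labelme JSON that the parsing script below relies on (a minimal sketch; the file name is hypothetical):

import json

with open('0001.json', encoding='utf-8') as f:  # hypothetical annotation file
    data = json.load(f)
print(data['imageData'][:30])  # the image itself, base64-encoded
for shape in data['shapes']:
    print(shape['label'], len(shape['points']))  # class name and polygon vertex count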

The JSON files are then parsed with the following script, json_datasets.py:

'''
This script iterates over a folder of labelme JSON files and generates
the five output files for each image.
json_file:  input path containing the JSON files
json_file1: output path for the generated data
'''

# -*- coding: UTF-8 -*-
import json
import os
import os.path as osp
import warnings

import numpy as np
import PIL.Image
import yaml
from labelme import utils

import draw_label

def main():
    # Change these to your own paths: json_file holds the labeled JSON files
    # json_file = 'C:/Users/QJ/Desktop/hh/total'
    json_file = 'D:/lingyun/Mask_RCNN-master/img'
    json_file1 = 'D:/lingyun/Mask_RCNN-master/data/labelme_json'
    list = os.listdir(json_file)
    for i in range(0, len(list)):
        path = os.path.join(json_file, list[i])
        print(path)
        if os.path.isfile(path):
            # data = json.load(open(path))
            data = json.load(open(path, encoding='utf-8'))
            img = utils.img_b64_to_arr(data['imageData'])
            lbl, lbl_names = utils.labelme_shapes_to_label(img.shape, data['shapes'])

            captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
            # lbl_viz = utils.draw_label(lbl, img, captions)
            lbl_viz = draw_label.draw_label(lbl, img, captions)

            out_dir = osp.basename(list[i]).replace('.', '_')
            out_dir = osp.join(osp.dirname(list[i]), out_dir)
            # out_dir = json_file + "/" + out_dir   # original: write next to the input
            out_dir = json_file1 + "/" + out_dir  # changed: write under json_file1
            if not osp.exists(out_dir):
                os.mkdir(out_dir)

            PIL.Image.fromarray(img).save(osp.join(out_dir, 'img.png'))
            labelpath = osp.join(out_dir, 'label.png')
            lbl8u = lbl.astype(np.uint8)  # cast the label map to 8-bit (label values are small)
            PIL.Image.fromarray(lbl8u).save(labelpath)

            # keep everything below inside the isfile check, so out_dir is always defined
            PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, 'label_viz.png'))

            with open(osp.join(out_dir, 'label_names.txt'), 'w') as f:
                for lbl_name in lbl_names:
                    f.write(lbl_name + '\n')

            warnings.warn('info.yaml is being replaced by label_names.txt')
            info = dict(label_names=lbl_names)
            with open(osp.join(out_dir, 'info.yaml'), 'w') as f:
                yaml.dump(info, f, default_flow_style=False)

            print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()

If your labelme's utils module has no draw_label function, check whether the path Anaconda3\Lib\site-packages\labelme\utils contains a draw.py file. If it does not, use the code below, which is that draw.py. I saved it as draw_label.py, so the script above calls the draw_label function from this draw_label.py module.
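You can check whether your installed labelme still ships this helper before copying the fallback (a minimal sketch):

from labelme import utils
print(hasattr(utils, 'draw_label'))  # False means you need the draw_label.py below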

'''I named this file draw_label.py so that the previous script can call its draw_label function.'''
import io

import numpy as np
import PIL.Image
import PIL.ImageDraw


def label_colormap(N=256):
    def bitget(byteval, idx):
        return ((byteval & (1 << idx)) != 0)

    cmap = np.zeros((N, 3))
    for i in range(0, N):
        id = i
        r, g, b = 0, 0, 0
        for j in range(0, 8):
            r = np.bitwise_or(r, (bitget(id, 0) << 7 - j))
            g = np.bitwise_or(g, (bitget(id, 1) << 7 - j))
            b = np.bitwise_or(b, (bitget(id, 2) << 7 - j))
            id = (id >> 3)
        cmap[i, 0] = r
        cmap[i, 1] = g
        cmap[i, 2] = b
    cmap = cmap.astype(np.float32) / 255
    return cmap


# similar function as skimage.color.label2rgb
def label2rgb(lbl, img=None, n_labels=None, alpha=0.5, thresh_suppress=0):
    if n_labels is None:
        n_labels = len(np.unique(lbl))

    cmap = label_colormap(n_labels)
    cmap = (cmap * 255).astype(np.uint8)

    lbl_viz = cmap[lbl]
    lbl_viz[lbl == -1] = (0, 0, 0)  # unlabeled

    if img is not None:
        img_gray = PIL.Image.fromarray(img).convert('LA')
        img_gray = np.asarray(img_gray.convert('RGB'))
        # img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # img_gray = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)
        lbl_viz = alpha * lbl_viz + (1 - alpha) * img_gray
        lbl_viz = lbl_viz.astype(np.uint8)

    return lbl_viz


def draw_label(label, img=None, label_names=None, colormap=None):
    import matplotlib.pyplot as plt
    backend_org = plt.rcParams['backend']
    plt.switch_backend('agg')

    plt.subplots_adjust(left=0, right=1, top=1, bottom=0,
                        wspace=0, hspace=0)
    plt.margins(0, 0)
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())

    if label_names is None:
        label_names = [str(l) for l in range(label.max() + 1)]

    if colormap is None:
        colormap = label_colormap(len(label_names))

    label_viz = label2rgb(label, img, n_labels=len(label_names))
    plt.imshow(label_viz)
    plt.axis('off')

    plt_handlers = []
    plt_titles = []
    for label_value, label_name in enumerate(label_names):
        if label_value not in label:
            continue
        if label_name.startswith('_'):
            continue
        fc = colormap[label_value]
        p = plt.Rectangle((0, 0), 1, 1, fc=fc)
        plt_handlers.append(p)
        plt_titles.append('{value}: {name}'
                          .format(value=label_value, name=label_name))
    plt.legend(plt_handlers, plt_titles, loc='lower right', framealpha=.5)

    f = io.BytesIO()
    plt.savefig(f, bbox_inches='tight', pad_inches=0)
    plt.cla()
    plt.close()

    plt.switch_backend(backend_org)

    out_size = (label_viz.shape[1], label_viz.shape[0])
    out = PIL.Image.open(f).resize(out_size, PIL.Image.BILINEAR).convert('RGB')
    out = np.asarray(out)
    return out

Run json_datasets.py (the img folder here contains only the JSON files).

The output folders are generated one per image:

Each folder contains the five files: img.png, label.png, label_viz.png, label_names.txt and info.yaml.
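To confirm everything was written, a quick check over the output folders (a minimal sketch; json_file1 is the output path used in json_datasets.py):

import os

json_file1 = 'D:/lingyun/Mask_RCNN-master/data/labelme_json'
for d in os.listdir(json_file1):
    print(d, sorted(os.listdir(os.path.join(json_file1, d))))  # expect the five files above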

Further reading: https://blog.csdn.net/xjtdw/article/details/94741984 is a good summary.

There is another approach, though it is better suited to datasets with only a few classes:

'''Written this way for the two-class case (background + one class)'''
import copy
import json
import os
import os.path as osp

import numpy as np

from labelme import utils

NAME_LABEL_MAP = {
    '_background_': 0,
    'your_class_name': 1,  # replace with your own class name
    # 'Rock': 2,
    # 'coal': 3,
}

def main():

    json_file = 'path/to/your/data'  # folder containing both the images and the JSON files

    out_dir = 'path/to/output'  # where the parsed masks are written

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    print(out_dir)
    list = os.listdir(json_file)
    for i in range(0, len(list)):
        path = os.path.join(json_file, list[i])
        if (list[i].split(".")[-1]) != "json":
            continue
        filename = list[i][:-5]  # .json
        print(filename)
        # label_name_to_value = {'_background_': 0}
        if os.path.isfile(path):

            data = json.load(open(path))
            img = utils.image.img_b64_to_arr(data['imageData'])
            lbl, lbl_names = utils.shape.labelme_shapes_to_label(img.shape, data['shapes'])  # labelme_shapes_to_label
            # lbl, lbl_names = utils.shape.shapes_to_label(img.shape, data['shapes'])  # labelme_shapes_to_label
            # lbl = utils.shapes_to_label(img.shape, data['shapes'], label_name_to_value)

            # modify labels according to NAME_LABEL_MAP
            lbl_tmp = copy.copy(lbl)
            for key_name in lbl_names:
                old_lbl_val = lbl_names[key_name]
                new_lbl_val = NAME_LABEL_MAP[key_name]
                lbl_tmp[lbl == old_lbl_val] = new_lbl_val
            lbl_names_tmp = {}
            for key_name in lbl_names:
                lbl_names_tmp[key_name] = NAME_LABEL_MAP[key_name]

            # Assign the new label to lbl and lbl_names dict
            lbl = np.array(lbl_tmp, dtype=np.int8)
            lbl_names = lbl_names_tmp
            captions = ['%d: %s' % (l, name) for l, name in enumerate(lbl_names)]
            # lbl_viz = utils.draw.draw_label(lbl, img, captions)
            utils.lblsave(osp.join(out_dir, '{}.png'.format(filename)), lbl)
            print('Saved to: %s' % out_dir)


if __name__ == '__main__':
    main()



This parses the JSON files and generates the corresponding mask images. They are not plain single-channel grayscale yet, though, and need further processing. (Note: the data folder here must contain both the images and the JSON files.)
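You can see what was produced with PIL (a minimal sketch; the mask file name is hypothetical):

from PIL import Image
import numpy as np

mask = Image.open('path/to/output/0001.png')  # hypothetical mask produced above
print(mask.mode)             # 'P': a palette (colormapped) PNG, not plain grayscale
print(np.array(mask).shape)  # the underlying label indices are 2-D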

Then use the following code to strip the colormap and save single-channel grayscale masks:

import glob
import os.path
import numpy as np

from PIL import Image

import tensorflow as tf

FLAGS = tf.compat.v1.flags.FLAGS

tf.compat.v1.flags.DEFINE_string('original_gt_folder',
                                 'path/to/your/masks',
                                 'Original ground truth annotations.')

tf.compat.v1.flags.DEFINE_string('segmentation_format', 'png', 'Segmentation format.')

tf.compat.v1.flags.DEFINE_string('output_dir',
                                 'path/to/single_channel_masks',
                                 'folder to save modified ground truth annotations.')


def _remove_colormap(filename):
  """Removes the color map from the annotation.

  Args:
    filename: Ground truth annotation filename.

  Returns:
    Annotation without color map.
  """
  return np.array(Image.open(filename))


def _save_annotation(annotation, filename):
  """Saves the annotation as png file.

  Args:
    annotation: Segmentation annotation.
    filename: Output filename.
  """
  pil_image = Image.fromarray(annotation.astype(dtype=np.uint8))
  with tf.io.gfile.GFile(filename, mode='wb') as f:  # binary mode: PNG is binary data
    pil_image.save(f, 'PNG')


def main(unused_argv):
  # Create the output directory if not exists.
  if not tf.io.gfile.isdir(FLAGS.output_dir):
    tf.io.gfile.makedirs(FLAGS.output_dir)

  annotations = glob.glob(os.path.join(FLAGS.original_gt_folder,
                                       '*.' + FLAGS.segmentation_format))
  for annotation in annotations:
    raw_annotation = _remove_colormap(annotation)
    filename = os.path.basename(annotation)[:-4]
    _save_annotation(raw_annotation,
                     os.path.join(
                         FLAGS.output_dir,
                         filename + '.' + FLAGS.segmentation_format))


if __name__ == '__main__':
  tf.compat.v1.app.run()

You now have the corresponding grayscale masks. They look completely black, but that is only how they look: the label values (0, 1, ...) are simply too small to see, and the data is really there.
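A quick sanity check that the "black" masks really contain labels (a minimal sketch; the file name is hypothetical):

import numpy as np
from PIL import Image

mask = np.array(Image.open('path/to/single_channel_masks/0001.png'))  # hypothetical
print(mask.shape, mask.dtype)  # (H, W), uint8 -- single channel
print(np.unique(mask))         # e.g. [0 1]: background plus your class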

Step 2: Training

Put your images and the corresponding masks in the right folders. Here is the code:

import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")
print(ROOT_DIR)
# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
from PIL import Image
# %matplotlib inline

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "path/to/save/logs")  # where checkpoints are written
print(MODEL_DIR)
# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "path/to/mask_rcnn_coco.h5")  # pre-trained COCO weights
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)


class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    # NUM_CLASSES = 1 + 3  # background + 3 shapes
    NUM_CLASSES = 1 + 1  # background + 1 class -- change this for your own dataset!

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    # STEPS_PER_EPOCH = 100
    STEPS_PER_EPOCH = 20

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5


config = ShapesConfig()
config.display()


def get_ax(rows=1, cols=1, size=8):
    """
    返回要在其中使用的Matplotlib Axes數組的所有可視化。 提供一個控制圖形大小的中心點。
     更改默認大小屬性以控制大小,渲染圖像
    """
    _, ax = plt.subplots(rows, cols, figsize=(size * cols, size * rows))
    return ax


class ShapesDataset(utils.Dataset):
    """
    count: 存放的圖片數據
    img_floder: 圖像數據的路徑
    mask_floder: 掩膜數據的路徑
    imglist: 圖像列表
    dataset_root_path: 數據的路徑,大路徑
    """
    def load_shapes_process(self, count, img_floder, mask_floder, imglist, dataset_root_path):
        # register your class name(s) here -- change this for your dataset
        self.add_class("shapes", 1, "your_class_name")
        for i in range(count):
            # get the image width and height
            filestr = imglist[i].split(".")[0]
            mask_path = mask_floder + "/" + filestr + ".png"
            # read img.png only to get the image size; there are many other ways to do this
            cv_img = cv2.imread(dataset_root_path + "labelme_json/" + filestr + "_json/img.png")
            self.add_image("shapes", image_id=i, path=img_floder + "/" + imglist[i],
                           width=cv_img.shape[1], height=cv_img.shape[0], mask_path=mask_path)

    def load_mask(self, image_id):   # must override this! Otherwise the base load_mask() from utils is used
        """Generate instance masks for the shapes of the given image ID."""
        info = self.image_info[image_id]
        count = 1  # number of object classes
        img = Image.open(info['mask_path'])  # open the mask image for this example

        num_obj = np.max(img)  # the largest pixel value = number of instances in the mask
        mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)  # h * w * num_obj
        mask = self.draw_shape_process(num_obj, mask, img, image_id)
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)  # logical_not gives a bool array; cast it to np.uint8
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
        labels = ["class_name_1", "class_name_2"]  # replace with your own class names
        labels_form = []
        for i in range(len(labels)):
            if labels[i].find("class_name_1") != -1:
                labels_form.append("class_name_1")
            elif labels[i].find("class_name_2") != -1:
                labels_form.append("class_name_2")

        class_ids = np.array([self.class_names.index(s) for s in labels_form])
        return mask, class_ids.astype(np.int32)

    def draw_shape_process(self, num_obj, mask, image, image_id):
        """ 根據給定的h*w*num_obj繪製形狀。"""
        info = self.image_info[image_id]
        for index in range(num_obj):
            for i in range(info['width']):
                for j in range(info['height']):
                    at_pixel = image.getpixel((i, j))
                    if at_pixel == index + 1:
                        mask[j, i, index] = 1
        return mask


# basic setup
dataset_root_path = "data/"
img_floder = "path/to/original/images"
mask_floder = "path/to/masks"
# yaml_floder = dataset_root_path
imglist = os.listdir(img_floder)
count = len(imglist)

# Training dataset
dataset_train = ShapesDataset()
# dataset_train.load_shapes(count, img_floder, mask_floder, imglist, dataset_root_path)
dataset_train.load_shapes_process(count, img_floder, mask_floder, imglist, dataset_root_path)
dataset_train.prepare()
# Validation dataset (here simply the first 7 images; ideally use a held-out split)
dataset_val = ShapesDataset()
# dataset_val.load_shapes(7, img_floder, mask_floder, imglist, dataset_root_path)
dataset_val.load_shapes_process(7, img_floder, mask_floder, imglist, dataset_root_path)
dataset_val.prepare()


# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
                          model_dir=MODEL_DIR)

# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training
    model.load_weights(model.find_last(), by_name=True)

# Train the head branches.
# Passing layers="heads" freezes all layers except the head layers.
# You can also pass a regular expression to select which layers to train by name pattern.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=10,
            layers='heads')

# Fine-tune all layers. Passing layers="all" trains every layer.
# You can also pass a regular expression to select which layers to train by name pattern.
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=5,
            layers="all")

The places that need changing are marked in the code; adjust them to your own setup. This script also needs the pre-trained model, just as in my previous post, which you can refer to.

The pre-trained weights are at: https://github.com/matterport/Mask_RCNN/releases

The results look like this:

Note that load_mask() from utils.py must be overridden so that our own masks are loaded; otherwise you will keep hitting this error:

To test on your own data, simply use your own .h5 file in place of the pre-trained model and run detection on your images.

Step 3: Testing

# -*- coding: utf-8 -*-
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
import time
from mrcnn.config import Config
from datetime import datetime
# Root directory of the project
ROOT_DIR = os.getcwd()

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
# Import COCO config
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))  # To find local version
from samples.coco import coco


# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs_process")

# Local path to trained weights file
'''Put the path to your trained .h5 weights file here'''
COCO_MODEL_PATH = os.path.join(MODEL_DIR, r"shapes20200512T1706\mask_rcnn_shapes_0005.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)
    print("cuiwei***********************")

# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "data/pic")  # your test image folder

class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Run inference on 1 GPU with 1 image per GPU. Batch size is 1 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Number of classes (including background) -- remember to change this
    NUM_CLASSES = 1 + 1  # background + 1 class

    # Use small images for faster training. Set the limits of the small side
    # the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 320
    IMAGE_MAX_DIM = 384

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8 * 6, 16 * 6, 32 * 6, 64 * 6, 128 * 6)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 100

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 50

class InferenceConfig(ShapesConfig):
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

config = InferenceConfig()



# Create model object in inference mode.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load weights trained on MS-COCO
model.load_weights(COCO_MODEL_PATH, by_name=True)



# Class names must match the training order: background first, then your class(es)
class_names = ['BG', 'your_class_name']

# IMAGE_DIR = os.path.join(ROOT_DIR, "data/pic")
file_names_path = r"D:\lingyun\new_data_program\data\no_light_red"
file_names_img = os.listdir(file_names_path)
for file_names_img_i in file_names_img:
    img_path = os.path.join(file_names_path, file_names_img_i)
    image = skimage.io.imread(img_path)
    a = datetime.now()
    # Run detection
    results = model.detect([image], verbose=1)
    b = datetime.now()
    # Visualize results
    print("detection time: %.2f s" % (b - a).total_seconds())
    r = results[0]
    visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                class_names, r['scores'])
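
If you would rather save the visualizations than pop up a window for each image, display_instances accepts an ax argument (it is part of the matterport visualize module), so inside the loop above you can draw into your own figure and write it to disk. A minimal sketch, with a hypothetical results folder:

os.makedirs("results", exist_ok=True)
fig, ax = plt.subplots(1, figsize=(16, 16))
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                            class_names, r['scores'], ax=ax)
fig.savefig(os.path.join("results", file_names_img_i), bbox_inches='tight')
plt.close(fig)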
