所用ICNet版本:hellochick-Github,star 286
目前想在Tensorflow框架下使用ICNet訓練自己的數據集,發現語義分割方面好像Tensorflow框架下的“官方”代碼很少,都是大牛按照原作者的論文結合原作者在caffe框架下的代碼復現的……本篇博客使用的代碼也不例外,是臺灣省的國立清華大學的一名碩士生寫的……膜拜。
這篇博客記錄一下如何不使用預訓練的權重訓練自己的數據集(因爲權重文件都要從Google Drive上下載……emmm,牆好高啊)。
P.S. 看到其他博客有說這個作者的代碼變化很大,我使用的是新版的代碼(沒有tool.py)
1.準備數據集
以我使用的數據集舉例,假設我們已經完成了數據集的標註工作(使用labelme標註),我的數據集文件夾爲:LMC_2433_DATASET,是一個關於變電站環境道路的自制數據集,總共包含2433張圖片以及對應的label。
將圖片隨機分爲三個部分,分別是train、val(可選)、test部分,三部分沒有交叉。個人建議train劃分的圖片多一些,我的比例是:1569,0,864。隨後將對應的標籤文件(8bit灰度圖)分別放在對應的annot文件夾下。然後準備對應的txt文件,txt文件的寫法是:
原圖地址 標籤地址
原圖地址 標籤地址
原圖地址 標籤地址
...
每一行對應一對原圖和label地址,地址使用絕對地址,中間用空格分割開。至此,數據集準備完畢。
2.修改源碼&訓練
因爲無法下載預訓練的權重文件,我只好挨個修改代碼,做到在沒有預訓練權重的情況下進行訓練。
針對train.py文件:
"""
This code is based on DrSleep's framework: https://github.com/DrSleep/tensorflow-deeplab-resnet
"""
import argparse
import os
import sys
import time
import tensorflow as tf
import numpy as np
from model import ICNet_BN
from utils.config import Config
from utils.visualize import decode_labels
from utils.image_reader import ImageReader, prepare_label
def get_arguments(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behaviour;
            passing a list allows programmatic use.

    Returns:
        argparse.Namespace with the attributes ``random_mirror``,
        ``random_scale``, ``update_mean_var``, ``train_beta_gamma``,
        ``dataset`` and ``filter_scale``.
    """
    parser = argparse.ArgumentParser(description="Reproduced ICNet")
    parser.add_argument("--random-mirror", action="store_true",
                        help="Whether to randomly mirror the inputs during the training.")
    parser.add_argument("--random-scale", action="store_true",
                        help="Whether to randomly scale the inputs during the training.")
    # The help text previously referred to a non-existent `tf.Graphic_Keys`.
    parser.add_argument("--update-mean-var", action="store_true",
                        help="whether to get update_op from tf.GraphKeys.UPDATE_OPS")
    parser.add_argument("--train-beta-gamma", action="store_true",
                        help="whether to train beta & gamma in bn layer")
    parser.add_argument("--dataset", required=True,
                        help="Which dataset to train with",
                        choices=['cityscapes', 'ade20k', 'others'])
    parser.add_argument("--filter-scale", type=int, default=2,
                        help="1 for using pruned model, while 2 for using non-pruned model.",
                        choices=[1, 2])
    return parser.parse_args(argv)
def get_mask(gt, num_classes, ignore_label):
    """Return the indices of the label entries that are kept for the loss.

    An entry is kept when its value is at most ``num_classes - 1`` and
    differs from ``ignore_label``.

    Args:
        gt: Label tensor. Assumed to be 1-D (the caller in this file flattens
            it first), since axis 1 of ``tf.where`` is squeezed away.
        num_classes: Number of classes.
        ignore_label: Label value to exclude.

    Returns:
        Tensor holding the positions of the kept entries.
    """
    keep = tf.logical_and(
        tf.less_equal(gt, num_classes - 1),
        tf.not_equal(gt, ignore_label),
    )
    return tf.squeeze(tf.where(keep), 1)
def create_loss(output, label, num_classes, ignore_label):
    """Mean sparse softmax cross-entropy of one branch over the kept pixels.

    Args:
        output: Branch logits; dimensions 1 and 2 of its static shape are
            passed to ``prepare_label`` (presumably height and width -
            confirm against the network definition).
        label: Ground-truth label tensor handed to ``prepare_label``.
        num_classes: Number of classes (size of the last logits axis).
        ignore_label: Label value excluded from the loss.

    Returns:
        Scalar tensor: the mean cross-entropy over the entries selected by
        ``get_mask``.
    """
    target_size = tf.stack(output.get_shape()[1:3])
    flat_label = tf.reshape(
        prepare_label(label, target_size, num_classes=num_classes, one_hot=False),
        [-1,])

    # Only entries with a valid, non-ignored label contribute to the loss.
    valid = get_mask(flat_label, num_classes, ignore_label)
    logits = tf.gather(tf.reshape(output, [-1, num_classes]), valid)
    targets = tf.cast(tf.gather(flat_label, valid), tf.int32)

    per_entry = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)
    return tf.reduce_mean(per_entry)
def create_losses(net, label, cfg):
    """Build the three branch losses and their weighted, regularised sum.

    Args:
        net: Network object exposing ``layers`` with the entries
            ``'sub4_out'``, ``'sub24_out'`` and ``'conv6_cls'``.
        label: Ground-truth label tensor.
        cfg: Configuration providing ``param['num_classes']``,
            ``param['ignore_label']``, ``WEIGHT_DECAY`` and ``LAMBDA1..3``.

    Returns:
        Tuple ``(loss_sub4, loss_sub24, loss_sub124, reduced_loss)``.
    """
    n_classes = cfg.param['num_classes']
    ignored = cfg.param['ignore_label']

    # One cross-entropy loss per output branch, in sub4 / sub24 / sub124 order.
    loss_sub4, loss_sub24, loss_sub124 = [
        create_loss(net.layers[key], label, n_classes, ignored)
        for key in ('sub4_out', 'sub24_out', 'conv6_cls')
    ]

    # L2 penalty over every trainable variable whose name contains 'weights'.
    weight_decay = tf.add_n([
        cfg.WEIGHT_DECAY * tf.nn.l2_loss(var)
        for var in tf.trainable_variables()
        if 'weights' in var.name
    ])

    # Weighted sum of the three branches; tune the LAMBDA values as needed.
    reduced_loss = (cfg.LAMBDA1 * loss_sub4
                    + cfg.LAMBDA2 * loss_sub24
                    + cfg.LAMBDA3 * loss_sub124
                    + weight_decay)
    return loss_sub4, loss_sub24, loss_sub124, reduced_loss
class TrainConfig(Config):
    """Training-time configuration layered on top of ``Config``.

    The class attributes below take precedence over attributes of the same
    name defined on ``Config``.
    """

    # Path of the pre-trained weights (downloadable upstream with
    # `python script/download_weights.py`; the "bnnomerge" version is needed).
    # Upstream value: './model/cityscapes/icnet_cityscapes_train_30k_bnnomerge.npy'
    # Left empty here because no pre-trained weights are used.
    model_weight = ''

    # Hyperparameters; more settings live in the Config class, see
    # 'utils/config.py'. Adjust these as needed.
    LAMBDA1 = 0.16
    LAMBDA2 = 0.4
    LAMBDA3 = 1.0
    BATCH_SIZE = 4
    LEARNING_RATE = 1e-2

    def __init__(self, dataset, is_training, filter_scale=1, random_scale=None, random_mirror=None):
        """Forward every argument unchanged to ``Config.__init__``."""
        super(TrainConfig, self).__init__(
            dataset, is_training, filter_scale, random_scale, random_mirror)
def main():
    """Create the model and start the training.

    Builds the training graph and a weight-sharing validation graph, sets up
    a poly learning-rate schedule with a momentum optimizer, optionally
    restores pre-trained weights, and then runs ``cfg.TRAINING_STEPS``
    optimisation steps, printing the losses and saving periodic snapshots.
    """
    args = get_arguments()

    # Get configurations here. Some command-line arguments are passed to the
    # configuration; training hyperparameters are set in the TrainConfig class.
    # Note: filter scale is 1 for the pruned model and 2 for the non-pruned
    # model. The non-pruned model has twice as many filters as the pruned
    # model, e.g., [h, w, 64] <-> [h, w, 32].
    #
    # The augmentation flags are forwarded again instead of being hard-coded
    # to False. Both default to False, so `python train.py --dataset=others`
    # behaves as before, while --random-scale / --random-mirror are no longer
    # silently ignored.
    cfg = TrainConfig(dataset=args.dataset,
                      is_training=True,
                      random_scale=args.random_scale,
                      random_mirror=args.random_mirror,
                      filter_scale=args.filter_scale)
    cfg.display()

    # Setup training network and training samples
    train_reader = ImageReader(cfg=cfg, mode='train')
    train_net = ICNet_BN(image_reader=train_reader, cfg=cfg, mode='train')
    loss_sub4, loss_sub24, loss_sub124, reduced_loss = create_losses(train_net, train_net.labels, cfg)

    # Setup validation network and validation samples (variables are reused)
    with tf.variable_scope('', reuse=True):
        val_reader = ImageReader(cfg, mode='eval')
        val_net = ICNet_BN(image_reader=val_reader, cfg=cfg, mode='train')
    # Only the combined validation loss is reported.
    _, _, _, val_reduced_loss = create_losses(val_net, val_net.labels, cfg)

    # Using Poly learning rate policy
    base_lr = tf.constant(cfg.LEARNING_RATE)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / cfg.TRAINING_STEPS), cfg.POWER))

    # Variables to restore; collected before the optimizer is created.
    restore_var = tf.global_variables()
    # beta / gamma variables are only optimised with --train-beta-gamma.
    all_trainable = [v for v in tf.trainable_variables()
                     if ('beta' not in v.name and 'gamma' not in v.name) or args.train_beta_gamma]

    # Gets moving_mean and moving_variance update operations from tf.GraphKeys.UPDATE_OPS
    if args.update_mean_var:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    else:
        update_ops = None

    with tf.control_dependencies(update_ops):
        opt_conv = tf.train.MomentumOptimizer(learning_rate, cfg.MOMENTUM)
        grads = tf.gradients(reduced_loss, all_trainable)
        train_op = opt_conv.apply_gradients(zip(grads, all_trainable))

    # Create session & restore weights (only train_net is needed to create the
    # session since the validation network reuses its variables).
    train_net.create_session()
    if cfg.model_weight:
        train_net.restore(cfg.model_weight, restore_var)
    elif not (args.update_mean_var and args.train_beta_gamma):
        # NOTE(review): presumably the batch-norm statistics and scale/offset
        # come from the pre-trained weights upstream; when training from
        # scratch they likely need to be learned - confirm against model.py.
        print('Warning: no pre-trained weights are configured. Without --update-mean-var '
              'the UPDATE_OPS collection is not run, and without --train-beta-gamma '
              'the beta/gamma variables are not optimized.')

    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

    fetches = [reduced_loss, loss_sub4, loss_sub24, loss_sub124, val_reduced_loss, train_op]
    last_step = cfg.TRAINING_STEPS - 1

    # Iterate over training steps.
    for step in range(cfg.TRAINING_STEPS):
        start_time = time.time()

        loss_value, loss1, loss2, loss3, val_loss_value, _ = train_net.sess.run(
            fetches, feed_dict={step_ph: step})

        # Save periodically, and also after the final step so that the last
        # stretch of training is not lost.
        if step % cfg.SAVE_PRED_EVERY == 0 or step == last_step:
            train_net.save(saver, cfg.SNAPSHOT_DIR, step)

        duration = time.time() - start_time
        print('step {:d} \t total loss = {:.3f}, sub4 = {:.3f}, sub24 = {:.3f}, sub124 = {:.3f}, val_loss: {:.3f} ({:.3f} sec/step)'.\
            format(step, loss_value, loss1, loss2, loss3, val_loss_value, duration))


if __name__ == '__main__':
    main()
針對config.py文件:
import numpy as np
import os
class Config(object):
    """Static configuration shared by the training and evaluation scripts.

    Class attributes hold dataset descriptions and hyperparameters; the
    constructor selects one dataset description as ``self.param`` and stores
    the run-time switches.
    """

    # Dataset directories and list files of the two built-in datasets. They
    # are blanked here; the upstream values are kept as comments.
    # upstream: './data/cityscapes_dataset/cityscape/'
    CITYSCAPES_DATA_DIR = ''
    # upstream: './data/ADEChallengeData2016/'
    ADE20K_DATA_DIR = ''

    # upstream: './data/list/ade20k_val_list.txt'
    ADE20K_eval_list = ''
    # upstream: './data/list/cityscapes_val_list.txt'
    CITYSCAPES_eval_list = ''

    # upstream: './data/list/ade20k_train_list.txt'
    ADE20K_train_list = ''
    # upstream: './data/list/cityscapes_train_list.txt'
    CITYSCAPES_train_list = ''

    IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32)

    ADE20k_param = {'name': 'ade20k',
                    'num_classes': 150,  # predict: [0~149] corresponding to label [1~150], ignore class 0 (background)
                    'ignore_label': 0,
                    'eval_size': [480, 480],
                    'eval_steps': 2000,
                    'eval_list': ADE20K_eval_list,
                    'train_list': ADE20K_train_list,
                    'data_dir': ADE20K_DATA_DIR}

    cityscapes_param = {'name': 'cityscapes',
                        'num_classes': 19,
                        'ignore_label': 255,
                        'eval_size': [1025, 2049],
                        'eval_steps': 500,
                        'eval_list': CITYSCAPES_eval_list,
                        'train_list': CITYSCAPES_train_list,
                        'data_dir': CITYSCAPES_DATA_DIR}

    # Weight files per model name; all blanked here. Upstream values:
    #   'train':       './model/cityscapes/icnet_cityscapes_train_30k.npy'
    #   'trainval':    './model/cityscapes/icnet_cityscapes_trainval_90k.npy'
    #   'train_bn':    './model/cityscapes/icnet_cityscapes_train_30k_bnnomerge.npy'
    #   'trainval_bn': './model/cityscapes/icnet_cityscapes_trainval_90k_bnnomerge.npy'
    #   'others':      './model/ade20k/model.ckpt-27150'
    model_paths = {'train': '',
                   'trainval': '',
                   'train_bn': '',
                   'trainval_bn': '',
                   'others': ''}

    ## If you want to train on your own dataset, try to set these parameters.
    others_param = {'name': 'LMC_2433_DATASET',  # name of your dataset
                    'num_classes': 6,  # number of classes in your dataset
                    # Label value excluded from the loss and the metrics. The
                    # author uses the background class here.
                    # NOTE(review): confirm that skipping label 0 is intended
                    # and that num_classes accounts for it.
                    'ignore_label': 0,
                    'eval_size': [256, 256],  # image size of your dataset
                    'eval_steps': 206,  # number of evaluation steps; not used for training
                    # List file used for evaluation (the author points it at the test split).
                    'eval_list': '/home/kanghao/semantic_segmentation/icnet/ICNet-tensorflow/LMC_2433_DATASET/test.txt',
                    # List file of the training split.
                    'train_list': '/home/kanghao/semantic_segmentation/icnet/ICNet-tensorflow/LMC_2433_DATASET/train.txt',
                    # Dataset root directory.
                    'data_dir': '/home/kanghao/semantic_segmentation/icnet/ICNet-tensorflow/LMC_2433_DATASET'}

    ## You can modify following lines to train different training configurations.
    INFER_SIZE = [256, 256, 3]  # matches the image size of the dataset
    TRAINING_SIZE = [256, 256]  # same as above
    TRAINING_STEPS = 20000  # number of training steps
    N_WORKERS = 8
    BATCH_SIZE = 4
    LEARNING_RATE = 1e-4
    MOMENTUM = 0.9
    POWER = 0.9
    RANDOM_SEED = 1234
    WEIGHT_DECAY = 0.0002
    SNAPSHOT_DIR = './snapshots/'  # where model snapshots are saved
    SAVE_NUM_IMAGES = 4
    SAVE_PRED_EVERY = 1000

    # Loss Function = LAMBDA1 * sub4_loss + LAMBDA2 * sub24_loss + LAMBDA3 * sub124_loss
    LAMBDA1 = 0.16
    LAMBDA2 = 0.4
    LAMBDA3 = 1.0

    def __init__(self, dataset, is_training=False, filter_scale=1, random_scale=False, random_mirror=False):
        """Select the dataset description and store the run-time switches.

        Args:
            dataset: One of ``'ade20k'``, ``'cityscapes'`` or ``'others'``.
            is_training: Stored as ``self.is_training``.
            filter_scale: Stored as ``self.filter_scale``.
            random_scale: Stored as ``self.random_scale``.
            random_mirror: Stored as ``self.random_mirror``.

        Raises:
            ValueError: If ``dataset`` is not one of the supported names
                (previously ``self.param`` was silently left unset, which only
                failed later with an AttributeError).
        """
        print('Setup configurations...')

        params_by_dataset = {
            'ade20k': self.ADE20k_param,
            'cityscapes': self.cityscapes_param,
            'others': self.others_param,
        }
        if dataset not in params_by_dataset:
            raise ValueError(
                "Unknown dataset {!r}; expected one of {}".format(
                    dataset, sorted(params_by_dataset)))
        self.param = params_by_dataset[dataset]

        self.dataset = dataset
        self.random_scale = random_scale
        self.random_mirror = random_mirror
        self.is_training = is_training
        self.filter_scale = filter_scale

    def display(self):
        """Display Configuration values.

        Prints every non-dunder, non-callable, non-dict attribute on one line
        each, and expands the selected ``param`` dict entry by entry.
        """
        print("\nConfigurations:")
        for a in dir(self):
            value = getattr(self, a)
            if not a.startswith("__") and not callable(value) and not isinstance(value, dict):
                print("{:30} {}".format(a, value))

            if a == "param":
                print(a)
                for k, v in value.items():
                    print(" {:27} {}".format(k, v))

        print("\n")
至此,便可以開始訓練了,訓練的命令是:
python train.py --dataset=others
訓練的模型文件保存後如下圖所示:
說一下遇到的坑:
在image_reader.py文件的177行,有這樣一句:
dataset = dataset.batch(cfg.BATCH_SIZE, drop_remainder=True)
如果你Tensorflow版本低於1.10.0,那麼是沒有drop_remainder參數的,我的選擇是直接去掉了這個參數。這個參數的作用是如果最後的圖像不足一個batch size,那麼就丟棄不用,所以如果訓練集圖片總數剛好能被你設置的batch size整除的話,去掉它沒有問題;如果不能整除,可以改用dataset.apply(tf.contrib.data.batch_and_drop_remainder(cfg.BATCH_SIZE))來保持相同的行爲。
3.驗證訓練結果
訓練結束,除了訓練時直觀的看到total loss的下降(在我的數據集上,我訓練10,000步,最後total loss下降到0.32左右),還可以通過mIOU、per class accuracy、streaming accuracy來評估訓練效果。但是原作者的evaluate.py文件僅針對ade20k及cityscapes數據集,所以還需要自己修改代碼,我修改後的代碼如下:
import argparse
import time
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import trange
from utils.config import Config
from utils.image_reader import ImageReader
from model import ICNet, ICNet_BN
# Maps the value of the --model command-line option to the network class.
model_config = {
    'train': ICNet,
    'trainval': ICNet,
    'train_bn': ICNet_BN,
    'trainval_bn': ICNet_BN,
    'others': ICNet_BN,
}

# Module-level lists intended to collect accuracy, mIoU and per-class-accuracy
# values during evaluation.
acc_list, mIOU_list, pca_list = [], [], []
def get_arguments(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behaviour;
            passing a list allows programmatic use.

    Returns:
        argparse.Namespace with the attributes ``model``, ``dataset`` and
        ``filter_scale``.
    """
    parser = argparse.ArgumentParser(description="Reproduced ICNet")
    # Both options are required, so the former `default=''` (which was not
    # even among the allowed choices) could never take effect and is dropped.
    parser.add_argument("--model", type=str,
                        help="Model to use.",
                        choices=['train', 'trainval', 'train_bn', 'trainval_bn', 'others'],
                        required=True)
    parser.add_argument("--dataset", type=str,
                        choices=['ade20k', 'cityscapes', 'others'],
                        required=True)
    parser.add_argument("--filter-scale", type=int, default=2,
                        help="1 for using pruned model, while 2 for using non-pruned model.",
                        choices=[1, 2])
    return parser.parse_args(argv)
def main():
    """Evaluate a trained ICNet on the evaluation list of the chosen dataset.

    Builds the network in 'eval' mode, masks out entries whose label equals
    ``cfg.param['ignore_label']``, accumulates streaming metrics over
    ``cfg.param['eval_steps']`` batches and prints their final values.
    mIoU is reported for every dataset; for ``'others'`` the streaming
    accuracy and the mean per-class accuracy are reported as well.

    Raises:
        RuntimeError: If neither ``cfg.model_paths[args.model]`` nor a
            checkpoint in ``cfg.SNAPSHOT_DIR`` provides weights to evaluate.
    """
    args = get_arguments()
    cfg = Config(dataset=args.dataset, is_training=False, filter_scale=args.filter_scale)

    model = model_config[args.model]
    reader = ImageReader(cfg=cfg, mode='eval')
    net = model(image_reader=reader, cfg=cfg, mode='eval')

    # Flatten predictions and labels, then drop the entries to be ignored.
    pred_flatten = tf.reshape(net.output, [-1,])
    label_flatten = tf.reshape(net.labels, [-1,])

    mask = tf.not_equal(label_flatten, cfg.param['ignore_label'])
    indices = tf.squeeze(tf.where(mask), 1)
    gt = tf.cast(tf.gather(label_flatten, indices), tf.int32)
    pred = tf.gather(pred_flatten, indices)

    num_classes = cfg.param['num_classes']
    if cfg.dataset == 'ade20k':
        # As before for ade20k: predictions are shifted by one and the metric
        # is built with one extra class.
        pred = tf.add(pred, tf.constant(1, dtype=tf.int64))
        num_classes += 1

    # Streaming metrics: each one is a (value tensor, update op) pair. Every
    # dataset now gets a complete set of ops; previously `metrics_op` and the
    # accuracy tensors only existed for 'others'.
    mIoU, mIoU_update = tf.metrics.mean_iou(predictions=pred, labels=gt, num_classes=num_classes)
    reports = [('mIoU', mIoU)]
    update_ops = [mIoU_update]

    if cfg.dataset == 'others':
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(pred, gt)
        per_class_accuracy, per_class_accuracy_update = tf.metrics.mean_per_class_accuracy(
            labels=gt, predictions=pred, num_classes=num_classes)
        reports = [('Streaming Accuracy', accuracy),
                   ('mIoU', mIoU),
                   ('per class accuracy', per_class_accuracy)]
        update_ops += [accuracy_update, per_class_accuracy_update]

    metrics_op = tf.group(*update_ops)

    net.create_session()

    # Load the trained weights. Without this step the metrics describe an
    # untrained network. Prefer an explicitly configured path, otherwise fall
    # back to the newest snapshot written by training.
    # NOTE(review): assumes the snapshots in cfg.SNAPSHOT_DIR are regular
    # TensorFlow checkpoints and that net.restore accepts a checkpoint
    # prefix - confirm against model.py.
    weights_path = cfg.model_paths[args.model] or tf.train.latest_checkpoint(cfg.SNAPSHOT_DIR)
    if not weights_path:
        raise RuntimeError(
            'No weights to evaluate: cfg.model_paths[{!r}] is empty and no checkpoint '
            'was found in {!r}.'.format(args.model, cfg.SNAPSHOT_DIR))
    net.restore(weights_path)

    # Only run the update ops inside the loop. The value tensors are read once
    # afterwards: streaming metrics are already cumulative, so averaging their
    # running values over the steps would give a wrong result.
    for _ in trange(cfg.param['eval_steps'], desc='evaluation', leave=True):
        net.sess.run(metrics_op)

    final_values = net.sess.run([tensor for _, tensor in reports])
    for (title, _), value in zip(reports, final_values):
        print('{}: {}'.format(title, value))


if __name__ == '__main__':
    main()
運行後,該腳本輸出結果爲:
是的 沒錯 我知道 你別說了 你閉嘴 停 結果很差…… 目前對代碼理解還很生疏,等我研讀一下代碼看看能不能解決……現在直觀的方法就是把gt和pred圖保存一下,看看分割效果??
最後記錄一下evaluate.py文件自己改寫時遇到的坑,注意這一句:
_,accuracy, mIoU, per_class_accuracy = net.sess.run([metrics_op,accuracy, mIoU, per_class_accuracy])
也就是在我的源碼中被我註釋掉的那一句。sess.run()的輸入是tensor,返回的卻是float32類型的數值;如果把返回值賦給與輸入tensor同名的變量(比如mIoU),原來的tensor就會被數值覆蓋,下一次循環再調用sess.run()時傳進去的就不再是tensor,於是報錯。所以不要把輸出的變量名設置成和sess.run()中的tensor相同……
最後的最後,歡迎各位小夥伴批評、指正和討論。