1、引言
這節我們將介紹圖像分類問題,任務是給定一張輸入圖片,從一組已知的類別中爲其指派一個標籤。圖像分類是計算機視覺領域的核心問題之一,儘管它看上去很簡單,卻有廣泛的實踐應用。而且,之後我將會使用tensorflow實現圖像分割;許多看上去不同的計算機視覺任務(例如物體識別、分割),都能夠歸結爲圖像分類。
例如,在下圖中,一個圖像分類模型將一個圖片分配給四個類別(cat,dog,hat,mug)標籤的概率。對於計算機而言,圖片被表示成一個大的數字矩陣。
算了,廢話我就不扯了,小博的博客向來都是實踐居多,直接來代碼,上步驟,教你訓練自己的數據。
2、數據集準備
如圖所示,我在網上找的5類數據集,分別爲動物,花草,吉他,房子,飛機。
圖片每一類800張,我們首先將其中500張作爲訓練集,300張作爲測試集。
分別存在train ,test文件夾內。
接下來我們將文件圖片路徑寫入txt
#load data
# -*-coding: utf-8 -*-
"""
@Project: create_tfrecord
@File : create_tfrecord.py
@Author : xiao_run
@E-mail : [email protected]
@Date : 2018-07-27 17:19:54
@desc : 將圖片數據保存爲單個tfrecord文件
"""
##########################################################################
import os #python中的os模塊用於處理文件和目錄
import skimage #python中的skimage模塊用於圖像處理
import numpy as np #python中的numpy模塊用於科學計算
from skimage import data,transform
from skimage.color import rgb2gray #rgb2gray將圖片轉化爲灰度
#import tensorflow as tf
import cv2
import matplotlib.pyplot as plt
import random
from PIL import Image
# List file that receives one "<image path> <label>" line per image found.
txt_read = open("./train_file_txt.txt", "w+")


def load_data(data_directory):
    """Read every ``.ppm`` image below ``data_directory`` and record it in the list file.

    Each immediate sub-directory of ``data_directory`` is treated as one class;
    its name is converted with ``int()`` and used as the label, so the
    sub-directories must have integer names.  For every image a line
    ``"<path> <sub-directory name>"`` is written to the module-level
    ``txt_read`` file.

    :param data_directory: directory that contains one sub-directory per class
    :return: ``(images, labels)`` - two parallel lists holding the decoded
        images and their integer labels
    :raises ValueError: if a sub-directory that contains images does not have
        an integer name
    """
    # Function-scope import: ``skimage.data.imread`` (used originally) is not
    # available in recent scikit-image releases; ``skimage.io.imread`` is the
    # supported reader.
    from skimage import io as skimage_io

    class_dirs = [
        d for d in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, d))
    ]
    labels = []
    images = []
    for d in class_dirs:
        # Path of the current class directory.
        label_directory = os.path.join(data_directory, d)
        print(label_directory)
        file_names = [
            os.path.join(label_directory, f)
            for f in os.listdir(label_directory)
            if f.endswith(".ppm")
        ]
        print(file_names)
        for f in file_names:
            images.append(skimage_io.imread(f))
            labels.append(int(d))
            txt_read.write(f + " " + d + "\n")
    return images, labels


ROOT_PATH = "/home/lenovo/tensorflow/classfication"
train_data_directory = os.path.join(ROOT_PATH, "Training")
test_data_directory = os.path.join(ROOT_PATH, "Testing")
try:
    images, labels = load_data(train_data_directory)
finally:
    # Close (and thereby flush) the list file even if loading fails, so the
    # lines written so far are not lost in an unflushed buffer.
    txt_read.close()
接下來我們可以看到訓練集與測試集的圖片名在txt中生成
3、生成tfrecord文件,batch方便讀取。
# -*-coding: utf-8 -*-
"""
@Project: create_tfrecord
@File : create_tfrecord.py
@Author : panjq
@E-mail : [email protected]
@Date : 2018-07-27 17:19:54
@desc : 將圖片數據保存爲單個tfrecord文件
"""
##########################################################################
import tensorflow as tf
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
import random
from PIL import Image
##########################################################################
def _int64_feature(value):
    """Wrap a single integer ``value`` in an int64 ``tf.train.Feature``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap a single bytes/string ``value`` in a bytes ``tf.train.Feature``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def float_list_feature(value):
    """Wrap ``value`` (passed through as the list of floats) in a float ``tf.train.Feature``."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)
def get_example_nums(tf_records_filenames):
    """Count the examples stored in a TFRecord file.

    :param tf_records_filenames: path of the TFRecord file to scan
    :return: number of records yielded by ``tf.python_io.tf_record_iterator``
    """
    return sum(1 for _ in tf.python_io.tf_record_iterator(tf_records_filenames))
def show_image(title, image):
    """Display ``image`` in a matplotlib window.

    :param title: text used as the figure title
    :param image: image data handed to ``plt.imshow``
    :return: None
    """
    plt.imshow(image)
    # Keep the axes visible; pass 'off' here instead to hide them.
    plt.axis('on')
    plt.title(title)
    plt.show()
def load_labels_file(filename, labels_num=1, shuffle=False):
    """Parse a label list file into image paths and integer labels.

    Every non-blank line holds whitespace-separated fields: the image path
    followed by one or more integer labels, e.g. ``test_image/1.jpg 0 2``.
    Blank lines are skipped and runs of spaces/tabs count as one separator
    (a plain ``split(' ')`` raised on both).

    :param filename: path of the text file to read
    :param labels_num: number of labels to take from each line (the first
        ``labels_num`` fields after the path)
    :param shuffle: if true, shuffle the lines with ``random.shuffle``
        before parsing
    :return: ``(images, labels)`` - ``images`` is a list of path strings and
        ``labels`` a list of ``labels_num``-long lists of ints, in the same
        order
    :raises IndexError: if a line has fewer than ``labels_num`` labels
    :raises ValueError: if a label field is not an integer
    """
    images = []
    labels = []
    with open(filename) as f:
        lines_list = f.readlines()
    if shuffle:
        random.shuffle(lines_list)
    for raw_line in lines_list:
        fields = raw_line.split()
        if not fields:
            # Blank or whitespace-only line (e.g. a trailing newline at EOF).
            continue
        # Index explicitly so that a line with too few labels still raises
        # IndexError instead of silently yielding a shorter label list.
        label = [int(fields[i + 1]) for i in range(labels_num)]
        images.append(fields[0])
        labels.append(label)
    return images, labels
def read_image(filename, resize_height, resize_width, normalization=False):
    """Read an image file with OpenCV and return it as an RGB array.

    :param filename: path of the image file
    :param resize_height: target height; resizing happens only when both
        ``resize_height`` and ``resize_width`` are greater than 0
    :param resize_width: target width (see ``resize_height``)
    :param normalization: if true, divide the pixel values by 255.0
    :return: the image array in RGB channel order; divided by 255.0 when
        ``normalization`` is true
    :raises IOError: if OpenCV cannot read ``filename``
    """
    bgr_image = cv2.imread(filename)
    if bgr_image is None:
        # cv2.imread signals a missing/corrupt file by returning None; fail
        # with a clear message instead of an AttributeError further down.
        raise IOError("cannot read image file: {}".format(filename))
    if len(bgr_image.shape) == 2:
        # Single-channel image: expand it to three channels.
        print("Warning:gray image", filename)
        bgr_image = cv2.cvtColor(bgr_image, cv2.COLOR_GRAY2BGR)
    # OpenCV loads images as BGR; convert to RGB.
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    if resize_height > 0 and resize_width > 0:
        # cv2.resize expects the size as (width, height).
        rgb_image = cv2.resize(rgb_image, (resize_width, resize_height))
    rgb_image = np.asanyarray(rgb_image)
    if normalization:
        # Divide by the float 255.0 (not the int 255) so the result is
        # floating point regardless of the Python version's division rules.
        rgb_image = rgb_image / 255.0
    return rgb_image
def get_batch_images(images, labels, batch_size, labels_nums, one_hot=False, shuffle=False, num_threads=1):
    """Group single-example tensors into batches with the TF queue API.

    :param images: image tensor for one example
    :param labels: label tensor for one example
    :param batch_size: number of examples per batch
    :param labels_nums: number of classes, used as the depth of the one-hot
        encoding
    :param one_hot: if true, convert the label batch with ``tf.one_hot``
    :param shuffle: if true, batch with ``tf.train.shuffle_batch``, otherwise
        with ``tf.train.batch``
    :param num_threads: number of threads passed to the batching op
    :return: ``(images_batch, labels_batch)``
    """
    queue_floor = 200
    # The queue capacity has to exceed min_after_dequeue.
    shared_kwargs = dict(
        batch_size=batch_size,
        capacity=queue_floor + 3 * batch_size,
        num_threads=num_threads,
    )
    example = [images, labels]
    if not shuffle:
        images_batch, labels_batch = tf.train.batch(example, **shared_kwargs)
    else:
        images_batch, labels_batch = tf.train.shuffle_batch(
            example, min_after_dequeue=queue_floor, **shared_kwargs)
    if one_hot:
        labels_batch = tf.one_hot(labels_batch, labels_nums, 1, 0)
    return images_batch, labels_batch
def read_records(filename, resize_height, resize_width, type=None):
    """Build the graph ops that read one (image, label) example from a TFRecord file.

    The image bytes are decoded as uint8 and reshaped to
    ``[resize_height, resize_width, 3]``, so these sizes must match the shape
    the images had when the record file was written.

    :param filename: path of the TFRecord file
    :param resize_height: height of the stored images
    :param resize_width: width of the stored images
    :param type: how the image tensor is converted before it is returned:
        ``None`` - cast to float32, values unchanged;
        ``'normalization'`` - cast to float32 and multiplied by 1/255;
        ``'centralization'`` - as above, then 0.5 is subtracted;
        any other value - the uint8 tensor is returned unchanged
    :return: ``(tf_image, tf_label)``; the label is cast to int32
    """
    # File-name queue with no limit on the number of epochs.
    filename_queue = tf.train.string_input_producer([filename])
    record_reader = tf.TFRecordReader()
    # Pull one serialized example from the queue.
    _, serialized_example = record_reader.read(filename_queue)
    feature_spec = {
        'image_raw': tf.FixedLenFeature([], tf.string),
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'depth': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    parsed = tf.parse_single_example(serialized_example, features=feature_spec)
    # 'height', 'width' and 'depth' are parsed but not returned; only the
    # image and the label are handed back.
    raw_pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    tf_label = tf.cast(parsed['label'], tf.int32)
    tf_image = tf.reshape(raw_pixels, [resize_height, resize_width, 3])

    if type is None:
        return tf.cast(tf_image, tf.float32), tf_label
    if type == 'normalization':
        return tf.cast(tf_image, tf.float32) * (1. / 255.0), tf_label
    if type == 'centralization':
        # Scale by 1/255, then shift by an assumed mean of 0.5.
        return tf.cast(tf_image, tf.float32) * (1. / 255) - 0.5, tf_label
    return tf_image, tf_label
def create_records(image_dir, file, output_record_dir, resize_height, resize_width, shuffle, log=5):
    """Write images and their labels into a single TFRecord file.

    Each example stores the raw image bytes (``image_raw``), the image
    ``height``, ``width`` and ``depth``, and the first label of the line
    (``label``).  Images listed in ``file`` that do not exist on disk are
    reported and skipped.

    :param image_dir: directory joined in front of every path listed in
        ``file``
    :param file: text file with one "<image path> <label>" entry per line
    :param output_record_dir: path of the TFRecord file to create
    :param resize_height: height passed to ``read_image``
    :param resize_width: width passed to ``read_image``; ``read_image`` only
        resizes when both sizes are greater than 0
    :param shuffle: whether the entries are shuffled before writing
    :param log: progress is printed every ``log`` entries and for the last one
    """
    # Only the first label of every line is loaded.
    images_list, labels_list = load_labels_file(file, 1, shuffle)
    last_index = len(images_list) - 1
    writer = tf.python_io.TFRecordWriter(output_record_dir)
    try:
        for i, (image_name, labels) in enumerate(zip(images_list, labels_list)):
            image_path = os.path.join(image_dir, image_name)
            if not os.path.exists(image_path):
                print('Err:no image', image_path)
                continue
            image = read_image(image_path, resize_height, resize_width)
            # tobytes() is the supported spelling of the deprecated tostring().
            image_raw = image.tobytes()
            if i % log == 0 or i == last_index:
                print('------------processing:%d-th------------' % (i))
                print('current image_path=%s' % (image_path), 'shape:{}'.format(image.shape), 'labels:{}'.format(labels))
            # A single label is stored; add further 'label' features here for
            # multi-label data.
            label = labels[0]
            example = tf.train.Example(features=tf.train.Features(feature={
                'image_raw': _bytes_feature(image_raw),
                'height': _int64_feature(image.shape[0]),
                'width': _int64_feature(image.shape[1]),
                'depth': _int64_feature(image.shape[2]),
                'label': _int64_feature(label)
            }))
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when reading/encoding an image fails, so the
        # records written so far are flushed and the file handle is released.
        writer.close()
def disp_records(record_file, resize_height, resize_width, show_nums=4):
    """Read a TFRecord file and display its first ``show_nums`` images.

    Mainly useful for checking that a record file was generated correctly.

    :param record_file: path of the TFRecord file
    :param resize_height: height of the stored images
    :param resize_width: width of the stored images
    :param show_nums: number of examples to read and show
    :return: None
    """
    tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
    # Same initializer as batch_test; tf.initialize_all_variables is deprecated.
    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for i in range(show_nums):
                # Fetch one decoded image and its label.
                image, label = sess.run([tf_image, tf_label])
                print('shape:{},tpye:{},labels:{}'.format(image.shape, image.dtype, label))
                show_image("image:%d" % (label), image)
        finally:
            # Stop the queue-runner threads even if reading or plotting fails.
            coord.request_stop()
            coord.join(threads)
def batch_test(record_file, resize_height, resize_width):
    """Read four batches of four examples from a TFRecord file and show one image per batch.

    The batched tensors built here have the form that is normally fed to a
    network as input.

    :param record_file: path of the TFRecord file
    :param resize_height: height of the stored images
    :param resize_width: width of the stored images
    :return: None
    """
    tf_image, tf_label = read_records(record_file, resize_height, resize_width, type='normalization')
    image_batch, label_batch = get_batch_images(tf_image, tf_label, batch_size=4, labels_nums=5,
                                                one_hot=False, shuffle=False)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(4):
                # Fetch one batch of images and labels.
                images, labels = sess.run([image_batch, label_batch])
                # Only the first image of every batch is displayed.
                show_image("image", images[0, :, :, :])
                print('shape:{},tpye:{},labels:{}'.format(images.shape, images.dtype, labels))
        finally:
            # Stop the queue-runner threads even if reading or plotting fails.
            coord.request_stop()
            coord.join(threads)
if __name__ == '__main__':
    # Settings: size of the stored images, shuffling and log interval.
    resize_height, resize_width = 224, 224
    shuffle = True
    log = 5

    def _build_split(split_image_dir, labels_txt, record_path):
        """Write one record file and return the number of examples it holds."""
        create_records(split_image_dir, labels_txt, record_path, resize_height, resize_width, shuffle, log)
        return get_example_nums(record_path)

    # Training split -> train record file.
    image_dir = 'dataset/train'
    train_labels = 'dataset/train.txt'
    train_record_output = 'dataset/record/train{}.tfrecords'.format(resize_height)
    train_nums = _build_split(image_dir, train_labels, train_record_output)
    print("save train example nums={}".format(train_nums))

    # Validation split -> val record file.
    image_dir = 'dataset/val'
    val_labels = 'dataset/val.txt'
    val_record_output = 'dataset/record/val{}.tfrecords'.format(resize_height)
    val_nums = _build_split(image_dir, val_labels, val_record_output)
    print("save val example nums={}".format(val_nums))

    # Sanity check: read batches back from the training record file.
    # disp_records(train_record_output, resize_height, resize_width) can be
    # used instead to show single images.
    batch_test(train_record_output, resize_height, resize_width)
運行上述代碼即可生成訓練集與測試集的tfrecord格式數據集,這裏有個缺陷就是生成之後纔打亂數據集,後面我將在生成txt的時候就將其打亂。我們可以看到record文件下已生成文件
4、搭建分類卷積神經網絡,在這裏我分別搭建了alexnet,vgg,resnet,後續將會加上批規範化等操作,提高準確率,避免過擬合。新建一個py文件,model_cnn.py
import tensorflow as tf
import numpy as np
from tensorflow.python.training import moving_averages
EPS = 10e-5  # numerically 1e-4; not referenced by any code visible in this section
class CNN(object):
    """Builders for three TF1 graph classifiers: AlexNet, a VGG variant and ResNet-34.

    Every network method reshapes its input to ``[-1, 224, 224, 3]`` and
    returns the output of the last fully connected layer (no softmax applied),
    with ``NUM_CLASSES`` values per example.
    """

    def __init__(self, num_classes, keep_prob):
        """Store the network settings.

        :param num_classes: size of the final fully connected layer
        :param keep_prob: keep probability handed to ``tf.nn.dropout``
        """
        super(CNN, self).__init__()
        self.NUM_CLASSES = num_classes
        self.KEEP_PROB = keep_prob

    def BatchNomalization(self, x, name, isTrain=True, isBn=True):
        """Apply batch normalization over all axes except the last one.

        In training mode the batch statistics are used, and the ops that
        update the moving statistics are added to the graph collections
        ``'mean'`` and ``'variance'``; nothing in this class runs them, so
        the caller has to.  In inference mode the stored moving statistics
        are read without being modified.

        :param x: input tensor
        :param name: variable scope for beta/gamma and the moving statistics
        :param isTrain: use batch statistics (True) or moving statistics (False)
        :param isBn: if false, ``x`` is returned untouched
        :return: the normalized tensor
        """
        if not isBn:
            return x
        with tf.variable_scope(name):
            axis = list(range(len(x.get_shape()) - 1))
            beta = tf.get_variable('beta', initializer=tf.zeros_initializer, shape=x.get_shape()[-1],
                                   dtype=tf.float32)
            gamma = tf.get_variable('gamma', initializer=tf.ones_initializer, shape=x.get_shape()[-1],
                                    dtype=tf.float32)
            moving_mean = tf.get_variable(name='moving_mean', initializer=tf.zeros_initializer,
                                          shape=x.get_shape()[-1], dtype=tf.float32, trainable=False)
            # Start the moving variance at one: a zero variance would scale the
            # activations by 1/sqrt(epsilon) until enough updates have run.
            moving_variance = tf.get_variable(name='moving_var', initializer=tf.ones_initializer,
                                              shape=x.get_shape()[-1], dtype=tf.float32, trainable=False)
            if isTrain:
                mean, variance = tf.nn.moments(x, axis)
                tf.add_to_collection(
                    'mean', moving_averages.assign_moving_average(moving_mean, mean, 0.999))
                tf.add_to_collection(
                    'variance', moving_averages.assign_moving_average(moving_variance, variance, 0.999))
            else:
                # Read the stored statistics directly; using the assign ops
                # here would alter them on every inference call.
                mean, variance = moving_mean, moving_variance
            x = tf.nn.batch_normalization(x, mean, variance, beta, gamma, 0.001)
        return x

    def conv_layer_relu(self, x, weights, biases, stride, name, relu='TRUE', padding='SAME'):
        """2-D convolution plus bias, optionally followed by ReLU.

        :param x: input tensor
        :param weights: shape of the kernel variable, passed to ``tf.get_variable``
        :param biases: shape of the bias variable
        :param stride: strides passed to ``tf.nn.conv2d``
        :param name: variable scope; also used as the conv op name
        :param relu: the string ``'TRUE'`` enables the ReLU, anything else skips it
        :param padding: padding mode passed to ``tf.nn.conv2d``
        :return: the output tensor
        """
        with tf.variable_scope(name) as scope:
            kernel = tf.get_variable("weights", weights, initializer=tf.truncated_normal_initializer())
            bias = tf.get_variable("biases", biases, initializer=tf.truncated_normal_initializer())
            conv = tf.nn.conv2d(x, kernel, strides=stride, padding=padding, name=scope.name)
            conv = tf.nn.bias_add(conv, bias)
            if relu == 'TRUE':
                conv = tf.nn.relu(conv, name=scope.name + "_relu")
        return conv

    def conv_with_no_relu(self, x, weights, biases, stride, name, padding='SAME'):
        """2-D convolution plus bias without activation (see ``conv_layer_relu``)."""
        return self.conv_layer_relu(x, weights, biases, stride, name, relu='FALSE', padding=padding)

    def maxpool(self, x, filter_size, stride, name):
        """Max pooling with ``'VALID'`` padding."""
        return tf.nn.max_pool(x, ksize=filter_size, strides=stride, padding='VALID', name=name)

    def relu(self, x):
        """Element-wise ReLU."""
        return tf.nn.relu(x)

    def fc_relu(self, x, weights, biases, name, relu='TRUE'):
        """Fully connected layer, optionally followed by ReLU.

        :param x: 2-D input tensor
        :param weights: shape of the weight variable
        :param biases: shape of the bias variable
        :param name: variable scope; also used as the op name
        :param relu: the string ``'TRUE'`` enables the ReLU, anything else skips it
        :return: the output tensor
        """
        with tf.variable_scope(name) as scope:
            w = tf.get_variable("weights", weights, initializer=tf.truncated_normal_initializer())
            b = tf.get_variable("biases", biases, initializer=tf.truncated_normal_initializer())
            fc = tf.add(tf.matmul(x, w), b, name=scope.name)
            if relu == 'TRUE':
                fc = tf.nn.relu(fc, name=scope.name + "_relu")
        return fc

    def dropout(self, x, name):
        """Dropout using the keep probability stored in ``KEEP_PROB``."""
        return tf.nn.dropout(x, self.KEEP_PROB, name=name)

    def alex_net(self, x):
        """Build AlexNet: five conv layers, three max pools, three FC layers.

        :param x: input reshaped to ``[-1, 224, 224, 3]``
        :return: output of the last FC layer (no activation, no dropout)
        """
        x = tf.reshape(x, [-1, 224, 224, 3])
        conv1 = self.conv_layer_relu(x, [11, 11, 3, 96], [96], [1, 4, 4, 1], "alex_conv1")
        pool1 = self.maxpool(conv1, [1, 3, 3, 1], [1, 2, 2, 1], "alex_pool1")
        norm1 = tf.nn.local_response_normalization(pool1, name="alex_norm1")
        conv2 = self.conv_layer_relu(norm1, [5, 5, 96, 256], [256], [1, 1, 1, 1], "alex_conv2")
        pool2 = self.maxpool(conv2, [1, 3, 3, 1], [1, 2, 2, 1], "alex_pool2")
        norm2 = tf.nn.local_response_normalization(pool2, name="alex_norm2")
        conv3 = self.conv_layer_relu(norm2, [3, 3, 256, 384], [384], [1, 1, 1, 1], "alex_conv3")
        conv4 = self.conv_layer_relu(conv3, [3, 3, 384, 384], [384], [1, 1, 1, 1], "alex_conv4")
        conv5 = self.conv_layer_relu(conv4, [3, 3, 384, 256], [256], [1, 1, 1, 1], "alex_conv5")
        pool3 = self.maxpool(conv5, [1, 3, 3, 1], [1, 2, 2, 1], "alex_pool3")
        # Flatten the 6x6x256 feature map for the fully connected layers.
        x2 = tf.reshape(pool3, [-1, 6 * 6 * 256])
        fc6 = self.fc_relu(x2, [6 * 6 * 256, 4096], [4096], "alex_fc6")
        dropout_fc6 = self.dropout(fc6, "alex_drop_fc6")
        fc7 = self.fc_relu(dropout_fc6, [4096, 4096], [4096], "alex_fc7")
        dropout_fc7 = self.dropout(fc7, "alex_drop_fc7")
        out = self.fc_relu(dropout_fc7, [4096, self.NUM_CLASSES], [self.NUM_CLASSES], "alex_out", relu='FALSE')
        return out

    def vgg_net(self, x):
        """Build a VGG-16 style network with batch norm on the first four conv layers.

        NOTE(review): this differs from the textbook layout in two places that
        offset each other.  ``vgg_conv1_2`` uses stride 2, and ``vgg_pool2`` is
        created but not consumed (``vgg_conv3_1`` reads ``conv2_2``).  The map
        entering the FC layers is therefore still 7x7x512.  Confirm whether
        this is intended before changing either one.

        :param x: input reshaped to ``[-1, 224, 224, 3]``
        :return: output of the last FC layer (no activation, no dropout)
        """
        x = tf.reshape(x, [-1, 224, 224, 3])
        # Block 1: conv -> batch norm -> relu, twice, then max pool.
        conv1_1 = self.conv_with_no_relu(x, [3, 3, 3, 64], [64], [1, 1, 1, 1], "vgg_conv1_1")
        BN1 = self.BatchNomalization(conv1_1, name="BN1", isTrain=True, isBn=True)
        conv1_1 = self.relu(BN1)
        conv1_2 = self.conv_with_no_relu(conv1_1, [3, 3, 64, 64], [64], [1, 2, 2, 1], "vgg_conv1_2")
        BN2 = self.BatchNomalization(conv1_2, name="BN2", isTrain=True, isBn=True)
        conv1_2 = self.relu(BN2)
        pool1 = self.maxpool(conv1_2, [1, 2, 2, 1], [1, 2, 2, 1], "vgg_pool1")
        # Block 2: conv -> batch norm -> relu, twice.
        conv2_1 = self.conv_with_no_relu(pool1, [3, 3, 64, 128], [128], [1, 1, 1, 1], "vgg_conv2_1")
        BN2_1 = self.BatchNomalization(conv2_1, name="BN2_1", isTrain=True, isBn=True)
        conv2_1 = self.relu(BN2_1)
        conv2_2 = self.conv_with_no_relu(conv2_1, [3, 3, 128, 128], [128], [1, 1, 1, 1], "vgg_conv2_2")
        BN2_2 = self.BatchNomalization(conv2_2, name="BN2_2", isTrain=True, isBn=True)
        conv2_2 = self.relu(BN2_2)
        # Created but not used by the following layer (see the NOTE above).
        pool2 = self.maxpool(conv2_2, [1, 2, 2, 1], [1, 2, 2, 1], "vgg_pool2")
        # Block 3.
        conv3_1 = self.conv_layer_relu(conv2_2, [3, 3, 128, 256], [256], [1, 1, 1, 1], "vgg_conv3_1")
        conv3_2 = self.conv_layer_relu(conv3_1, [3, 3, 256, 256], [256], [1, 1, 1, 1], "vgg_conv3_2")
        conv3_3 = self.conv_layer_relu(conv3_2, [3, 3, 256, 256], [256], [1, 1, 1, 1], "vgg_conv3_3")
        pool3 = self.maxpool(conv3_3, [1, 2, 2, 1], [1, 2, 2, 1], "vgg_pool3")
        norm3 = tf.nn.local_response_normalization(pool3, name="vgg_norm3")
        # Block 4.
        conv4_1 = self.conv_layer_relu(norm3, [3, 3, 256, 512], [512], [1, 1, 1, 1], "vgg_conv4_1")
        conv4_2 = self.conv_layer_relu(conv4_1, [3, 3, 512, 512], [512], [1, 1, 1, 1], "vgg_conv4_2")
        conv4_3 = self.conv_layer_relu(conv4_2, [3, 3, 512, 512], [512], [1, 1, 1, 1], "vgg_conv4_3")
        pool4 = self.maxpool(conv4_3, [1, 2, 2, 1], [1, 2, 2, 1], "vgg_pool4")
        norm4 = tf.nn.local_response_normalization(pool4, name="vgg_norm4")
        # Block 5.
        conv5_1 = self.conv_layer_relu(norm4, [3, 3, 512, 512], [512], [1, 1, 1, 1], "vgg_conv5_1")
        conv5_2 = self.conv_layer_relu(conv5_1, [3, 3, 512, 512], [512], [1, 1, 1, 1], "vgg_conv5_2")
        conv5_3 = self.conv_layer_relu(conv5_2, [3, 3, 512, 512], [512], [1, 1, 1, 1], "vgg_conv5_3")
        pool5 = self.maxpool(conv5_3, [1, 2, 2, 1], [1, 2, 2, 1], "vgg_pool5")
        # Flatten the 7x7x512 feature map for the fully connected layers.
        x2 = tf.reshape(pool5, [-1, 7 * 7 * 512])
        fc6 = self.fc_relu(x2, [7 * 7 * 512, 4096], [4096], "vgg_fc6")
        dropout_fc6 = self.dropout(fc6, "vgg_drop_fc6")
        fc7 = self.fc_relu(dropout_fc6, [4096, 4096], [4096], "vgg_fc7")
        dropout_fc7 = self.dropout(fc7, "vgg_drop_fc7")
        out = self.fc_relu(dropout_fc7, [4096, self.NUM_CLASSES], [self.NUM_CLASSES], "vgg_out", relu='FALSE')
        return out

    def _res_block(self, x, in_channels, out_channels, first_name, second_name, add_name,
                   shortcut_name=None):
        """Two 3x3 convolutions plus a shortcut connection, followed by ReLU.

        When ``shortcut_name`` is given the block downsamples: the shortcut is
        a 1x1 stride-2 convolution under that scope, and the first 3x3
        convolution uses stride 2 as well.  Otherwise the shortcut is ``x``.
        """
        if shortcut_name is None:
            shortcut = x
            first_stride = [1, 1, 1, 1]
        else:
            shortcut = self.conv_layer_relu(x, [1, 1, in_channels, out_channels], [out_channels],
                                            [1, 2, 2, 1], shortcut_name, relu='FALSE', padding='VALID')
            first_stride = [1, 2, 2, 1]
        branch = self.conv_layer_relu(x, [3, 3, in_channels, out_channels], [out_channels],
                                      first_stride, first_name)
        branch = self.conv_layer_relu(branch, [3, 3, out_channels, out_channels], [out_channels],
                                      [1, 1, 1, 1], second_name, relu='FALSE')
        return tf.nn.relu(tf.add(shortcut, branch), name=add_name)

    def resnet34(self, x):
        """Build ResNet-34: a 7x7 stem, 3/4/6/3 residual blocks, average pool and one FC layer.

        Variable scopes follow the pattern ``res_conv<stage>_<k>`` for the 3x3
        convolutions and ``res_conv_res_add<n>`` for the 1x1 shortcut
        convolutions at the start of stages 3, 4 and 5.

        :param x: input reshaped to ``[-1, 224, 224, 3]``
        :return: output of the FC layer (no activation)
        """
        x = tf.reshape(x, [-1, 224, 224, 3])
        conv1 = self.conv_layer_relu(x, [7, 7, 3, 64], [64], [1, 2, 2, 1], "res_conv1")
        net = self.maxpool(conv1, [1, 3, 3, 1], [1, 2, 2, 1], "res_pool1")

        # (stage number, number of residual blocks, output channels)
        stages = [(2, 3, 64), (3, 4, 128), (4, 6, 256), (5, 3, 512)]
        channels = 64
        blocks_done = 0
        for stage, num_blocks, out_channels in stages:
            for block in range(num_blocks):
                shortcut_name = None
                if block == 0 and channels != out_channels:
                    # First block of a wider stage: downsample and project.
                    shortcut_name = "res_conv_res_add%d" % blocks_done
                net = self._res_block(
                    net, channels, out_channels,
                    "res_conv%d_%d" % (stage, 2 * block + 1),
                    "res_conv%d_%d" % (stage, 2 * block + 2),
                    "res_add%d" % (blocks_done + 1),
                    shortcut_name)
                channels = out_channels
                blocks_done += 1

        # Average pooling over the remaining 7x7 map.  tf.nn.pool takes the
        # spatial window only, so the result is 1x1x512 per example.
        pool2 = tf.nn.pool(net, window_shape=[7, 7], pooling_type='AVG', padding='VALID')
        x2 = tf.reshape(pool2, [-1, 512])
        out = self.fc_relu(x2, [512, self.NUM_CLASSES], [self.NUM_CLASSES], "res_out", relu='FALSE')
        return out
網絡搭建好以後
5、模型訓練
調用網絡,設置loss,來進行分類,其中loss function折騰了我好長時間,這裏我給出幾種的區別,大家可以自己實踐
損失函數(loss function),量化了分類器輸出的結果(預測值)和我們期望的結果(標籤)之間的差距,這和分類器結構本身同樣重要。有很多的學者花費心思探討如何改良損失函數使得分類器的結果最優,因此學會如何使用Tensorflow中的損失函數以及學會自己構建損失函數是非常重要的。
首先我們先規定一些變量這樣方便大家之後更加清楚的讀懂如何使用損失函數。
Labels :標籤,在分類或者分割等問題中的標準答案。可以是1,2,3,4,5,6 。
Labels_onehot : Onehot形式的標籤,即如果有3類那麼第一類表示爲[1,0,0],第二類爲[0,1,0],第三類爲[0,0,1]。這種形式的標籤更加的常見。
Network.out : 網絡最後一層的輸出,注意是沒有經過softmax的網絡的輸出,通常是softmax函數的輸入值。
Network.probs : 網絡輸出的概率結果,通常爲網絡最後一層輸出經過softmax函數之後的結果,Network.probs=tf.nn.softmax(Network.out)
Network.pred : 網絡的預測結果,在onehot的形式中選擇概率最大的一類作爲最終的預測結果,Network.pred=tf.argmax(Network.probs, axis=n)。
Tensor : 一個張量,可以簡單的理解爲是tensorflow中的一個數組。
tf.reduce_sum(Tensor) : 降維加和,比如一個數組是3*3*3大小的,那麼經過這個操作之後會變爲一個數字,即所有元素的加和。
tf.reduce_mean(Tensor):降維平均,和上面的reduce_sum一樣,將高維的數組變爲一個數,該數是數組中所有元素的均值。
Tensorflow 內置的四個損失函數 ↓
下面我們就進入正題啦。Tf內置的損失函數一共有四個,弄懂了一個其餘的就基本理解了,下面我們就逐一的介紹,其中第一個重點介紹,其餘的建立在大家對第一個理解的基礎之上。
① Tensor=tf.nn.softmax_cross_entropy_with_logits(logits= Network.out, labels= Labels_onehot)
上面是softmax交叉熵loss,參數爲網絡最後一層的輸出和onehot形式的標籤。切記輸入一定不要經過softmax,因爲在函數中內置了softmax操作,如果再做就是重複使用了。在計算loss的時候,輸出Tensor要加上tf.reduce_mean(Tensor)或者tf.reduce_sum(Tensor),作爲tensorflow優化器(optimizer)的輸入。
② Tensor=tf.nn.sparse_softmax_cross_entropy_with_logits (logits=Network.out, labels= Labels)
這個函數和上面的區別就是labels參數應該是沒有經過onehot的標籤,其餘的都是一樣的。另外加了sparse的loss還有一個特性就是標籤可以出現-1,如果標籤是-1代表這個數據不再進行梯度回傳。
③ Tensor=tf.nn. sigmoid_cross_entropy_with_logits (logits= Network.out, labels= Labels_onehot)
sigmoid交叉熵loss,與softmax不同的是,該函數首先進行sigmoid操作之後計算交叉熵的損失函數,其餘的特性與tf.nn.softmax_cross_entropy_with_logits一致。
④Tensor=tf.nn.weighted_cross_entropy_with_logits (logits=Network.out, labels=Labels_onehot, pos_weight=decimal_number)
這個loss與衆不同的地方就是加入了一個權重的係數,其餘的地方與tf.nn.sigmoid_cross_entropy_with_logits這個損失函數是一致的,加入的pos_weight參數可以適當的增大或者縮小正樣本的loss,可以一定程度上解決正負樣本數量差距過大的問題。對比下面兩個公式我們可以更加清晰的看到它和sigmoid的損失函數的區別,對於普通的sigmoid來說計算的形式如下:
targets * -log(sigmoid(logits)) + (1 - targets) * -log(1 - sigmoid(logits))
加入了pos_weight之後的計算形式如下:
targets * -log(sigmoid(logits)) * pos_weight + (1 - targets) * -log(1 - sigmoid(logits))
在這裏我們選擇第一種
優化器的方法也有好幾種,這裏我們選用
tf.train.AdamOptimizer
目的就是快
# Import `tensorflow`
import tensorflow as tf
import tensorflow.contrib.slim as slim
import model_cnn
from create_tf_record import *
#*************************************
resize_height, resize_width = 224, 224
batch_size = 64  # examples per training batch
training = True
model_path = "./model/image_model"  # checkpoint prefix for the saved weights
train_keep_prob = 0.6  # dropout keep probability used for training steps only
#*************************************
# Same locations the record-generation script writes to.
train_record_output = 'dataset/record/train224.tfrecords'
val_record_output = 'dataset/record/val224.tfrecords'

# Placeholders for a batch of images and their one-hot labels.
x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, 5])
# Dropout keep probability.  It defaults to 1.0 so that accuracy is measured
# with dropout switched off; training steps feed train_keep_prob.
keep_prob = tf.placeholder_with_default(1.0, shape=(), name="keep_prob")

model_net = model_cnn.CNN(num_classes=5, keep_prob=keep_prob)
output = model_net.alex_net(x)

########## loss function 1: built-in softmax cross entropy ##########
loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y)
loss = tf.reduce_mean(loss)
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

########## loss function 2: cross entropy computed by hand (alternative) ##########
# output = tf.nn.softmax(output)
# loss = -tf.reduce_sum(y * tf.log(tf.clip_by_value(output, 1e-10, 1.0)))
# train_op = tf.train.AdamOptimizer(0.001).minimize(loss)

saver = tf.train.Saver()

# Fraction of examples whose arg-max prediction matches the one-hot label.
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)), tf.float32))

tf_image, tf_label = read_records(train_record_output, resize_height, resize_width, type='normalization')
image_batch, label_batch = get_batch_images(tf_image, tf_label, batch_size=batch_size, labels_nums=5,
                                            one_hot=True, shuffle=True)
tf_image, tf_label = read_records(val_record_output, resize_height, resize_width, type='normalization')
test_image_batch, test_label_batch = get_batch_images(tf_image, tf_label, batch_size=batch_size, labels_nums=5,
                                                      one_hot=True, shuffle=False)

if training:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        max_acc = 0.0
        try:
            for step in range(100001):
                images, labels = sess.run([image_batch, label_batch])
                _, mean_loss = sess.run([train_op, loss], feed_dict={
                    x: images,
                    y: labels,
                    keep_prob: train_keep_prob})
                if step % 10 == 0:
                    # The train accuracy is only needed for this log line, so
                    # the extra forward pass runs only when it is printed.
                    train_accuracy = sess.run(accuracy, feed_dict={
                        x: images,
                        y: labels})
                    print("step = {}\tmean loss = {}\t train_acc= {}".format(step,
                                                                              mean_loss, train_accuracy))
                if step % 100 == 0:
                    # Validate on one batch; keep the weights of the best one.
                    test_images, test_label = sess.run([test_image_batch, test_label_batch])
                    test_feed_dict = {x: test_images, y: test_label}
                    val_accuracy = sess.run(accuracy, feed_dict=test_feed_dict)
                    print("step= {} val accuracy is {}".format(step, val_accuracy))
                    if max_acc < val_accuracy:
                        max_acc = val_accuracy
                        saver.save(sess, model_path)
                        print("save model {} epho".format(step))
        finally:
            # Stop the input queue threads even if training is interrupted.
            coord.request_stop()
            coord.join(threads)
    print('DONE WITH EPOCH')
經過兩萬多步的訓練我們可以看到準確率在93%左右,後續我們使用預訓練模型初始化,進一步提高網絡的準確率
6、模型預測自己的圖片
接下來我給出所有的預測代碼
# Import `tensorflow`
import tensorflow as tf
import tensorflow.contrib.slim as slim
import model_cnn
from create_tf_record import *
import glob
#*************************************
resize_height, resize_width=224, 224
batch_size=64 #設置批次訓練
training=False
images_dir="/home/lenovo/tensorflow/tensorflow_models_learning/dataset/val/plane"
model_path="./model/image_model"
#*************************************
train_record_output = 'dataset/train224.tfrecords'
val_record_output = 'dataset/val224.tfrecords'
label_name_path="./label.txt"
labels_name = np.loadtxt(label_name_path, str, delimiter='\t')
# Initialize placeholders
# x receives image batches of shape [batch, 224, 224, 3]; y receives the
# matching labels as 5-wide rows (the batches fed in are built with
# one_hot=True).
x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
y = tf.placeholder(dtype=tf.float32, shape=[None, 5])
# keep_prob is presumably the dropout keep probability used inside
# model_cnn.CNN (TODO confirm against model_cnn). Dropout has to be disabled
# for prediction, otherwise the scores are randomly perturbed on every run,
# so 0.6 is only used when `training` is True.
# NOTE(review): validation batches evaluated inside the training loop go
# through this same graph and therefore still see keep_prob=0.6.
model_net = model_cnn.CNN(num_classes=5, keep_prob=0.6 if training else 1.0)
output = model_net.alex_net(x)
########## loss function 1 #########################
# Softmax cross-entropy computed from the raw network output, averaged over
# the batch, and minimised with Adam (learning rate 0.001).
loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y)
loss = tf.reduce_mean(loss)
train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
########## loss function 1 #########################
# Alternative: cross-entropy computed by hand (kept for reference, disabled).
########## loss function 2 #########################
#output=tf.nn.softmax(output)
#loss = -tf.reduce_sum(y * tf.log(tf.clip_by_value(output,1e-10,1.0) ))
#train_op = tf.train.AdamOptimizer(0.001).minimize(loss)
########## loss function 2 #########################
# Saver used by the training branch (save) and the prediction branch (restore).
saver = tf.train.Saver()
# Accuracy: fraction of samples whose highest-scoring output index equals the
# index of the largest entry in the label row.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(output, 1), tf.argmax(y, 1)), tf.float32))
# Training input: read_records is pointed at the training TFRecord file and
# the target image size; get_batch_images groups its outputs into batches,
# with one-hot labels and shuffling requested.
tf_image, tf_label = read_records(
    train_record_output,
    resize_height,
    resize_width,
    type='normalization')
image_batch, label_batch = get_batch_images(
    tf_image,
    tf_label,
    batch_size=batch_size,
    labels_nums=5,
    one_hot=True,
    shuffle=True)

# Validation input: same two calls against the validation TFRecord file, with
# shuffling turned off. tf_image / tf_label are deliberately rebound here.
tf_image, tf_label = read_records(
    val_record_output,
    resize_height,
    resize_width,
    type='normalization')
test_image_batch, test_label_batch = get_batch_images(
    tf_image,
    tf_label,
    batch_size=batch_size,
    labels_nums=5,
    one_hot=True,
    shuffle=False)
if training:
    # Training mode: run 30001 optimisation steps, log every 10 steps,
    # evaluate one validation batch every 100 steps and checkpoint to
    # model_path whenever that validation accuracy beats the best seen so far.
    sess = tf.Session()
    max_acc = 0.0
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(30001):
            # Pull one concrete batch out of the input queue, then feed it
            # through the placeholders.
            images, labels = sess.run([image_batch, label_batch])
            train_feed_dict = {x: images, y: labels}
            _, mean_loss = sess.run([train_op, loss],
                                    feed_dict=train_feed_dict)
            if step % 10 == 0:
                # Training accuracy is only needed for this log line, so the
                # extra forward pass is done only on logging steps.
                train_accuracy = sess.run(accuracy,
                                          feed_dict=train_feed_dict)
                print("step = {}\tmean loss = {}\t train_acc= {}".format(
                    step, mean_loss, train_accuracy))
            if step % 100 == 0:
                test_images, test_label = sess.run(
                    [test_image_batch, test_label_batch])
                test_feed_dict = {x: test_images, y: test_label}
                val_accuracy = sess.run(accuracy, feed_dict=test_feed_dict)
                print("step= {} val accuracy is {}".format(step, val_accuracy))
                if max_acc < val_accuracy:
                    max_acc = val_accuracy
                    saver.save(sess, model_path)
                    print("save model {} epho".format(step))
    finally:
        # Always stop the queue-runner threads and release the session, even
        # when a step raises part-way through training.
        coord.request_stop()
        coord.join(threads)
        sess.close()
    print('DONE WITH EPOCH')
if not training:
    # Prediction mode: restore the checkpoint at model_path and classify
    # every *.jpg file found directly inside images_dir, printing the
    # predicted class id, class name and softmax score for each file.
    score = tf.nn.softmax(output, name='pre')
    class_id = tf.argmax(score, 1)
    sess = tf.InteractiveSession()
    try:
        sess.run(tf.global_variables_initializer())
        # Overwrite the freshly initialised variables with the trained ones.
        saver.restore(sess, model_path)
        # NOTE(review): `os` and `np` are not imported explicitly in this
        # script; presumably they arrive through
        # `from create_tf_record import *` -- confirm.
        images_list = glob.glob(os.path.join(images_dir, '*.jpg'))
        if not images_list:
            # Make an empty or mistyped directory visible instead of
            # finishing silently.
            print("no *.jpg images found in {}".format(images_dir))
        for image in images_list:
            im = read_image(image, resize_height=resize_height,
                            resize_width=resize_width, normalization=True)
            # Add a leading batch axis so the single image matches the
            # placeholder's [None, 224, 224, 3] shape.
            im = im[np.newaxis, :]
            pre_score, pre_label = sess.run([score, class_id],
                                            feed_dict={x: im})
            # pre_label is a length-1 array, so these lookups yield
            # length-1 arrays as well.
            max_score = pre_score[0, pre_label]
            print("{} is: pre labels:{},name:{} score: {}".format(image, pre_label, labels_name[pre_label], max_score))
    finally:
        # Release the session even if restoring or inference fails.
        sess.close()
小博對一個文件夾下所有的飛機圖片進行了預測,我們可以看下效果
除了個別錯誤,幾乎都是正確的。在後面的文章中,小博會使用一些技巧來進一步提高它的準確率。
倘若訓練中斷,也可以用上次訓練保存的模型來初始化,然後繼續訓練:
if training:
    # Resume training: start from the weights stored at "./best_model"
    # instead of a fresh initialisation, then run the usual loop. Every 100
    # steps the current weights go to model_path, and additionally to
    # "./best_model" when the validation accuracy improves.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, "./best_model")
    # NOTE(review): the step at which "./best_model" was written is not
    # available here. `global_step` is only bound further down, as a plain
    # Python int, when a best model is saved, so it cannot be fetched with
    # sess.run at this point (that fails before training starts). Storing
    # the step in a graph variable would be needed to report it after restore.
    print("load model scucess ...")
    # NOTE(review): max_acc restarts at 0.0, so the first validation after
    # resuming always overwrites "./best_model".
    max_acc = 0.0
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for step in range(30001):
            images, labels = sess.run([image_batch, label_batch])
            train_feed_dict = {x: images, y: labels}
            _, mean_loss = sess.run([train_op, loss],
                                    feed_dict=train_feed_dict)
            if step % 10 == 0:
                # Training accuracy is only needed for this log line, so the
                # extra forward pass is done only on logging steps.
                train_accuracy = sess.run(accuracy,
                                          feed_dict=train_feed_dict)
                print("step = {}\tmean loss = {}\t train_acc= {}".format(
                    step, mean_loss, train_accuracy))
            if step % 100 == 0:
                test_images, test_label = sess.run(
                    [test_image_batch, test_label_batch])
                test_feed_dict = {x: test_images, y: test_label}
                val_accuracy = sess.run(accuracy, feed_dict=test_feed_dict)
                print("step= {} val accuracy is {}".format(step, val_accuracy))
                # Latest weights, so an interrupted run can be continued.
                saver.save(sess, model_path)
                if max_acc < val_accuracy:
                    max_acc = val_accuracy
                    saver.save(sess, "./best_model")
                    global_step = step
                    print("save model {} epho".format(step))
    finally:
        # Always stop the queue-runner threads and release the session, even
        # when a step raises part-way through training.
        coord.request_stop()
        coord.join(threads)
        sess.close()
    print('DONE WITH EPOCH')