創新實訓日記五:視線追蹤模型結構的調整和數據集的改進

本週的工作依然是調整模型結構,以優化結果。上週我們設計了新的數據集,本週完成數據採集之後,便在初步數據集上嘗試了兩個新的模型。

一個是減少特徵的簡單模型,只有一層卷積、一層池化和一層全連接;另一個是增加特徵的複雜模型,包括臉部的卷積、池化、全連接,眼睛部位的卷積、池化、全連接,以及最後合併各部分特徵的總全連接層。

相關代碼如下
簡單模型

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: vali
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 27 19:54:34 2019

@author: vali
"""

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 31 21:02:14 2019

@author: vali
"""

# coding:utf8
 
import tensorflow as tf
import numpy as np
import loadData2 as ld
import os 
# Set the TF_CPP_MIN_LOG_LEVEL environment variable to '2' to quieten
# TensorFlow's native (C++) logging (presumably hides INFO and WARNING
# messages -- confirm against the TensorFlow documentation).
# NOTE(review): this assignment runs after `import tensorflow`; it is commonly
# recommended to set it before the import -- confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 
def weight_variable(shape, stddev=0.01):
    """Create a trainable weight tensor initialised with small Gaussian noise.

    Random (non-constant) initial values avoid perfectly symmetric weights.
    The values are drawn with ``tf.random_normal`` (a plain, non-truncated
    normal distribution) using the given standard deviation.

    :param shape: shape of the weight tensor to create
    :param stddev: standard deviation of the initial noise (default 0.01)
    :return: a ``tf.Variable`` holding the randomly initialised weights
    """
    initial = tf.random_normal(shape, stddev=stddev)
    return tf.Variable(initial)
 
 
def bias_variable(shape):
    """Create a trainable bias tensor with every element set to 0.1.

    A small positive starting value is used because the layers are followed
    by ReLU; the intent is to avoid units that start out inactive
    ("dead neurons").

    :param shape: shape of the bias tensor to create
    :return: a ``tf.Variable`` initialised to the constant 0.1
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
 
 
def conv2d(x, W):
    """Apply ``tf.nn.conv2d`` with a stride of 1 and SAME padding.

    :param x: input tensor to convolve
    :param W: convolution filter; e.g. a shape of [5, 5, 1, 32] means a 5x5
        kernel, 1 input channel and 32 different filters
    :return: the convolved tensor; with SAME padding and unit strides the
        spatial size of the output equals that of the input
    """
    # A stride of 1 in every dimension visits every pixel of the image.
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
 
 
def max_pool_2x2(x):
    """Down-sample ``x`` with 2x2 max pooling.

    The window and the stride are both 2 along the two spatial axes, so the
    feature map is shrunk in both directions; SAME padding is used at the
    borders.

    :param x: input tensor to pool
    :return: the pooled tensor
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")

def generator():
    """Yield training samples from 'dataset_300.json' in an endless cycle.

    The data is loaded once through ``ld.load_data``; samples are then
    yielded in order as ``(eye image, image info, label)`` tuples, wrapping
    back to the first sample after the last one.
    """
    eyes, infos, labels = ld.load_data('dataset_300.json')
    sample_count = len(eyes)
    position = 0
    while True:
        yield eyes[position], infos[position], labels[position]
        # Advance and wrap around to the start once the end is reached.
        position = (position + 1) % sample_count
        
        
 
def train(val_eye, val_data_info, val_label):
    """Train the simple eye-only gaze model and print validation predictions.

    The network is one 5x5 convolution (20 filters) with ReLU, one 2x2
    max-pool, dropout, and a single fully connected tanh layer that maps the
    flattened eye features concatenated with ``x_info`` to a 2-value output.
    It is trained with Adam (learning rate 0.0005) on a mean-squared-error
    loss for 600 steps, printing the batch loss on every second step, and is
    then evaluated one sample at a time on up to the first 10 validation
    samples.

    :param val_eye: validation eye images; item ``i`` must support
        ``.reshape((1, 25, 50, 3))``
    :param val_data_info: validation image info; item ``i`` must support
        ``.reshape((1, 4))``
    :param val_label: validation labels; item ``i`` must support
        ``.reshape((1, 2))``
    :return: None; losses and predictions are printed
    """
    # Input pipeline: samples come from `generator`, are shuffled through a
    # 158-element buffer and grouped into batches of 25.
    data = tf.data.Dataset.from_generator(
        generator,
        (tf.float32, tf.float32, tf.float32),
        (tf.TensorShape([25, 50, 3]), tf.TensorShape([4]), tf.TensorShape([2])))
    data = data.shuffle(158).batch(25)
    data = data.repeat()
    data = data.make_one_shot_iterator()
    gen = data.get_next()

    # Placeholders: eye image, extra image info, and the 2-value label.
    x0 = tf.placeholder(tf.float32, [None, 25, 50, 3], 'x0')
    x_info = tf.placeholder(tf.float32, [None, 4], 'x_info')
    y_ = tf.placeholder(tf.float32, [None, 2], 'y0')

    # Eye branch: 5x5 kernels, 3 input channels, 20 filters.
    # filter weights --> add bias --> convolution --> pooling
    W_conv_eye = weight_variable([5, 5, 3, 20])
    b_conv_eye = bias_variable([20])
    h_conv_eye = tf.nn.relu(conv2d(x0, W_conv_eye) + b_conv_eye)  # 25x50x3 --> 25x50x20
    h_pool_eye = max_pool_2x2(h_conv_eye)  # 25x50x20 --> 13x25x20

    h_pool_eye_flat = tf.reshape(h_pool_eye, [-1, 13 * 25 * 20])

    # Dropout on the flattened eye features; the keep probability is fed at
    # run time (0.2 while training, 1.0 when evaluating).
    keep_prob_eye = tf.placeholder(tf.float32)
    h_eye_drop = tf.nn.dropout(h_pool_eye_flat, keep_prob_eye)

    # Append the 4 image-info values to the eye features before the FC layer.
    h_test = tf.concat([h_eye_drop, x_info], 1)

    W_fc_eye = weight_variable([13 * 25 * 20 + 4, 2])
    b_fc_eye = bias_variable([2])
    y_conv = tf.nn.tanh(tf.matmul(h_test, W_fc_eye) + b_fc_eye)

    # Loss is the mean squared error between labels and predictions,
    # optimised with Adam at learning rate 0.0005.
    mse_loss = tf.losses.mean_squared_error(labels=y_, predictions=y_conv)
    train_step = tf.train.AdamOptimizer(0.0005).minimize(mse_loss)

    # Checkpointing is not enabled; add a tf.train.Saver here to persist
    # the trained weights.
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()  # initialise all variables
        sess.run(init_op)

        STEPS = 600
        for i in range(STEPS):
            batch_eye, batch_img_info, batch_y = sess.run(gen)

            if i % 2 == 0:
                # Report the loss on the current batch with dropout disabled.
                train_loss = sess.run(
                    mse_loss,
                    feed_dict={x0: batch_eye, y_: batch_y,
                               x_info: batch_img_info, keep_prob_eye: 1.0})
                print(i, train_loss)

            sess.run(train_step,
                     feed_dict={x0: batch_eye, y_: batch_y,
                                x_info: batch_img_info, keep_prob_eye: 0.2})

        # Validation: evaluate up to the first 10 samples one by one; the
        # bound avoids an IndexError when fewer than 10 samples are supplied.
        for i in range(min(10, len(val_eye))):
            batch_eye = val_eye[i].reshape((1, 25, 50, 3))
            batch_img_info = val_data_info[i].reshape((1, 4))
            batch_y = val_label[i].reshape((1, 2))
            res = sess.run(
                [mse_loss, y_conv],
                feed_dict={x0: batch_eye, y_: batch_y,
                           x_info: batch_img_info, keep_prob_eye: 1.0})
            print('--------------validation---------------')
            print(i, res[0])
            print(batch_y, res[1])
            

if __name__ == "__main__":
    # Load the validation split (second argument 1) and run training followed
    # by validation.
    validation_set = ld.load_data('dataset_300.json', 1)
    val_eye, val_data_info, val_label = validation_set
    train(val_eye, val_data_info, val_label)


複雜模型

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: vali
"""

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 31 21:02:14 2019

@author: vali
"""

# coding:utf8
 
import tensorflow as tf
import numpy as np
import loadData as ld
import os 
# Set the TF_CPP_MIN_LOG_LEVEL environment variable to '2' to quieten
# TensorFlow's native (C++) logging (presumably hides INFO and WARNING
# messages -- confirm against the TensorFlow documentation).
# NOTE(review): this assignment runs after `import tensorflow`; it is commonly
# recommended to set it before the import -- confirm it still takes effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 
def weight_variable(shape, stddev=0.01):
    """Create a trainable weight tensor initialised with small Gaussian noise.

    Random (non-constant) initial values avoid perfectly symmetric weights.
    The values are drawn with ``tf.random_normal`` (a plain, non-truncated
    normal distribution) using the given standard deviation.

    :param shape: shape of the weight tensor to create
    :param stddev: standard deviation of the initial noise (default 0.01)
    :return: a ``tf.Variable`` holding the randomly initialised weights
    """
    initial = tf.random_normal(shape, stddev=stddev)
    return tf.Variable(initial)
 
 
def bias_variable(shape):
    """Create a trainable bias tensor with every element set to 0.1.

    A small positive starting value is used because the layers are followed
    by ReLU; the intent is to avoid units that start out inactive
    ("dead neurons").

    :param shape: shape of the bias tensor to create
    :return: a ``tf.Variable`` initialised to the constant 0.1
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
 
 
def conv2d(x, W):
    """Apply ``tf.nn.conv2d`` with a stride of 1 and SAME padding.

    :param x: input tensor to convolve
    :param W: convolution filter; e.g. a shape of [5, 5, 1, 32] means a 5x5
        kernel, 1 input channel and 32 different filters
    :return: the convolved tensor; with SAME padding and unit strides the
        spatial size of the output equals that of the input
    """
    # A stride of 1 in every dimension visits every pixel of the image.
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")
 
 
def max_pool_2x2(x):
    """Down-sample ``x`` with 2x2 max pooling.

    The window and the stride are both 2 along the two spatial axes, so the
    feature map is shrunk in both directions; SAME padding is used at the
    borders.

    :param x: input tensor to pool
    :return: the pooled tensor
    """
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")

def generator():
    """Yield training samples from 'dataset_300.json' in an endless cycle.

    The data is loaded once through ``ld.load_data``; samples are then
    yielded in order as ``(eye image, face image, image info, label)``
    tuples, wrapping back to the first sample after the last one.
    """
    eyes, faces, infos, labels = ld.load_data('dataset_300.json')
    sample_count = len(eyes)
    position = 0
    while True:
        yield eyes[position], faces[position], infos[position], labels[position]
        # Advance and wrap around to the start once the end is reached.
        position = (position + 1) % sample_count
        
        
 
def train(val_eye, val_face, val_data_info, val_label):
    """Train the two-branch (eye + face) gaze model and print validation output.

    Eye branch: 5x5 convolution (32 filters) with ReLU, 2x2 max-pool,
    dropout, fully connected tanh layer with 128 units.
    Face branch: two 5x5 convolution + ReLU + 2x2 max-pool stages (32 and 64
    filters), fully connected tanh layer with 128 units, dropout, fully
    connected tanh layer with 64 units.
    The two branch outputs and the 4 ``x_info`` values are concatenated
    (128 + 64 + 4 = 196 features) and passed through a 128-unit tanh layer,
    dropout, and a final 2-unit tanh output layer.

    The model is trained with Adam (learning rate 0.0005) on a
    mean-squared-error loss for 1000 steps, printing the batch loss on every
    second step, and is then evaluated one sample at a time on up to the
    first 10 validation samples.

    :param val_eye: validation eye images; item ``i`` must support
        ``.reshape((1, 25, 50, 3))``
    :param val_face: validation face images; item ``i`` must support
        ``.reshape((1, 50, 50, 3))``
    :param val_data_info: validation image info; item ``i`` must support
        ``.reshape((1, 4))``
    :param val_label: validation labels; item ``i`` must support
        ``.reshape((1, 2))``
    :return: None; losses and predictions are printed
    """
    # Input pipeline: samples come from `generator`, are shuffled through a
    # 241-element buffer and grouped into batches of 25.
    data = tf.data.Dataset.from_generator(
        generator,
        (tf.float32, tf.float32, tf.float32, tf.float32),
        (tf.TensorShape([25, 50, 3]), tf.TensorShape([50, 50, 3]),
         tf.TensorShape([4]), tf.TensorShape([2])))
    data = data.shuffle(241).batch(25)
    data = data.repeat()
    data = data.make_one_shot_iterator()
    gen = data.get_next()

    # Placeholders: eye image, face image, extra image info, 2-value label.
    x0 = tf.placeholder(tf.float32, [None, 25, 50, 3], 'x0')
    x_face = tf.placeholder(tf.float32, [None, 50, 50, 3], 'x_face')
    x_info = tf.placeholder(tf.float32, [None, 4], 'x_info')
    y_ = tf.placeholder(tf.float32, [None, 2], 'y0')

    # Eye branch: 5x5 kernels, 3 input channels, 32 filters.
    # filter weights --> add bias --> convolution --> pooling
    W_conv_eye = weight_variable([5, 5, 3, 32])
    b_conv_eye = bias_variable([32])
    h_conv_eye = tf.nn.relu(conv2d(x0, W_conv_eye) + b_conv_eye)  # 25x50x3 --> 25x50x32
    h_pool_eye = max_pool_2x2(h_conv_eye)  # 25x50x32 --> 13x25x32

    h_pool_eye_flat = tf.reshape(h_pool_eye, [-1, 13 * 25 * 32])

    keep_prob_eye = tf.placeholder(tf.float32)
    h_eye_drop = tf.nn.dropout(h_pool_eye_flat, keep_prob_eye)

    W_fc_eye = weight_variable([13 * 25 * 32, 128])
    b_fc_eye = bias_variable([128])
    h_fc_eye = tf.nn.tanh(tf.matmul(h_eye_drop, W_fc_eye) + b_fc_eye)

    # Face branch: two convolution + pooling stages.
    W_conv_face1 = weight_variable([5, 5, 3, 32])
    b_conv_face1 = bias_variable([32])
    h_conv_face1 = tf.nn.relu(conv2d(x_face, W_conv_face1) + b_conv_face1)
    h_pool_face1 = max_pool_2x2(h_conv_face1)  # 50x50x32 --> 25x25x32

    W_conv_face2 = weight_variable([5, 5, 32, 64])
    b_conv_face2 = bias_variable([64])
    h_conv_face2 = tf.nn.relu(conv2d(h_pool_face1, W_conv_face2) + b_conv_face2)
    h_pool_face2 = max_pool_2x2(h_conv_face2)  # 25x25x64 --> 13x13x64

    h_pool_face_flat = tf.reshape(h_pool_face2, [-1, 13 * 13 * 64])

    W_fc_face1 = weight_variable([13 * 13 * 64, 128])
    b_fc_face1 = bias_variable([128])
    h_fc_face1 = tf.nn.tanh(tf.matmul(h_pool_face_flat, W_fc_face1) + b_fc_face1)
    keep_prob_face = tf.placeholder(tf.float32)
    h_fc_face1_drop = tf.nn.dropout(h_fc_face1, keep_prob_face)

    W_fc_face2 = weight_variable([128, 64])
    b_fc_face2 = bias_variable([64])
    h_fc_face2 = tf.nn.tanh(tf.matmul(h_fc_face1_drop, W_fc_face2) + b_fc_face2)

    # Merge: eye features (128) + face features (64) + image info (4) = 196.
    h_pool_merge1 = tf.concat([h_fc_eye, h_fc_face2], 1)
    h_pool_merge2 = tf.concat([h_pool_merge1, x_info], 1)

    W_fc1 = weight_variable([196, 128])
    b_fc1 = bias_variable([128])
    h_fc1 = tf.nn.tanh(tf.matmul(h_pool_merge2, W_fc1) + b_fc1)  # FC layer 196 --> 128

    # Dropout to reduce over-fitting; the keep probability is fed through a
    # placeholder: part of the units is dropped while training, and all of
    # them are kept when predicting.
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output layer: 2 values squashed by tanh (a regression output, not a
    # softmax over classes).
    W_fc2 = weight_variable([128, 2])
    b_fc2 = bias_variable([2])
    y_conv = tf.nn.tanh(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

    # Loss is the mean squared error between labels and predictions,
    # optimised with Adam at learning rate 0.0005.
    mse_loss = tf.losses.mean_squared_error(labels=y_, predictions=y_conv)
    train_step = tf.train.AdamOptimizer(0.0005).minimize(mse_loss)

    # Checkpointing is not enabled; add a tf.train.Saver here to persist
    # the trained weights.
    with tf.Session() as sess:
        init_op = tf.global_variables_initializer()  # initialise all variables
        sess.run(init_op)

        STEPS = 1000
        for i in range(STEPS):
            batch_eye, batch_face, batch_img_info, batch_y = sess.run(gen)

            if i % 2 == 0:
                # Report the loss on the current batch with dropout disabled.
                train_loss = sess.run(
                    mse_loss,
                    feed_dict={x0: batch_eye, x_face: batch_face, y_: batch_y,
                               x_info: batch_img_info, keep_prob_eye: 1.0,
                               keep_prob_face: 1.0, keep_prob: 1.0})
                print(i, train_loss)

            sess.run(train_step,
                     feed_dict={x0: batch_eye, x_face: batch_face, y_: batch_y,
                                x_info: batch_img_info, keep_prob_eye: 0.2,
                                keep_prob_face: 0.8, keep_prob: 0.5})

        # Validation: evaluate up to the first 10 samples one by one; the
        # bound avoids an IndexError when fewer than 10 samples are supplied.
        for i in range(min(10, len(val_eye))):
            batch_eye = val_eye[i].reshape((1, 25, 50, 3))
            batch_face = val_face[i].reshape((1, 50, 50, 3))
            batch_img_info = val_data_info[i].reshape((1, 4))
            batch_y = val_label[i].reshape((1, 2))
            res = sess.run(
                [mse_loss, y_conv],
                feed_dict={x0: batch_eye, x_face: batch_face, y_: batch_y,
                           x_info: batch_img_info, keep_prob_eye: 1.0,
                           keep_prob_face: 1.0, keep_prob: 1.0})
            print('--------------validation---------------')
            print(i, res[0])
            print(batch_y, res[1])
            
        

if __name__ == "__main__":
    # Load the validation data (second argument 1) and run training followed
    # by validation.
    validation_set = ld.load_data('dataset.json', 1)
    val_eye, val_face, val_data_info, val_label = validation_set
    train(val_eye, val_face, val_data_info, val_label)

經過反覆調整batch,learning rate,在模型上訓練400輪的結果如下
複雜模型訓練結果和驗證結果

複雜模型訓練結果

在這裏插入圖片描述


簡單模型上的訓練驗證結果

在這裏插入圖片描述

在這裏插入圖片描述


從上圖可以看出,在小數據集(251個樣本)上同樣訓練400輪,複雜模型在訓練集上表現很好,但在驗證集上表現很差,可見存在嚴重的過擬合;簡單模型在訓練集上的表現與複雜模型差不多,但在驗證集上的表現比複雜模型好很多。

而且在訓練輪數增加之後,複雜模型loss能更進一步降到0.001左右,而簡單模型就基本在0.008左右。

對於這種現象,下一步我們決定適當減少複雜模型的特徵,並擴充數據集,再對比簡單模型看效果是否有改善。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章