python/c++讀/存16bit圖像 tensorflow--different inference result in python/c++

以前python下訓練、驗證模型,工程化到C++驗證結果與python下一致。

這次python下重新搭建訓練模型、驗證模型,沒問題,結果是正確的。但是當工程化到c++時發現c++結果不正確且與python下驗證結果不一致。

python下訓練代碼部分如下:可以看到是採用decode_png方式讀取圖片數據

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a TF 1.x queue-based input pipeline of preprocessed image batches.

    Args:
        image: list of image file paths (strings).
        label: list of integer class labels aligned with `image`.
        image_W, image_H: target spatial size used by resize/set_shape.
        batch_size: number of samples per batch.
        capacity: queue capacity passed to tf.train.batch.

    Returns:
        image_batch: float32 tensor, shape [batch_size, image_W, image_H, 3].
        label_batch: int32 one-hot tensor, shape [batch_size, 2].
    """
    # step1: wrap the python lists in tensors and build an input queue.
    # tf.cast() performs the dtype conversion.
    image = tf.cast(image, tf.string)

    # Each tensor element is a variable-length byte string (a file path).
    label = tf.cast(label, tf.int32)
    # tf.train.slice_input_producer is a tensor generator: it repeatedly
    # draws one (path, label) pair from the lists into a filename queue,
    # either in order or shuffled.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    # tf.read_file() reads the raw bytes of the dequeued file.

    #print(image_contents)

    #image=tf.py_func(lambda input:cv2.imread(input,flags=-1),[image_contents],tf.float32)



    '''
    imagepath=bytes.decode(image_contents)
    imagecv=cv2.imread(imagepath,flags=-1)
    image=tf.convert_to_tensor(imagecv)
    '''
    # step2: decode the PNG as uint16 (16-bit images); inference-time code
    # must read images the same way for results to match.
    image = tf.image.decode_png(image_contents, channels=3,dtype=tf.uint16)
    #print(image)


    #2020.3.17
    # NOTE(review): resize_images expects [new_height, new_width]; passing
    # [image_W, image_H] is only harmless because W == H in this project.
    image=tf.image.resize_images(image,[image_W,image_H])

    image.set_shape([image_W,image_H,3])
    #print(image)
    #image = array_ops.expand_dims(image, 0)

    # Training-time augmentation: random flips plus a transpose.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.transpose_image(image)
    #print(image)

    # Per-image standardization: (x - mean) / max(stddev, 1/sqrt(N)) with
    # statistics computed over all pixels and channels jointly.
    image = tf.image.per_image_standardization(image)

    # step4: assemble batches.
    # image_batch: 4D tensor [batch_size, width, height, 3], dtype = tf.float32
    # label_batch: 1D tensor [batch_size], dtype = tf.int32
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=16, capacity=capacity)
    # Flatten labels to a [batch_size] vector.
    label_batch = tf.reshape(label_batch, [batch_size])
    # image_batch = tf.cast(image_batch, tf.uint8)  # cast for color display
    image_batch = tf.cast(image_batch, tf.float32)   # already float after standardization
    # print(label_batch) Tensor("Reshape:0", shape=(6,), dtype=int32)

    # Convert labels to one-hot vectors (binary classification, depth 2).
    label_batch = tf.one_hot(label_batch, depth= 2)
    label_batch = tf.cast(label_batch, dtype=tf.int32)
    label_batch = tf.reshape(label_batch, [batch_size, 2])
    #print(label_batch)
    #print(image_batch)
    #https://blog.csdn.net/xiaomu_347/article/details/81040855

    return image_batch, label_batch

python下的驗證代碼如下:(也是decode_png讀取圖片)

import os
import numpy as np
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt


# --- Validation: restore the trained graph and run one image through it. ---
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

image_W=64;
image_H=64;

# Feed the restored graph's named tensors directly.
with tf.Session() as sess1:
    sess1.run(tf.global_variables_initializer())
    new_saver1 = tf.train.import_meta_graph(r'D:\wdxrt0305\0310\xrtmodel\0317model\model.meta')
    new_saver1.restore(sess1,tf.train.latest_checkpoint(r'D:\wdxrt0305\0310\xrtmodel\0317model'))
    graph1 = tf.get_default_graph()
    input_x1 = graph1.get_tensor_by_name("input:0")
    outputlabel = graph1.get_tensor_by_name("output:0")

    # Read and decode the 16-bit PNG exactly as the training pipeline did.
    PATH = 'E:/xrtcnn-20200305/imgs/objs/0.png'
    image_dir = tf.gfile.FastGFile(PATH, 'rb').read()
    imagepng = tf.image.decode_png(image_dir, channels=3,dtype=tf.uint16)


    #2020.3.17
    image2=tf.image.resize_images(imagepng,[image_W,image_H])#bilinear
    #pngdata=image2.eval()
    #print(pngdata)
    image2.set_shape([image_W,image_H, 3])
    image = tf.image.per_image_standardization(image2)
    debug=image.eval()
    print(debug)
    feed_dict1={input_x1:np.reshape(debug, [-1,image_W,image_H,3])}
    img_out_label = sess1.run(outputlabel,feed_dict1)

    out_softmax1 = graph1.get_tensor_by_name("softmax:0")
    img_out_softmax = sess1.run(out_softmax1,feed_dict1)
    s1=img_out_softmax[0][0]
    s2=img_out_softmax[0][1]
    print("fei: ",s1," , kuang: ",s2)
    print ("img_out_label:",img_out_label)
    #del pngdata
    del debug # release the evaluated array before leaving the session
sess1.close()

如上python驗證代碼中是用decode_png方式讀取圖片數據,我發現如果使用下面這兩句替換decode_png預測也是一樣的效果:

image_png=cv2.imread(PATH,flags=-1)
imagepng = image_png[...,::-1].astype(np.float32)

雖然是opencv讀取圖片數據,讀成數組。在tf.image.resize_images這句也可以接受數組的。反正兩種方式讀出來概率完全一樣。

於是我按照python下的驗證代碼直接轉c++下的驗證代碼基本如下:

/////////////////////////////////////find the difference between python inference and c++ inference
// Restore the trained TF graph + checkpoint, then for each 16-bit PNG:
// resize with cv::resize, swap BGR->RGB, standardize, and run the "softmax"
// node. Mirrors the python validation script for side-by-side comparison.
// Returns 0 on success, -1 on any TF setup failure.
int main()//xrtCnnPredict()//
{
	int standard_rows=64;
	int standard_cols=standard_rows;
	string graphpath="/media/root/Ubuntu311/projects/Xrt_projects/XRT_CNN_Primer/xrtmodel/0317model/model.meta";
	string modelpath="/media/root/Ubuntu311/projects/Xrt_projects/XRT_CNN_Primer/xrtmodel/0317model/model";

	///////CNN initiation--
	tensorflow::Session* session;
	tensorflow::Status status = NewSession(tensorflow::SessionOptions(), &session);
	if (!status.ok())
	{
		std::cout << "ERROR: NewSession() init failed..." << std::endl;
		return -1;
	}
	tensorflow::MetaGraphDef graphdef;
	tensorflow::Status status_load = ReadBinaryProto(tensorflow::Env::Default(), graphpath, &graphdef); // read the graph definition from the .meta file
	if (!status_load.ok()) {
			std::cout << "ERROR: Loading model failed..." << std::endl;
			std::cout << status_load.ToString() << "\n";
			return -1;
	}
	tensorflow::Status status_create = session->Create(graphdef.graph_def()); // import the graph into the session
	if (!status_create.ok()) {
			std::cout << "ERROR: Creating graph in session failed..." << status_create.ToString() << std::endl;
			return -1;
	}
	// Restore the pre-trained weights from the checkpoint files.
	tensorflow::Tensor checkpointPathTensor(tensorflow::DT_STRING, tensorflow::TensorShape());
	checkpointPathTensor.scalar<std::string>()() = modelpath;
	status = session->Run(
			  {{ graphdef.saver_def().filename_tensor_name(), checkpointPathTensor },},
			  {},{graphdef.saver_def().restore_op_name()},nullptr);
	if (!status.ok())
	{
		  throw runtime_error("Error loading checkpoint for long algaes ...");
	}


	///////CNN put in images--
	char srcfile[300];
	char feifile[300];
	char orefile[300];

	int all=0;
	for(int idx=3260;idx<=5844;idx++)
	{
		sprintf(srcfile, "/media/root/77B548B10AEA26F1/problemimg/problemtest/%d.png", idx);
		Mat src=imread(srcfile,-1); // flags=-1: keep the original 16-bit depth
		if(!src.data)
		{
			//cout<<"warning:the image does not exist!"<<endl;
			continue;
		}

		// Resize first (bilinear), still in OpenCV's BGR channel order.
		cv::Mat rgbsrc(standard_rows, standard_cols, CV_16UC3);
		cv::resize(src,rgbsrc,cv::Size(standard_cols,standard_rows));
		/*
		for(int r=0;r!=20;r++)
		{
			for(int c=0;c!=standard_cols;c++)
			{
				//cout<<"[("<<r<<","<<c<<") "<<dstimgs.ptr<float>(r)[3*c]<<","<<dstimgs.ptr<float>(r)[3*c+1]<<","<<dstimgs.ptr<float>(r)[3*c+2]<<"] ";
				cout<<"("<<r<<","<<c<<":"<<rgbsrc.ptr<ushort>(r)[3*c+1]<<") ";
			}
			cout<<endl;
		}
		cout<<endl<<endl;
		*/

		// Swap B<->R so the channel order matches TF's decode_png (RGB).
		Mat srccnn(standard_rows, standard_cols, CV_16UC3);
		for(int r=0;r!=standard_rows;r++)
		{
			for(int c=0;c!=standard_cols;c++)
			{
				srccnn.ptr<ushort>(r)[3*c]=rgbsrc.ptr<ushort>(r)[3*c+2];
				srccnn.ptr<ushort>(r)[3*c+1]=rgbsrc.ptr<ushort>(r)[3*c+1];
				srccnn.ptr<ushort>(r)[3*c+2]=rgbsrc.ptr<ushort>(r)[3*c];
				//cout<<"[("<<r<<","<<c<<") "<<src.ptr<ushort>(r)[3*c]<<","<<src.ptr<ushort>(r)[3*c+1]<<","<<src.ptr<ushort>(r)[3*c+2]<<"] ";
			}
			//cout<<endl;
		}
		//cout<<endl<<endl;

		/*
		for(int r=0;r!=20;r++)
		{
			for(int c=0;c!=standard_cols;c++)
			{
				//cout<<"[("<<r<<","<<c<<") "<<dstimgs.ptr<float>(r)[3*c]<<","<<dstimgs.ptr<float>(r)[3*c+1]<<","<<dstimgs.ptr<float>(r)[3*c+2]<<"] ";
				cout<<"("<<r<<","<<c<<":"<<srccnn.ptr<ushort>(r)[3*c+1]<<") ";
			}
			cout<<endl;
		}
		cout<<endl<<endl;
		*/
		//CNN start...
		// cnninputImg aliases the tensor's buffer, so writing the Mat fills the tensor.
		tensorflow::Tensor resized_tensor(tensorflow::DT_FLOAT, tensorflow::TensorShape({1,standard_rows,standard_cols,3}));
		float *imgdata = resized_tensor.flat<float>().data();
		cv::Mat cnninputImg(standard_rows, standard_cols, CV_32FC3, imgdata);

		// Plain [0,1] normalization differed a lot from training;
		// standardization comes much closer to per_image_standardization.
		int pixelsnum=standard_rows*standard_cols;
		cv::Mat dstimgs(standard_rows, standard_cols, CV_32FC3);
		standardImageProcess(srccnn,dstimgs,pixelsnum);
		dstimgs.copyTo(cnninputImg);
		/*
		for(int r=0;r!=20;r++)
		{
			for(int c=0;c!=standard_cols;c++)
			{
				//cout<<"[("<<r<<","<<c<<") "<<dstimgs.ptr<float>(r)[3*c]<<","<<dstimgs.ptr<float>(r)[3*c+1]<<","<<dstimgs.ptr<float>(r)[3*c+2]<<"] ";
				cout<<"("<<r<<","<<c<<":"<<dstimgs.ptr<float>(r)[3*c]<<") ";
			}
			cout<<endl;
		}
		cout<<endl<<endl;
		*/

		//CNN input
		vector<std::pair<string, tensorflow::Tensor> > inputs;
		std::string Input1Name = "input";
		inputs.push_back(std::make_pair(Input1Name, resized_tensor));

		//CNN predict
		vector<tensorflow::Tensor> outputs;
		string output="softmax";//output:0
		tensorflow::Status status_run = session->Run(inputs, {output}, {}, &outputs);
		if (!status_run.ok()) {
		   std::cout << "ERROR: RUN failed in PreAlgaeRecognitionProcess()..."  << std::endl;
		   std::cout << status_run.ToString() << "\n";
		}

		int label=-1;
		double prob=0.0;
//		cout<<"image "<<idx<<" ";
		getPredictLabel(outputs[0],label,prob);
		cout<<endl;

//		if(label==0)
//		{
//			sprintf(feifile, "/media/root/77B548B10AEA26F1/problemimg/problemresult/fei/%d_c_%f.png", all,prob);
//			imwrite(feifile,src);
//		}
//		else
//		{
//			sprintf(orefile, "/media/root/77B548B10AEA26F1/problemimg/problemresult/kuang/%d_c_%f.png", all,prob);
//			imwrite(orefile,src);
//		}
//		all++;
		//CNN end...
	}

	return 0;
}
// Standardize a 3-channel 16-bit image the way TensorFlow's
// tf.image.per_image_standardization does:
//   out = (x - mean) / max(stddev, 1/sqrt(N))
// where mean/stddev are computed over ALL pixels AND channels jointly and
// N is the total element count (rows * cols * 3).
//
// BUG FIX: the previous version computed per-channel statistics and clamped
// the divisor at 1/pixelsnum; TF uses global statistics and 1/sqrt(N), which
// explains the residual python-vs-c++ difference.
//
// channel3imgs: CV_16UC3 input image (RGB-ordered by the caller).
// dstimgs:      receives the CV_32FC3 standardized result.
// pixelsnum:    rows * cols of the input (kept for interface compatibility).
void standardImageProcess(Mat channel3imgs,Mat &dstimgs,int pixelsnum)
{
	const double numElements = 3.0 * pixelsnum; // H*W*3 values

	// View the data as a single channel so meanStdDev aggregates across
	// all three channels instead of returning per-channel statistics.
	Mat flat = channel3imgs.reshape(1);
	Mat meanMat, stddevMat;
	cv::meanStdDev(flat, meanMat, stddevMat);
	double mean = meanMat.at<double>(0);
	double stddev = stddevMat.at<double>(0);

	// TF clamps the divisor to avoid dividing by ~0 on uniform images.
	double minStddev = 1.0 / std::sqrt(numElements);
	double denom = (stddev > minStddev) ? stddev : minStddev;

	// dst = (src - mean) / denom, emitted as CV_32FC3 in one pass.
	channel3imgs.convertTo(dstimgs, CV_32FC3, 1.0 / denom, -mean / denom);
}

// Standardize one image plane into imgdst: (x - mean) / max(std, thre).
// channel1img: single-channel plane read as 16-bit (ptr<ushort>) — assumes
//              CV_16U input; other depths would be misread.
// mean, std:   precomputed statistics for this plane.
// pixelsnum:   pixel count of the plane; thre = 1/pixelsnum guards against
//              division by a near-zero stddev.
// imgdst:      CV_32FC1 output, written element-wise.
// NOTE(review): tf.image.per_image_standardization clamps the divisor at
// 1/sqrt(num_elements) and computes statistics over all channels jointly,
// so this per-channel version with thre = 1/pixelsnum only approximates it.
void imagestandard(Mat &channel1img,float mean,float std,int pixelsnum,Mat &imgdst)
{
	float thre=1.0/pixelsnum;
	for(int r=0;r!=channel1img.rows;r++)
	{
		for(int c=0;c!=channel1img.cols;c++)
		{
			float x=channel1img.ptr<ushort>(r)[c];
			imgdst.ptr<float>(r)[c]=(x-mean)/max(std,thre);
		}
	}

}

// Argmax over a softmax output tensor (only batch row 0 is examined).
// probabilities:   float tensor of shape [batch_size, num_classes].
// output_class_id: set to the argmax class index.
// output_prob:     updated with the winning probability; the comparison
//                  starts from its incoming value, so the caller must
//                  initialise it (e.g. to 0.0) before calling.
// Always returns 0.
// (Removed unused locals `ndim2` and `tout` from the original.)
int getPredictLabel(tensorflow::Tensor &probabilities,int &output_class_id,double &output_prob)
{
	auto tmap = probabilities.tensor<float, 2>();        // Tensor Shape: [batch_size, target_class_num]
	int output_dim = probabilities.shape().dim_size(1);  // number of classes

	// Argmax: get final prediction label and probability.
	for (int j = 0; j < output_dim; j++)
	{
		//std::cout << "Class " << j << " prob:" << tmap(0, j) << "," << std::endl;
		if (tmap(0, j) >= output_prob) {
				output_class_id = j;
				output_prob = tmap(0, j);
		}
	}

	// NOTE(review): this debug print assumes at least two classes.
	std::cout <<" prob:" << tmap(0, 0) << " , "<<tmap(0, 1) << std::endl;

	return 0;
}

當我使用的同樣的一張圖片、同樣的模型,出來結果相差很大,類別都不一樣。

於是我一步步找差別,發現:

一、讀存16bit圖像不一樣

python驗證代碼中兩種讀取數據都是按r,g,b方式來的,而opencv是b,g,r!!!所以必須先對imread後的結果通道轉換。

也就是上面所示的這句:

imagepng = image_png[...,::-1].astype(np.float32)

c++笨拙寫的這樣:

Mat srcrgb(src.rows, src.cols, CV_16UC3);
		for(int r=0;r!=src.rows;r++)
		{
			for(int c=0;c!=src.cols;c++)
			{
				srcrgb.ptr<ushort>(r)[3*c]=src.ptr<ushort>(r)[3*c+2];
				srcrgb.ptr<ushort>(r)[3*c+1]=src.ptr<ushort>(r)[3*c+1];
				srcrgb.ptr<ushort>(r)[3*c+2]=src.ptr<ushort>(r)[3*c];
			}
		}

經過對比每個像素點的數據,發現python和c++下每個數據已統一。

 

二、resize方式

我查看了下python和c++下的resize方式是一樣的,都是雙線性插值法。但是python下tf.image.resize與c++下cv::resize後的數據對比發現很不一樣。不止是float與ushort的差別,是每個像素點的數據的問題!其實這是已知差異:TF 1.x 的 tf.image.resize_images 默認 align_corners=False,採樣座標沒有做半像素偏移,而 cv::resize 的雙線性插值是按像素中心(帶半像素偏移)採樣的,所以即使同樣叫「雙線性插值」,兩者逐像素的結果也不會一致。

三、標準化方式

python下的標準化是直接調用tf.image.per_image_standardization;而C++沒有現成的函數,我試過歸一化結果與tf.image.per_image_standardization差別較大,於是只能按公式像上面給出的那樣寫了一個函數,差別出來比較接近。但還是有一點差別。注意 tf.image.per_image_standardization 的定義是 (x - mean) / max(stddev, 1/sqrt(N)),其中均值和標準差是對整張圖所有像素、所有通道一起計算的,N 是元素總數(H*W*C);如果在C++裏按通道分別計算統計量,或者把下限取成 1/N 而不是 1/sqrt(N),結果就會有偏差。

但是即使以上三點差別,第二、三點的綜合差別竟然導致c++/python預測類別不一致啊,你說概率不一致還能理解,但類別都不一樣?!!!

我查了很多方式但並未直接解決這個問題。

有一個問題我很疑惑,python下:訓練模型時是decode_png讀圖,然後使用tf.image.resize...等預處理訓練圖片,那是否python驗證時也一定要decode_png讀圖、tf.image.resize來處理驗證圖??事實證明,如果也這樣做,預測出來的結果是正確的(因爲和訓練中處理圖片的代碼一樣)(預測出來概率是[0.9297426 , 0.07025738])。但如果不這樣做,比如python下加載這個模型用cv2.imread、cv2.resize(和訓練處理圖片代碼不同)來處理驗證同一張圖片,發現預測出來概率是[0.83095837, 0.1690416]。我知道這張圖片實際是第一類,這樣看這兩種方式驗證類別是正確的,但是概率不一樣,到底是第一種方式概率正確還是第二種???????如果是第一種,那沒辦法c++工程化,因爲c++下無法tf.image.resize。如果第二種方式正確,完全可以c++工程化而且與python下驗證結果包括概率都一樣。

我目前用的第二種方式,若有人知道,望解答。

 

 

 

最近發現一個博主寫得好,在跟着學習: https://blog.csdn.net/u010712012/category_8126761.html  

 

 

 

在跟着這個博主做實例:https://blog.csdn.net/qq_41776781/article/details/94452085

1、AlexNet

import tensorflow as tf
import numpy as np

from tensorflow.python.framework import dtypes
from tensorflow.python.framework.ops import convert_to_tensor
#from tensorflow.data import Dataset

img_size=64

#VGG_MEAN = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32)

# 把圖片數據轉化爲三維矩陣
# Turns lists of image paths and labels into a batched tf.data pipeline.
class ImageDataGenerator(object):
    """Dataset wrapper producing (image, one_hot_label) batches.

    Each element of `self.data` is a float32 [img_size, img_size, 3] image
    scaled to [0, 1] from 16-bit PNG data, paired with a one-hot label.
    """

    def __init__(self, images, labels, batch_size, num_classes, image_format='png', shuffle=True):
        """
        Args:
            images: list of image file paths, e.g. [P1, P2].
            labels: list of integer labels aligned with `images`, e.g. [1, 2].
            batch_size: batch size of the resulting dataset.
            num_classes: one-hot depth.
            image_format: 'png' or 'jpg'.
            shuffle: shuffle the (path, label) lists once up front.
        """
        self.img_paths = images
        self.labels = labels
        self.data_size = len(self.labels)
        self.num_classes = num_classes
        self.image_format = image_format

        if shuffle:
            self._shuffle_lists()

        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)
        data = tf.data.Dataset.from_tensor_slices((self.img_paths, self.labels))
        data = data.map(self._parse_function_train)
        data = data.batch(batch_size)
        self.data = data

    def _shuffle_lists(self):
        """Shuffle img_paths and labels with one shared permutation."""
        path = self.img_paths
        labels = self.labels
        permutation = np.random.permutation(self.data_size)
        self.img_paths = []
        self.labels = []
        for i in permutation:
            self.img_paths.append(path[i])
            self.labels.append(labels[i])

    def _parse_function_train(self, filename, label):
        """Dataset map fn: decode one image and build its one-hot label."""
        one_hot = tf.one_hot(label, self.num_classes)
        img_string = tf.read_file(filename)
        if self.image_format == "jpg":
            # BUG FIX: decode_jpeg has no `dtype` argument (JPEG is 8-bit
            # only); passing dtype=tf.uint16 raised a TypeError.
            img_decoded = tf.image.decode_jpeg(img_string, channels=3)
        elif self.image_format == "png":
            img_decoded = tf.image.decode_png(img_string, channels=3, dtype=tf.uint16)
        else:
            # BUG FIX: fail loudly instead of printing and falling through
            # with img_decoded unbound (the original then hit a NameError).
            raise ValueError("Error! Can't confirm the format of images!")
        img_resized = tf.image.resize_images(img_decoded, [img_size, img_size])
        img_resized.set_shape([img_size, img_size, 3])
        image = tf.image.random_flip_left_right(img_resized)
        image = tf.image.random_flip_up_down(image)
        image = tf.image.transpose_image(image)
        # Scale uint16 values into [0, 1].
        image = tf.cast(image, tf.float32) * (1. / 65535)
        # BUG FIX: the map function must return its outputs; without this
        # every dataset element was None.
        return image, one_hot
import tensorflow as tf

def alexnet(x, keep_prob, num_classes):
    """AlexNet-style network with the classic two-group (split) conv layers.

    Args:
        x: input image batch, float32 [batch, H, W, 3]; the flatten step
           assumes pool5 comes out as 2*2*256, i.e. 64x64 inputs.
        keep_prob: dropout keep-probability placeholder.
        num_classes: size of the final logits layer.

    Returns:
        fc8: unscaled logits tensor of shape [batch, num_classes].
    """
    # conv1
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(x, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)

    # lrn1
    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(conv1,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    # pool1
    with tf.name_scope('pool1') as scope:
        pool1 = tf.nn.max_pool(lrn1,
                             ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1],
                             padding='VALID')

    # conv2 (two-group convolution, as in the original AlexNet)
    with tf.name_scope('conv2') as scope:
        pool1_groups = tf.split(axis=3, value = pool1, num_or_size_splits = 2)
        kernel = tf.Variable(tf.truncated_normal([5, 5, 48, 256], dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value = kernel, num_or_size_splits = 2)
        conv_up = tf.nn.conv2d(pool1_groups[0], kernel_groups[0], [1,1,1,1], padding='SAME')
        conv_down = tf.nn.conv2d(pool1_groups[1], kernel_groups[1], [1,1,1,1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv2 = tf.nn.relu(bias, name=scope)

    # lrn2
    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(conv2,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    # pool2
    with tf.name_scope('pool2') as scope:
        pool2 = tf.nn.max_pool(lrn2,
                             ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1],
                             padding='VALID')

    # conv3
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)

    # conv4 (grouped)
    with tf.name_scope('conv4') as scope:
        conv3_groups = tf.split(axis=3, value=conv3, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv3_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv3_groups[1], kernel_groups[1], [1,1,1,1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up,bias_down])
        conv4 = tf.nn.relu(bias, name=scope)

    # conv5 (grouped)
    with tf.name_scope('conv5') as scope:
        conv4_groups = tf.split(axis=3, value=conv4, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 256],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv4_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv4_groups[1], kernel_groups[1], [1,1,1,1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up,bias_down])
        conv5 = tf.nn.relu(bias, name=scope)

    # pool5
    with tf.name_scope('pool5') as scope:
        pool5 = tf.nn.max_pool(conv5,
                             ksize=[1, 3, 3, 1],
                             strides=[1, 2, 2, 1],
                             padding='SAME',)
        print('alexNet中最後一層卷積層的形狀是:', pool5.shape)

    # flattened6
    with tf.name_scope('flattened6') as scope:
        flattened = tf.reshape(pool5, shape=[-1, 2*2*256])

    # fc6
    with tf.name_scope('fc6') as scope:
        weights = tf.Variable(tf.truncated_normal([2*2*256, 4096],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
                            trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(flattened, weights, biases)
        fc6 = tf.nn.relu(bias)

    # dropout6
    with tf.name_scope('dropout6') as scope:
        dropout6 = tf.nn.dropout(fc6, keep_prob)

    # fc7
    with tf.name_scope('fc7') as scope:
        weights = tf.Variable(tf.truncated_normal([4096,4096],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
                            trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(dropout6, weights, biases)
        fc7 = tf.nn.relu(bias)

    # dropout7
    with tf.name_scope('dropout7') as scope:
       dropout7 = tf.nn.dropout(fc7, keep_prob)

    # fc8: final logits layer (no activation; softmax is applied by the loss)
    with tf.name_scope('fc8') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, num_classes],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[num_classes], dtype=tf.float32),
                                        trainable=True, name='biases')
        fc8 = tf.nn.xw_plus_b(dropout7, weights, biases)

    # BUG FIX: the function must return the logits — callers do
    # `fc8 = alexnet(x, keep_prob, num_classes)` and feed it to the loss.
    return fc8
"""
Created on Sun Mar 29 15:32:17 2020
https://blog.csdn.net/qq_41776781/article/details/94452085
@author: 90467
"""
import os
import numpy as np
import tensorflow as tf
from alexnetright import alexnet
from datageneratorright import ImageDataGenerator
#from datetime import datetime
import glob
from tensorflow.data import Iterator
import matplotlib.pyplot as plt


# --- Hyper-parameters and dataset configuration ---
img_size=64
learning_rate = 1e-3
num_epochs = 20  # number of epochs (was 10)
train_batch_size = 200 # was 1024
    #test_batch_size = 100
dropout_rate = 0.5
num_classes = 2  # number of class labels
display_step = 2 # write loss/accuracy every display_step training batches
                     # need: display_step <= train_dataset_size / train_batch_size
'''
filewriter_path = "./tmp/tensorboard"  # 存儲tensorboard文件
checkpoint_path = "./tmp/checkpoints"  # 訓練好的模型和參數存放目錄
'''
image_format = 'png' # image format of the dataset
file_name_of_class = ['fei','kuang'] # 'fei' -> label 0, 'kuang' -> label 1; file names are assumed to contain their class name
train_dataset_paths = ['D:/wdxrt0305/0310/images/0325/train/fei/','D:/wdxrt0305/0310/images/0325/train/kuang/'] # training set directories (adjust to the actual layout)
    #test_dataset_paths = ['G:/Lab/Data_sets/catanddog/test/cat/',
    #                      'G:/Lab/Data_sets/catanddog/test/dog/'] # test set directories (adjust to the actual layout)
    # NOTE: sample file names are assumed to contain their class name,
    # i.e. one of the strings in file_name_of_class.
    # End of configuration.


    # --- Build the training file list and labels ---
train_image_paths = []
train_labels = []
    # Walk each training directory and collect every image path.
for train_dataset_path in train_dataset_paths:
    length = len(train_image_paths)
    train_image_paths[length:length] = np.array(glob.glob(train_dataset_path + '*.' + image_format)).tolist()
for image_path in train_image_paths:
    image_file_name = image_path.split('/')[-1]
    for i in range(num_classes):
        if file_name_of_class[i] in image_file_name:
            train_labels.append(i)
            break

    # get Datasets
    # Build the generator that yields (image, one_hot) training batches.
train_data = ImageDataGenerator(
    images=train_image_paths,
    labels=train_labels,
    batch_size=train_batch_size,
    num_classes=num_classes,
    image_format=image_format,
    shuffle=True)
    
# get Iterators
with tf.name_scope('input'):
    # Reinitializable iterator built from the dataset's structure.
    train_iterator = Iterator.from_structure(train_data.data.output_types,
                                    train_data.data.output_shapes)
    training_initalizer=train_iterator.make_initializer(train_data.data)
    #test_iterator = Iterator.from_structure(test_data.data.output_types,test_data.data.output_shapes)
    #testing_initalizer=test_iterator.make_initializer(test_data.data)
    # Tensors produced by one iteration: (image batch, label batch).
    train_next_batch = train_iterator.get_next()
    #test_next_batch = test_iterator.get_next()

x = tf.placeholder(tf.float32, [None, img_size, img_size, 3])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

# alexnet
fc8 = alexnet(x, keep_prob, num_classes)

# loss
with tf.name_scope('loss'):
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc8,
                                                              labels=y))
# optimizer
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

# accuracy
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(fc8, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()
'''
# Tensorboard
tf.summary.scalar('loss', loss_op)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(filewriter_path)
'''
# Enable GPU memory growth instead of grabbing all GPU memory up front.
config = tf.ConfigProto() 
config.gpu_options.allow_growth = True 

# Number of iterations per epoch.
train_batches_per_epoch = int(np.floor(train_data.data_size / train_batch_size))
#test_batches_per_epoch = int(np.floor(test_data.data_size / test_batch_size))

allnum=int(np.floor(train_batches_per_epoch*num_epochs))
fig_accuracy = np.zeros(allnum)
fig_loss = np.zeros(allnum)
fig_i=0

sess = tf.Session(config=config)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for epoch in np.arange(num_epochs):
        if coord.should_stop():
            break 
        
        sess.run(training_initalizer)
        print("Epoch number: {} start".format(epoch + 1))

        # train
        for step in range(train_batches_per_epoch):
            img_batch, label_batch = sess.run(train_next_batch)
            #print(img_batch.shape,'\n',label_batch.shape)
            loss,_ = sess.run([loss_op,train_op], feed_dict={x: img_batch,y: label_batch,keep_prob: dropout_rate})
            
            # NOTE(review): accuracy is evaluated with dropout still active
            # (keep_prob=dropout_rate); evaluation usually uses keep_prob=1.0.
            acc = sess.run(accuracy, feed_dict={x: img_batch,y: label_batch,keep_prob: dropout_rate})
            if step % display_step == 0:
                print("index[%s]".center(50,'-')%epoch)
                print("Train: loss:{},accuracy:{}".format(loss,acc))
            # NOTE(review): the checkpoint is saved on EVERY step (this line
            # is outside the display_step guard) — costly for large models.
            saver.save(sess,r'D:/wdxrt0305/0310/xrtmodel/0327jinxingmodel/model')
                
            fig_loss[fig_i] =loss
            fig_accuracy[fig_i] =acc
            fig_i=fig_i+1
        
        
    # Plot loss and accuracy on twin y-axes.
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    lns1 = ax1.plot(np.arange(allnum), fig_loss, label="Loss")
    # Accuracy curve (plotted twice: raw red line plus a labelled handle
    # so it can appear in the merged legend).
    ax2.plot(np.arange(len(fig_accuracy)), fig_accuracy, 'r')
    lns2 = ax2.plot(np.arange(allnum), fig_accuracy, 'r', label="Accuracy")
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('training loss')
    ax2.set_ylabel('training accuracy')
    # Merge the legends of both axes.
    lns = lns1 + lns2
    labels = ["Loss", "Accuracy"]
    # labels = [l.get_label() for l in lns]
    plt.legend(lns, labels, loc=7)
    plt.show()
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached') 
    
finally:
    coord.request_stop()
coord.join(threads)
sess.close()

曲線:

我知道這麼看訓練acc整體是上升的,但震盪較大(調lr好像沒用,據說是因爲每個batch的圖片不一樣,所以有震盪是正常?),但訓練loss變化幅度和訓練acc不同步,這裏loss下降太快,怎麼會這樣?

還不如之前的一個簡單的訓練模型:

這個變化loss與acc是同步的,而且準確率還高些。

今天在學習了這個大神 https://blog.csdn.net/han_xiaoyang/article/list/1?t=1&orderby=UpdateTime 相關文章的分析網絡調節網絡技巧後,我將上圖這個網絡進行了調節,然後曲線圖如下:

藍色綠色分別是訓練和驗證loss,紅色和天藍色分別是訓練和驗證的acc。但是trainloss與valloss曲線非常貼近(幾乎重合),train_acc和val_acc的曲線也非常貼近(幾乎重合)。這種現象正常嗎?我看別人的曲線好像雖然trainloss和valloss都下降,但曲線不會很貼近,train和val的acc曲線也是走勢都上升,但不會貼近??在我現在的認知裏我覺得這個模型學習能力很強,但我將這個模型在測試集上測試時測試效果不如人意,我現在在懷疑是測試集的分佈情況與訓練集驗證集差別很大?我不知道除此之外還應懷疑什麼?但是數據分佈差別不大啊,都是隨機的。我同事說這是過擬合?!

然後我減小模型容量,因爲訓練集就20000,加了L2正則化和dropout,發現又欠擬合,然後我去掉了dropout,只在訓練集加入了L2正則化:

import numpy as np
import tensorflow as tf
from xrtPrecess import get_files_list,get_file, get_batch

import math
from tensorflow.python.framework import graph_util
import matplotlib.pyplot as plt

# --- Variable declarations ---
N_CLASSES = 2
IMG_W = 64  # resize target; larger images make training slower
IMG_H = 64

train_dir =r'D:\wdxrt0305\0331\train' 
#train, train_label= get_files_list(train_dir)
train, train_label,test,test_label = get_file(train_dir)
trainnum=len(train_label)

# Learning rate is fed at run time through a placeholder.
lr = tf.placeholder(tf.float32)
learning_rate = lr

def build_network(channel,keep_prob,is_training=True):
    """Build the 3-conv-block CNN classification graph.

    Args:
        channel: nominal number of input channels (informational only; the
            input placeholder is hard-wired to 3 channels).
        keep_prob: dropout keep probability (currently unused — the dropout
            layers are disabled in this version).
        is_training: Python bool, or a tf.bool tensor/placeholder. When
            true, L2 weight regularization is added to the cost.

    Returns:
        dict exposing the graph endpoints: X, Y, lr, optimize,
        correct_prediction, correct_times_in_batch, cost, accuracy.
    """
    X = tf.placeholder(tf.float32, shape=(None,IMG_W,IMG_H,3), name='input')
    Y = tf.placeholder(tf.int32, shape=(None,N_CLASSES), name='Y')

    def weight_variable(shape, n):
        # Truncated-normal initial value with stddev n.
        return tf.truncated_normal(shape, stddev=n, dtype=tf.float32)

    def bias_variable(shape):
        # Small positive bias keeps ReLU units initially active.
        return tf.constant(0.1, shape=shape, dtype=tf.float32)

    def conv2d(x, W):
        # Stride-1 SAME convolution: output keeps the spatial size.
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x, name):
        # 3x3 max pooling with stride 2: halves the spatial size.
        return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    # conv1: 3x3 kernels, 3 -> 64 channels.
    with tf.variable_scope('conv1'):
        w_conv1 = tf.Variable(weight_variable([3, 3, 3, 64], 1.0), name='weights', dtype=tf.float32)
        b_conv1 = tf.Variable(bias_variable([64]), name='biases', dtype=tf.float32)
        h_conv1 = tf.nn.relu(conv2d(X, w_conv1) + b_conv1, name='conv1')

    # pool1 + local response normalization.
    with tf.variable_scope('pooling1_lrn'):
        pool1 = max_pool_2x2(h_conv1, 'pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # conv2: 3x3 kernels, 64 -> 32 channels.
    with tf.variable_scope('conv2'):
        w_conv2 = tf.Variable(weight_variable([3, 3, 64, 32], 0.1), name='weights', dtype=tf.float32)
        b_conv2 = tf.Variable(bias_variable([32]), name='biases', dtype=tf.float32)
        h_conv2 = tf.nn.relu(conv2d(norm1, w_conv2) + b_conv2, name='conv2')

    with tf.variable_scope('pooling2_lrn'):
        pool2 = max_pool_2x2(h_conv2, 'pooling2')
        norm2 = tf.nn.lrn(pool2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')

    # conv3: 3x3 kernels, 32 -> 16 channels.
    with tf.variable_scope('conv3'):
        w_conv3 = tf.Variable(weight_variable([3, 3, 32, 16], 0.1), name='weights', dtype=tf.float32)
        b_conv3 = tf.Variable(bias_variable([16]), name='biases', dtype=tf.float32)
        h_conv3 = tf.nn.relu(conv2d(norm2, w_conv3) + b_conv3, name='conv3')

    with tf.variable_scope('pooling3_lrn'):
        pool3 = max_pool_2x2(h_conv3, 'pooling3')
        norm3 = tf.nn.lrn(pool3, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm3')

    # FC layer 1: flatten -> 256 units.
    # NOTE(review): the flattened input is pool3, so norm3 is computed but
    # never consumed by the rest of the graph — possibly an oversight;
    # preserved here to keep behavior unchanged.
    with tf.variable_scope('local3') as scope:
        shape = int(np.prod(norm3.get_shape()[1:]))
        pool3_flat = tf.reshape(pool3, [-1, shape])
        w_fc1 = tf.Variable(weight_variable([shape, 256], 0.005), name='weights', dtype=tf.float32)
        b_fc1 = tf.Variable(bias_variable([256]), name='biases', dtype=tf.float32)
        h_fc1 = tf.nn.relu(tf.matmul(pool3_flat, w_fc1) + b_fc1, name=scope.name)

    # FC layer 2: 256 -> 256 units.
    with tf.variable_scope('local4') as scope:
        w_fc2 = tf.Variable(weight_variable([256 ,256], 0.005), name='weights', dtype=tf.float32)
        b_fc2 = tf.Variable(bias_variable([256]), name='biases', dtype=tf.float32)
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, w_fc2) + b_fc2, name=scope.name)

    # Linear output layer: raw per-class scores (logits).
    with tf.variable_scope('softmax_linear'):
        weights = tf.Variable(weight_variable([256, N_CLASSES], 0.005), name='softmax_linear', dtype=tf.float32)
        biases = tf.Variable(bias_variable([N_CLASSES]), name='biases', dtype=tf.float32)
        softmax_linear = tf.add(tf.matmul(h_fc2, weights), biases, name='softmax_linear')

    # Probabilities, exported for inference under the op name "softmax".
    finaloutput = tf.nn.softmax(softmax_linear, name="softmax")

    # Register the conv weights for L2 regularization.
    tf.add_to_collection(tf.GraphKeys.WEIGHTS, w_conv1)
    tf.add_to_collection(tf.GraphKeys.WEIGHTS, w_conv2)
    tf.add_to_collection(tf.GraphKeys.WEIGHTS, w_conv3)
    regularizer = tf.contrib.layers.l2_regularizer(scale=5.0/trainnum)
    reg_term = tf.contrib.layers.apply_regularization(regularizer)

    # BUG FIX: the *_with_logits losses must receive the raw scores.
    # Previously `finaloutput` (the softmax output) was passed as logits —
    # a double activation that flattens gradients and makes the reported
    # loss curve meaningless.
    base_cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=softmax_linear,
                                                labels=tf.cast(Y, tf.float32)))

    # BUG FIX: the caller feeds `is_training` as a tf.bool placeholder; the
    # old `if is_training==True:` compared a Tensor to a Python bool at
    # graph-build time (always False in TF1), so the L2 term was silently
    # never applied. Handle both a Python bool and a tensor.
    if isinstance(is_training, bool):
        cost = (base_cost + reg_term) if is_training else base_cost
    else:
        cost = base_cost + tf.cond(is_training,
                                   lambda: reg_term,
                                   lambda: tf.zeros_like(reg_term))

    optimize = tf.train.AdamOptimizer(lr).minimize(cost)
    prediction_labels = tf.argmax(finaloutput, axis=1, name="output")

    read_labels = tf.argmax(Y, axis=1)
    correct_prediction = tf.equal(prediction_labels, read_labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    correct_times_in_batch = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))

    return dict(X=X,Y=Y,lr=lr,optimize=optimize,correct_prediction=correct_prediction,
           correct_times_in_batch=correct_times_in_batch,cost=cost,accuracy=accuracy)


show_epochs=2  # print training status every `show_epochs` iterations

#train_dir =r'D:\wdxrt0305\0331\train' 
##train, train_label= get_files_list(train_dir)
#train, train_label,test,test_label = get_file(train_dir)
BATCH_SIZE = 256#32#1024     # images per batch
CAPACITY =1024      # maximum input-queue capacity
epochs=50#60

# One "iteration" consumes one batch; MAX_STEP is the total step count.
iterations=int(np.ceil(len(train_label) /BATCH_SIZE))
MAX_STEP = epochs*iterations
print("訓練樣本標籤數量爲:%d 個"%len(train_label)) 
print("iterations: ",iterations)
print("step:  ",MAX_STEP)
num=MAX_STEP
#num=int(np.ceil(MAX_STEP / show_epochs))
# Pre-allocated per-step curves for the loss/accuracy plot after training.
fig_accuracy = np.zeros(num)
fig_loss = np.zeros(num)
fig_valaccuracy = np.zeros(num)
fig_valloss = np.zeros(num)
fig_i=0  # write cursor into the curve arrays

#lr = 0.0001  # usually below 0.0001
# Linear learning-rate decay bounds, applied per step in the training loop.
max_learning_rate = 0.001 #0.0002
min_learning_rate = 0.0000001
#decay_speed = 100.0

# Queue-based input pipelines for training and validation batches.
train_batch, train_label_batch = get_batch(train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
vali_batch, vali_label_batch = get_batch(test, test_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
#print(train_batch)
#print(train_label_batch)
keep_prob = tf.placeholder(tf.float32)
is_training = tf.placeholder(tf.bool,name="is_training")  
graph = build_network(channel=3,keep_prob=keep_prob,is_training=is_training)

sess = tf.Session()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    for epoch in np.arange(epochs):
        if coord.should_stop():
            break

        for step in np.arange(iterations):
            # Linearly decay the learning rate over the whole run.
            learning_rate = max_learning_rate - (max_learning_rate-min_learning_rate) * (fig_i/num)
            batch_xs, batch_ys = sess.run([train_batch, train_label_batch])
            batch_valxs, batch_valys = sess.run([vali_batch, vali_label_batch])

            # One optimization step on the training batch.
            accuracy,mean_cost_in_batch,return_correct_times_in_batch,_ = sess.run(
                [graph['accuracy'],graph['cost'],graph['correct_times_in_batch'],graph['optimize']],
                feed_dict={
                    graph['X']: batch_xs,
                    graph['lr']: learning_rate,
                    graph['Y']: batch_ys,
                    keep_prob: 0.5,
                    is_training: True
                })
            # Forward-only evaluation on a validation batch (no optimize op).
            accuracyval,mean_cost_val = sess.run(
                [graph['accuracy'],graph['cost']],
                feed_dict={
                    graph['X']: batch_valxs,
                    graph['lr']: learning_rate,
                    graph['Y']: batch_valys,
                    keep_prob: 1.0,
                    is_training: False
                })
            if step % show_epochs == 0:
                print("epoch[%s]".center(50,'-')%epoch)
                print("iteration[%s]".center(50,'-')%step)
                print("trainloss:{},valloss:{},trainacc:{}, valiacc:{}".format(mean_cost_in_batch,mean_cost_val,accuracy,accuracyval))
            fig_loss[fig_i] = mean_cost_in_batch
            fig_accuracy[fig_i] = accuracy
            fig_valloss[fig_i] = mean_cost_val
            fig_valaccuracy[fig_i] = accuracyval
            fig_i = fig_i + 1

        # Checkpoint once per epoch; the old per-iteration save only rewrote
        # the same files and slowed training down considerably.
        saver.save(sess, r'D:/wdxrt0305/0310/xrtmodel/0401jinxingmodel/model')

    # Plot the loss (left axis) and accuracy (right axis) curves.
    # The old duplicate ax2.plot of fig_accuracy (same data twice) is removed.
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    lns1 = ax1.plot(np.arange(num), fig_loss, label="train Loss")
    lns2 = ax1.plot(np.arange(num), fig_valloss, 'green', label="vali Loss")
    lns3 = ax2.plot(np.arange(num), fig_accuracy, 'red', label="train Accuracy")
    lns4 = ax2.plot(np.arange(num), fig_valaccuracy, 'skyblue', label="vali Accuracy")

    ax1.set_xlabel('iteration')
    ax1.set_ylabel('loss')
    ax2.set_ylabel('accuracy')
    # Merge the legends of both axes.
    lns = lns1 + lns2 + lns3 + lns4
    labels = ["train Loss", "vali loss", "train acc", "vali acc"]
    plt.legend(lns, labels, loc=7)
    plt.show()

except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()
coord.join(threads)
sess.close()

其實依舊還是很貼近,幾乎重合;然而在測試集上效果仍然不好。爲什麼呢?

2、VGG19

使用不加BN的VGG19,訓練loss一直在0.69,訓練acc一直在0.5左右;然後加入BN後雖然loss一開始不是0.69,但很快收斂到0.69左右,這不對!

import tensorflow as tf  

def bn(x, is_training):
    """Batch-normalize `x`; `is_training` selects batch statistics (True) or moving averages (False)."""
    normalized = tf.layers.batch_normalization(x, training=is_training)
    return normalized
	
def maxPoolLayer(x, kHeight, kWidth, strideX, strideY, name, padding="SAME"):
    """Max-pool `x` with a kHeight x kWidth window.

    NOTE(review): the stride vector is [1, strideX, strideY, 1] as in the
    original; all callers pass equal strides, so the ordering is harmless.
    """
    window = [1, kHeight, kWidth, 1]
    step = [1, strideX, strideY, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding, name=name)
		
def dropout(x, keepPro, name=None):
    """Apply dropout, keeping each unit with probability `keepPro`.

    Bug fix: `name` was previously passed positionally, which bound it to
    tf.nn.dropout's third parameter `noise_shape` instead of `name`; any
    non-None name would have been misinterpreted as a noise shape.
    """
    return tf.nn.dropout(x, keepPro, name=name)
def fcLayer(x, inputD, outputD, reluFlag, name):
    """Fully connected layer: x @ w + b, optionally followed by ReLU."""
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[inputD, outputD], dtype="float")
        b = tf.get_variable("b", [outputD], dtype="float")
        out = tf.nn.xw_plus_b(x, w, b, name=scope.name)
        return tf.nn.relu(out) if reluFlag else out
			
def convLayer(x, kHeight, kWidth, strideX, strideY, featureNum, name, padding = "SAME"):
    """Convolution + bias + ReLU.

    Bug fix: the old `tf.reshape(out, featureMap.get_shape().as_list())`
    was a no-op when the static shape is fully known, and raises when the
    batch dimension is dynamic (None, as with a placeholder batch), so it
    has been removed.

    NOTE(review): strides are [1, strideY, strideX, 1]; callers pass equal
    strides, so the ordering is harmless.
    """
    channel = int(x.get_shape()[-1])
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape=[kHeight, kWidth, channel, featureNum])
        b = tf.get_variable("b", shape=[featureNum])
        featureMap = tf.nn.conv2d(x, w, strides=[1, strideY, strideX, 1], padding=padding)
        out = tf.nn.bias_add(featureMap, b)
        return tf.nn.relu(out, name=scope.name)
		
class VGG19(object):
    """VGG-19 classifier with batch normalization after every conv layer.

    Args:
        x: input image batch, NHWC float tensor.
        keepPro: dropout keep probability for the FC layers.
        classNum: number of output classes.
        is_training: Python bool or tf.bool tensor, forwarded to batch norm.

    After construction, `self.fc8` holds the raw class scores (logits).
    """

    def __init__(self, x, keepPro, classNum, is_training):
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.is_training = is_training
        self.begin_VGG_19()

    def begin_VGG_19(self):
        """Build the VGG-19 graph from self.X up to the fc8 logits."""
        conv1_1 = convLayer(self.X, 3, 3, 1, 1, 64, "conv1_1")
        conv1_1 = bn(conv1_1, self.is_training)
        conv1_2 = convLayer(conv1_1, 3, 3, 1, 1, 64, "conv1_2")
        conv1_2 = bn(conv1_2, self.is_training)
        pool1 = maxPoolLayer(conv1_2, 2, 2, 2, 2, "pool1")

        conv2_1 = convLayer(pool1, 3, 3, 1, 1, 128, "conv2_1")
        conv2_1 = bn(conv2_1, self.is_training)
        conv2_2 = convLayer(conv2_1, 3, 3, 1, 1, 128, "conv2_2")
        conv2_2 = bn(conv2_2, self.is_training)
        pool2 = maxPoolLayer(conv2_2, 2, 2, 2, 2, "pool2")

        conv3_1 = convLayer(pool2, 3, 3, 1, 1, 256, "conv3_1")
        conv3_1 = bn(conv3_1, self.is_training)
        conv3_2 = convLayer(conv3_1, 3, 3, 1, 1, 256, "conv3_2")
        conv3_2 = bn(conv3_2, self.is_training)
        conv3_3 = convLayer(conv3_2, 3, 3, 1, 1, 256, "conv3_3")
        conv3_3 = bn(conv3_3, self.is_training)
        conv3_4 = convLayer(conv3_3, 3, 3, 1, 1, 256, "conv3_4")
        conv3_4 = bn(conv3_4, self.is_training)
        pool3 = maxPoolLayer(conv3_4, 2, 2, 2, 2, "pool3")

        conv4_1 = convLayer(pool3, 3, 3, 1, 1, 512, "conv4_1")
        conv4_1 = bn(conv4_1, self.is_training)
        conv4_2 = convLayer(conv4_1, 3, 3, 1, 1, 512, "conv4_2")
        conv4_2 = bn(conv4_2, self.is_training)
        conv4_3 = convLayer(conv4_2, 3, 3, 1, 1, 512, "conv4_3")
        conv4_3 = bn(conv4_3, self.is_training)
        conv4_4 = convLayer(conv4_3, 3, 3, 1, 1, 512, "conv4_4")
        conv4_4 = bn(conv4_4, self.is_training)
        pool4 = maxPoolLayer(conv4_4, 2, 2, 2, 2, "pool4")

        conv5_1 = convLayer(pool4, 3, 3, 1, 1, 512, "conv5_1")
        conv5_1 = bn(conv5_1, self.is_training)
        conv5_2 = convLayer(conv5_1, 3, 3, 1, 1, 512, "conv5_2")
        conv5_2 = bn(conv5_2, self.is_training)
        conv5_3 = convLayer(conv5_2, 3, 3, 1, 1, 512, "conv5_3")
        conv5_3 = bn(conv5_3, self.is_training)
        conv5_4 = convLayer(conv5_3, 3, 3, 1, 1, 512, "conv5_4")
        conv5_4 = bn(conv5_4, self.is_training)
        pool5 = maxPoolLayer(conv5_4, 2, 2, 2, 2, "pool5")
        print('最後一層卷積層的形狀是:', pool5.shape)

        # BUG FIX: flatten using pool5's actual static shape instead of the
        # hard-coded 4*4*512 (correct only for 128x128 inputs; with the
        # 64x64 inputs this project feeds, pool5 is 2x2x512 and the old
        # reshape silently folded samples across the batch dimension).
        _, h, w, c = pool5.get_shape().as_list()
        flat_dim = int(h * w * c)
        fcIn = tf.reshape(pool5, [-1, flat_dim])
        fc6 = fcLayer(fcIn, flat_dim, 4096, True, "fc6")
        dropout1 = dropout(fc6, self.KEEPPRO)
        fc7 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc7, self.KEEPPRO)
        # BUG FIX: logits must NOT go through ReLU. Clamping the scores at
        # zero makes softmax/sigmoid cross-entropy stall near log(2)≈0.69
        # — exactly the "loss stuck at 0.69" symptom.
        self.fc8 = fcLayer(dropout2, 4096, self.CLASSNUM, False, "fc8")
#https://blog.csdn.net/qq_41776781/article/details/94452085
import numpy as np
import tensorflow as tf
from vgg19 import VGG19
from datageneratorright import ImageDataGenerator
#from datetime import datetime
import glob
from tensorflow.data import Iterator
import matplotlib.pyplot as plt


# Initial parameter settings.
img_size=64
learning_rate = 1e-4
num_epochs = 7  # number of epochs (was 10 before)
train_batch_size = 200 # was 1024 before
    #test_batch_size = 100
dropout_rate = 0.5
num_classes = 2  # number of class labels
display_step = 2 # write loss/accuracy after every display_step training batches
                     # need: display_step <= train_dataset_size / train_batch_size
'''
filewriter_path = "./tmp/tensorboard"  # 存儲tensorboard文件
checkpoint_path = "./tmp/checkpoints"  # 訓練好的模型和參數存放目錄
'''
image_format = 'png' # file extension of the dataset images
file_name_of_class = ['fei','kuang'] # 'fei' maps to label 0, 'kuang' to label 1; file names are assumed to contain the class name
train_dataset_paths = ['D:/wdxrt0305/0310/images/0325/train/fei/','D:/wdxrt0305/0310/images/0325/train/kuang/'] # training-set directories (adjust to the actual data location)
    #test_dataset_paths = ['G:/Lab/Data_sets/catanddog/test/cat/',
    #                      'G:/Lab/Data_sets/catanddog/test/dog/'] # test-set directories
    # NOTE: each sample's file name is assumed to contain its class name, i.e. one of file_name_of_class
    # End of initial parameter settings.
        

    # 訓練數據集數據處理
# Build the training image-path list and the matching label list.
train_image_paths = []
train_labels = []
# Collect every image of the configured format from each class directory.
for train_dataset_path in train_dataset_paths:
    # glob.glob already returns a list; the old np.array(...).tolist()
    # round trip (via slice assignment) was redundant.
    train_image_paths.extend(glob.glob(train_dataset_path + '*.' + image_format))
# Derive each label from the class name embedded in the file name.
for image_path in train_image_paths:
    image_file_name = image_path.split('/')[-1]
    for i in range(num_classes):
        if file_name_of_class[i] in image_file_name:
            train_labels.append(i)
            break

# Wrap paths + labels in the dataset generator (shuffled, batched).
train_data = ImageDataGenerator(
    images=train_image_paths,
    labels=train_labels,
    batch_size=train_batch_size,
    num_classes=num_classes,
    image_format=image_format,
    shuffle=True)
    
# get Iterators
with tf.name_scope('input'):
    # Reinitializable iterator matched to the training dataset's structure.
    train_iterator = Iterator.from_structure(train_data.data.output_types,
                                    train_data.data.output_shapes)
    training_initalizer = train_iterator.make_initializer(train_data.data)
    # Each get_next() call yields one (image_batch, label_batch) pair.
    train_next_batch = train_iterator.get_next()

x = tf.placeholder(tf.float32, [None, img_size, img_size, 3])
y = tf.placeholder(tf.float32, [None, num_classes])

# BUG FIX: VGG19's constructor signature is (x, keepPro, classNum,
# is_training); the old call VGG19(bgr_image=x, num_class=num_classes)
# used keyword names that do not exist and raised a TypeError.
# NOTE(review): is_training=True is fine for this training-only script; a
# tf.bool placeholder would be needed to reuse the graph for inference.
model = VGG19(x, dropout_rate, num_classes, True)

score = model.fc8  # raw per-class scores (logits)
# Only the fully connected head is trained; conv layers stay frozen.
# NOTE(review): no pretrained weights are loaded anywhere in this script,
# so the frozen conv layers keep their random initialization — the network
# cannot learn useful features this way. Either load VGG weights or make
# all variables trainable.
train_layer = ['fc8', 'fc7', 'fc6']
var_list = [v for v in tf.trainable_variables() if v.name.split('/')[0] in train_layer]


with tf.name_scope('loss'):
    # Sigmoid cross-entropy over the raw fc8 scores. For mutually
    # exclusive one-hot labels, softmax cross-entropy (the commented
    # alternative) is the more conventional choice.
    #loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))
    loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=score, labels=tf.cast(y, tf.float32)))

gradients = tf.gradients(loss_op, var_list)

gradients = list(zip(gradients, var_list))

with tf.name_scope('optimizer'):
    # BUG FIX: tf.layers.batch_normalization registers its moving
    # mean/variance updates in GraphKeys.UPDATE_OPS; without running them
    # alongside the train op, the moving statistics are never updated and
    # inference-mode BN produces garbage.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)

with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(score, 1), tf.argmax(y, 1))
    # BUG FIX: the batch accuracy is the *mean* of per-sample correctness;
    # the old code returned the raw per-sample vector.
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# TensorBoard summaries are currently disabled (see the quoted block below).
#tf.summary.scalar('loss', loss)


init = tf.global_variables_initializer()
'''
# Tensorboard
tf.summary.scalar('loss', loss_op)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(filewriter_path)
'''
# Let GPU memory grow on demand instead of grabbing it all up front.
config = tf.ConfigProto() 
config.gpu_options.allow_growth = True 

# Number of iterations per epoch (full batches only).
train_batches_per_epoch = int(np.floor(train_data.data_size / train_batch_size))
#test_batches_per_epoch = int(np.floor(test_data.data_size / test_batch_size))

# Pre-allocated per-step curves for the loss/accuracy plot after training.
allnum=int(np.floor(train_batches_per_epoch*num_epochs))
fig_accuracy = np.zeros(allnum)
fig_loss = np.zeros(allnum)
fig_i=0

sess = tf.Session(config=config)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
# Coordinator/queue runners are legacy queue-pipeline plumbing; the
# tf.data iterator above does not strictly require them.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    for epoch in np.arange(num_epochs):
        if coord.should_stop():
            break

        sess.run(training_initalizer)
        print("Epoch number: {} start...".format(epoch + 1))

        # train
        for step in range(train_batches_per_epoch):
            img_batch, label_batch = sess.run(train_next_batch)
            loss, _ = sess.run([loss_op, train_op], feed_dict={x: img_batch, y: label_batch})

            # Evaluate batch accuracy with numpy. BUG FIX: the old code
            # built new tf.argmax ops via sess.run inside this loop on
            # every iteration, growing the graph without bound and slowing
            # training down progressively.
            softmax_prediction = sess.run(score, feed_dict={x: img_batch, y: label_batch})
            prediction_label = np.argmax(softmax_prediction, 1)
            actual_label = np.argmax(label_batch, 1)
            rightlabel = int(np.sum(prediction_label == actual_label))
            precision = rightlabel / train_batch_size

            if step % display_step == 0:
                print("index[%s]".center(50, '-') % step)
                print("Train: loss:{},accuracy:{}".format(loss, precision))

            fig_loss[fig_i] = loss
            fig_accuracy[fig_i] = precision
            fig_i = fig_i + 1

        # Checkpoint once per epoch instead of once per iteration.
        saver.save(sess, r'D:/wdxrt0305/0310/xrtmodel/0327jinxingmodel/model')

    # Plot loss (left axis) and accuracy (right axis). The old duplicate
    # ax2.plot of fig_accuracy (same data twice) is removed.
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    lns1 = ax1.plot(np.arange(allnum), fig_loss, label="Loss")
    lns2 = ax2.plot(np.arange(allnum), fig_accuracy, 'r', label="Accuracy")
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('training loss')
    ax2.set_ylabel('training accuracy')
    # Merge the legends of both axes.
    lns = lns1 + lns2
    labels = ["Loss", "Accuracy"]
    plt.legend(lns, labels, loc=7)
    plt.show()
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()
coord.join(threads)
sess.close()

曲線圖:

這不正確!網絡完全沒有學習能力?!

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 認真的小布丁和認真打遊戲的對象

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章