Implementing GoogLeNet-InceptionV1 in TensorFlow

GoogLeNet Inception v1 architecture, with PyTorch, TensorFlow, Keras, and Paddle implementations

Environment

Python 3.6, tensorflow-gpu 1.12.0

Code

# -*- coding: utf-8 -*- 
# @Time : 2020/2/3 9:56 
# @Author : Zhao HL
# @File : InceptionV1-tensorflow.py 
import sys, cv2, os
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from my_utils import process_show, draw_loss_acc, dataInfo_show, dataset_divide

tf.logging.set_verbosity(tf.logging.ERROR)

# region parameters
# region paths
Data_path = "./data/"
Data_csv_path = "./data/split.txt"
Model_path = 'model/'
Model_file_tf = "model/InceptionV1_tf.ckpt"
Model_file_keras = "model/InceptionV1_keras.h5"
Model_file_torch = "model/InceptionV1_torch.pth"
Model_file_paddle = "model/InceptionV1_paddle.model"
# endregion

# region image parameter
Img_size = 224
Img_chs = 3
Label_size = 1
Label_class = ['agricultural',
               'airplane',
               'baseballdiamond',
               'beach',
               'buildings',
               'chaparral',
               'denseresidential',
               'forest',
               'freeway',
               'golfcourse',
               'harbor',
               'intersection',
               'mediumresidential',
               'mobilehomepark',
               'overpass',
               'parkinglot',
               'river',
               'runway',
               'sparseresidential',
               'storagetanks',
               'tenniscourt']
Labels_nums = len(Label_class)
# endregion

# region net parameter
Conv1_kernel_size = 7
Conv1_chs = 64
Conv21_kernel_size = 1
Conv21_chs = 64
Conv2_kernel_size = 3
Conv2_chs = 192
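# each Inception tuple lists output channels as (#1x1, #3x3 reduce, #3x3, #5x5 reduce, #5x5, pool proj)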
Icp3a_size = (64, 96, 128, 16, 32, 32)
Icp3b_size = (128, 128, 192, 32, 96, 64)
Icp4a_size = (192, 96, 208, 16, 48, 64)
Icp4b_size = (160, 112, 224, 24, 64, 64)
Icp4c_size = (128, 128, 256, 24, 64, 64)
Icp4d_size = (112, 144, 288, 32, 64, 64)
Icp4e_size = (256, 160, 320, 32, 128, 128)
Icp5a_size = (256, 160, 320, 32, 128, 128)
Icp5b_size = (384, 192, 384, 48, 128, 128)
Out_chs1 = 128
Out_chs2 = 1024
# endregion

# region hyperparameter
Learning_rate = 1e-3
Batch_size = 16
Buffer_size = 256
Infer_size = 1
Epochs = 20
Train_num = 1470
Train_batch_num = Train_num // Batch_size
Val_num = 210
Val_batch_num = Val_num // Batch_size
Test_num = 420
Test_batch_num = Test_num // Batch_size


# endregion

# endregion

class MyDataset():
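    # batch loader over the image folder: each __getitem__ call returns one
    # (images, labels) batch, and the file list is reshuffled after the last full batch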
    def __init__(self, root_path, batch_size, files_list=None):
        self.root_path = root_path
        self.batch_size = batch_size
        self.files_list = files_list if files_list else os.listdir(root_path)
        self.size = len(self.files_list)
        np.random.shuffle(self.files_list)

    def __len__(self):
        return self.size

    def __getitem__(self, batch_index):
        images, labels = [], []
        start_index = batch_index * self.batch_size
        end_index = (batch_index + 1) * self.batch_size
        for index in range(start_index, end_index):
            label_str = os.path.basename(self.files_list[index])[:-6]
            label = Label_class.index(label_str)
            img = Image.open(os.path.join(self.root_path, self.files_list[index]))
            img, label = self.transform(img, label)
            images.append(img)
            labels.append(label)
        images = np.array(images)
        labels = np.array(labels)

        if batch_index == self.size // self.batch_size - 1:
            np.random.shuffle(self.files_list)
        return images, labels

    def transform(self, image, label):
        def Normalize(image, means, stds):
            for band in range(len(means)):
                image[:, :, band] = image[:, :, band] / 255.0
                image[:, :, band] = (image[:, :, band] - means[band]) / stds[band]
            return image

        def ToOnehot(labels):
            labels = np.eye(Labels_nums)[labels].reshape(Labels_nums)
            return labels

        image = image.resize((Img_size, Img_size), Image.BILINEAR)
        image = Normalize(np.array(image).astype(np.float), [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        label = ToOnehot(label)
        return image, label


class InceptionV1:
    def __init__(self, structShow=False, trainModel=True):
        self.structShow = structShow
        self.trainModel = trainModel
        self.image = tf.placeholder(tf.float32, [Batch_size, Img_size, Img_size, Img_chs])
        self.label = tf.placeholder(tf.float32, [Batch_size, Labels_nums])
        self.predict = self.getNet()


    def get_w(self, shape):
        if np.size(shape) == 2:
            n = shape[0] + shape[1]
        else:
            n = shape[0] * shape[1] * shape[2]
        stddev = np.sqrt(2.0 / n)
        return tf.Variable(tf.truncated_normal(shape, mean=0, stddev=stddev), trainable=True, name='w')

    def get_b(self, shape):
        return tf.Variable(tf.zeros(shape), name='b')

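    # one Inception module: four parallel branches on the same input
    #   1) 1x1 conv;  2) 1x1 reduce -> 3x3 conv;  3) 1x1 reduce -> 5x5 conv;
    #   4) 3x3 max pool -> 1x1 projection
    # the caller concatenates the four branch outputs along the channel axis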
    def InceptionV1_Model(self, input, input_chs, model_size):
        con11_chs, con31_chs, con3_chs, con51_chs, con5_chs, pool11_chs = model_size
        with tf.name_scope('conv1'):
            conv11_w = self.get_w([1, 1, input_chs, con11_chs])
            conv11_b = self.get_b([con11_chs])
            conv11 = tf.nn.conv2d(input, conv11_w, strides=[1, 1, 1, 1], padding='SAME')
            relu11 = tf.nn.relu(tf.nn.bias_add(conv11, conv11_b))

        with tf.name_scope('conv3'):
            conv31_w = self.get_w([1, 1, input_chs, con31_chs])
            conv31_b = self.get_b([con31_chs])
            conv31 = tf.nn.conv2d(input, conv31_w, strides=[1, 1, 1, 1], padding='SAME')
            relu31 = tf.nn.relu(tf.nn.bias_add(conv31, conv31_b))

            conv3_w = self.get_w([3, 3, con31_chs, con3_chs])
            conv3_b = self.get_b([con3_chs])
            conv3 = tf.nn.conv2d(relu31, conv3_w, strides=[1, 1, 1, 1], padding='SAME')
            relu3 = tf.nn.relu(tf.nn.bias_add(conv3, conv3_b))

        with tf.name_scope('conv5'):
            conv51_w = self.get_w([1, 1, input_chs, con51_chs])
            conv51_b = self.get_b([con51_chs])
            conv51 = tf.nn.conv2d(input, conv51_w, strides=[1, 1, 1, 1], padding='SAME')
            relu51 = tf.nn.relu(tf.nn.bias_add(conv51, conv51_b))

            conv5_w = self.get_w([5, 5, con51_chs, con5_chs])
            conv5_b = self.get_b([con5_chs])
            conv5 = tf.nn.conv2d(relu51, conv5_w, strides=[1, 1, 1, 1], padding='SAME')
            relu5 = tf.nn.relu(tf.nn.bias_add(conv5, conv5_b))

        with tf.name_scope('pool'):
            pool1 = tf.nn.max_pool(input, ksize=[1, 3, 3, 1], strides=[1, 1, 1, 1], padding='SAME')
            conv1_w = self.get_w([1, 1, input_chs, pool11_chs])
            conv1_b = self.get_b([pool11_chs])
            conv1 = tf.nn.conv2d(pool1, conv1_w, strides=[1, 1, 1, 1], padding='SAME')
            relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b))
        return relu11, relu3, relu5, relu1

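    # auxiliary classifier head (attached to inception4a and inception4d during training):
    # 5x5 average pool (stride 3) -> 1x1 conv -> FC 1024 -> FC num_classes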
    def InceptionV1_Out(self, input, input_chs):
        pool = tf.nn.avg_pool(input, ksize=[1, 5, 5, 1], strides=[1, 3, 3, 1], padding='VALID')

        conv_w = self.get_w([1, 1, input_chs, Out_chs1])
        conv_b = self.get_b([Out_chs1])
        conv = tf.nn.conv2d(pool, conv_w, strides=[1, 1, 1, 1], padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv_b))

        flatten = tf.reshape(relu, [relu.shape[0], -1])
        dropout1 = tf.nn.dropout(flatten, keep_prob=0.7)
        fc1_w = self.get_w([int(dropout1.shape[1]), Out_chs2])
        fc1 = tf.matmul(dropout1, fc1_w)

        dropout2 = tf.nn.dropout(fc1, keep_prob=0.7)
        fc2_w = self.get_w([int(dropout2.shape[1]), Labels_nums])
        fc2 = tf.matmul(dropout2, fc2_w)
        # softmax so the optional auxiliary losses in train() can take the log of probabilities
        out = tf.nn.softmax(fc2)

        return out

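    # full network: stem convolutions, nine Inception modules (3a-3b, 4a-4e, 5a-5b),
    # 7x7 average pooling, dropout and a final fully connected layer;
    # when trainModel is True the two auxiliary heads are returned as well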
    def getNet(self):
        with tf.name_scope('conv'):
            conv1_w = self.get_w([Conv1_kernel_size, Conv1_kernel_size, Img_chs, Conv1_chs])
            conv1_b = self.get_b([Conv1_chs])
            conv1 = tf.nn.conv2d(self.image, conv1_w, strides=[1, 2, 2, 1], padding='SAME')
            relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_b))
            pool1 = tf.nn.max_pool(relu1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

            conv21_w = self.get_w([1, 1, Conv1_chs, Conv21_chs])
            conv21_b = self.get_b([Conv21_chs])
            conv21 = tf.nn.conv2d(pool1, conv21_w, strides=[1, 1, 1, 1], padding='SAME')
            relu21 = tf.nn.relu(tf.nn.bias_add(conv21, conv21_b))

            conv2_w = self.get_w([Conv2_kernel_size, Conv2_kernel_size, Conv21_chs, Conv2_chs])
            conv2_b = self.get_b([Conv2_chs])
            conv2 = tf.nn.conv2d(relu21, conv2_w, strides=[1, 1, 1, 1], padding='SAME')
            relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_b))
            pool2 = tf.nn.max_pool(relu2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.name_scope('inception3a'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(pool2, 192, Icp3a_size)
            inception3a = tf.concat([conv1, conv3, conv5, pool], 3)

        with tf.name_scope('inception3b'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception3a, 256, Icp3b_size)
            inception3b = tf.concat([conv1, conv3, conv5, pool], 3)
            pool3 = tf.nn.max_pool(inception3b, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.name_scope('inception4a'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(pool3, 480, Icp4a_size)
            inception4a = tf.concat([conv1, conv3, conv5, pool], 3)
            if self.trainModel == True:
                output1 = self.InceptionV1_Out(inception4a, 512)

        with tf.name_scope('inception4b'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception4a, 512, Icp4b_size)
            inception4b = tf.concat([conv1, conv3, conv5, pool], 3)

        with tf.name_scope('inception4c'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception4b, 512, Icp4c_size)
            inception4c = tf.concat([conv1, conv3, conv5, pool], 3)

        with tf.name_scope('inception4d'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception4c, 512, Icp4d_size)
            inception4d = tf.concat([conv1, conv3, conv5, pool], 3)
            if self.trainModel == True:
                output2 = self.InceptionV1_Out(inception4d, 528)

        with tf.name_scope('inception4e'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception4d, 528, Icp4e_size)
            inception4e = tf.concat([conv1, conv3, conv5, pool], 3)
            pool4 = tf.nn.max_pool(inception4e, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        with tf.name_scope('inception5a'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(pool4, 832, Icp5a_size)
            inception5a = tf.concat([conv1, conv3, conv5, pool], 3)

        with tf.name_scope('inception5b'):
            conv1, conv3, conv5, pool = self.InceptionV1_Model(inception5a, 832, Icp5b_size)
            inception5b = tf.concat([conv1, conv3, conv5, pool], 3)
            pool5 = tf.nn.avg_pool(inception5b, ksize=[1, 7, 7, 1], strides=[1, 1, 1, 1], padding='VALID')

        with tf.name_scope('output'):

            flatten = tf.reshape(pool5, [pool5.shape[0], -1])
            dropout = tf.nn.dropout(flatten, keep_prob=0.6)
            fc_w = self.get_w([int(dropout.shape[1]), Labels_nums])
            fc = tf.matmul(dropout, fc_w)
            # softmax so the cross-entropy in train() receives class probabilities
            output = tf.nn.softmax(fc)

        if self.structShow:
            print(relu1.name, relu1.shape)
            print(pool1.name, pool1.shape)
            print(relu2.name, relu2.shape)
            print(pool2.name, pool2.shape)

            print(inception3a.name, inception3a.shape)
            print(inception3b.name, inception3b.shape)
            print(pool3.name, pool3.shape)

            print(inception4a.name, inception4a.shape)
            if self.trainModel == True:
                print(output1.name, output1.shape)
            print(inception4b.name, inception4b.shape)
            print(inception4c.name, inception4c.shape)
            print(inception4d.name, inception4d.shape)
            if self.trainModel == True:
                print(output2.name, output2.shape)
            print(inception4e.name, inception4e.shape)
            print(pool4.name, pool4.shape)

            print(inception5a.name, inception5a.shape)
            print(inception5b.name, inception5b.shape)
            print(pool5.name, pool5.shape)

            print(flatten.name, flatten.shape)
            print(fc.name, fc.shape)
            print(output.name, output.shape)
        if self.trainModel:
            return [output, output1, output2]
        return output


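# training: split.txt provides the train/val file lists; the loss is the cross-entropy of the
# main output (the commented lines would add the two auxiliary heads with weights 0.6/0.2/0.2),
# optimised with Adam; the checkpoint with the lowest validation loss is kept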
def train():
    df = pd.read_csv(Data_csv_path, header=0, index_col=0)
    train_list = df[df['split'] == 'train']['filename'].tolist()
    val_list = df[df['split'] == 'val']['filename'].tolist()
    train_dataset = MyDataset(Data_path, batch_size=Batch_size, files_list=train_list)
    val_dataset = MyDataset(Data_path, batch_size=Batch_size, files_list=val_list)
    net = InceptionV1(structShow=True)
    image, label, predict = net.image, net.label, net.predict

    train_loss = tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[0], 1e-15, 1.0)), reduction_indices=1))
    # train_loss = 0.6 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[0],1e-15,1.0)), reduction_indices=1))\
    #              + 0.2 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[1],1e-15,1.0)), reduction_indices=1))\
    #              + 0.2 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[2],1e-15,1.0)), reduction_indices=1))


    val_loss = train_loss
    # val_loss = 1 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[0],1e-15,1.0)), reduction_indices=1))\
    #              + 0 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[1],1e-15,1.0)), reduction_indices=1))\
    #              + 0 * tf.reduce_mean(-tf.reduce_sum(label * tf.log(tf.clip_by_value(predict[2],1e-15,1.0)), reduction_indices=1))

    run_step = tf.train.AdamOptimizer(Learning_rate).minimize(train_loss)
    correct = tf.equal(tf.argmax(predict[0], 1), tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        model = tf.train.get_checkpoint_state(Model_path)
        # if model and model.model_checkpoint_path:
        #     saver.restore(sess, model.model_checkpoint_path)

        train_losses = np.ones(Epochs)
        train_accs = np.ones(Epochs)
        val_losses = np.ones(Epochs)
        val_accs = np.ones(Epochs)
        best_loss = float("inf")
        best_loss_epoch = 0
        for epoch in range(Epochs):
            print('Epoch %d/%d:' % (epoch + 1, Epochs))
            train_sum_loss = 0
            train_sum_acc = 0
            val_sum_loss = 0
            val_sum_acc = 0
            for batch, (images, labels) in enumerate(train_dataset):
                train_acc, t_loss, pre,cor,_ = sess.run(
                    [accuracy, train_loss,predict, correct,run_step], feed_dict={image: images, label: labels})
                process_show(batch + 1, Train_batch_num, train_acc, t_loss, prefix='train:')
                train_sum_acc += train_acc
                train_sum_loss += t_loss

            for batch, (images, labels) in enumerate(val_dataset):
                val_acc, v_loss = sess.run([accuracy, val_loss], feed_dict={image: images, label: labels})
                process_show(batch + 1, Val_batch_num, val_acc, v_loss, prefix='val:')
                val_sum_acc += val_acc
                val_sum_loss += v_loss

            train_sum_loss /= Train_batch_num
            train_sum_acc /= Train_batch_num
            val_sum_loss /= Val_batch_num
            val_sum_acc /= Val_batch_num

            train_losses[epoch] = train_sum_loss
            train_accs[epoch] = train_sum_acc
            val_losses[epoch] = val_sum_loss
            val_accs[epoch] = val_sum_acc
            print('average summary:\ntrain acc %.4f, loss %.4f ; val acc %.4f, loss %.4f'
                  % (train_sum_acc, train_sum_loss, val_sum_acc, val_sum_loss))

            if val_sum_loss < best_loss:
                print('val_loss improve from %.4f to %.4f, model save to %s ! \n' % (
                    best_loss, val_sum_loss, Model_file_tf))
                best_loss = val_sum_loss
                best_loss_epoch = epoch + 1
                saver.save(sess=sess, save_path=Model_file_tf)
            else:
                print('val_loss do not improve from %.4f \n' % (best_loss))
        print('best loss %.4f at epoch %d \n' % (best_loss, best_loss_epoch))
        draw_loss_acc(train_losses, train_accs, 'train')
        draw_loss_acc(val_losses, val_accs, 'val')


if __name__ == '__main__':
    pass
    # dataset_divide(r'E:\_Python\01_deeplearning\04_GoogLeNet\Inception1\data\split.txt')
    train()
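
The post stops at training. As a hedged illustration only, the snippet below (not part of the original code) sketches how a single image might be classified with the saved checkpoint: it reuses the InceptionV1 and MyDataset classes above, assumes the checkpoint at model/InceptionV1_tf.ckpt already exists, and tiles the image to fill the fixed Batch_size placeholder. The file path in the example call is purely illustrative.

# Hypothetical single-image inference sketch (not in the original post).
# Assumes training has produced Model_file_tf and reuses MyDataset.transform for preprocessing;
# the image is tiled because the placeholder batch size is fixed at Batch_size.
def infer(img_path):
    tf.reset_default_graph()
    net = InceptionV1(structShow=False, trainModel=False)  # single-output graph
    with tf.Session() as sess:
        tf.train.Saver().restore(sess, Model_file_tf)
        img = Image.open(img_path)
        img, _ = MyDataset(Data_path, Batch_size).transform(img, 0)  # resize + normalize
        batch = np.tile(img[np.newaxis], (Batch_size, 1, 1, 1))      # fill the fixed batch
        probs = sess.run(net.predict, feed_dict={net.image: batch})[0]
        print('%s -> %s (%.3f)' % (img_path, Label_class[int(np.argmax(probs))], probs.max()))

# example call (path is illustrative):
# infer('./data/beach01.tif')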

my_utils.py

# -*- coding: utf-8 -*- 
# @Time : 2020/1/21 11:39 
# @Author : Zhao HL
# @File : my_utils.py
import sys,os,random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
def process_show(num, nums, train_acc, train_loss, prefix='', suffix=''):
    rate = num / nums
    ratenum = int(round(rate, 2) * 100)
    bar = '\r%s batch %3d/%d:train accuracy %.4f, train loss %00.4f [%s%s]%.1f%% %s; ' % (
        prefix, num, nums, train_acc, train_loss, '#' * (ratenum//2), '_' * (50 - ratenum//2), ratenum, suffix)
    sys.stdout.write(bar)
    sys.stdout.flush()
    if num >= nums:
        print()

def dataInfo_show(data_path,csv_pth,cls_dic_path,shapesShow=True,classesShow=True):
    cls_dict = get_cls_dic(cls_dic_path)
    if classesShow:
        print('\n'+'*'*50)
        df = pd.read_csv(csv_pth)
        labels = df['label'].unique()
        label_cls = {label:cls_dict[label] for label in labels}
        print(label_cls)
        cls_count = df['label'].value_counts()
        cls_count = {cls_dict[k]:v for k,v in cls_count.items()}
        for k,v in cls_count.items():
            print(k,v)

    if shapesShow:
        print('\n'+'*'*50)
        shapes = []
        for filename in os.listdir(data_path):
            img = Image.open(os.path.join(data_path, filename))
            img = np.array(img)
            shapes.append(img.shape)
        shapes = pd.Series(shapes)
        print(shapes.value_counts())

def get_cls_dic(cls_dic_path):
    # read the class-label dictionary; only the information before the first comma is used
    cls_df = pd.read_csv(cls_dic_path)
    cls_df['cls'] = cls_df['info'].apply(lambda x:x[:9]).tolist()
    cls_df['label'] = cls_df['info'].apply(lambda x: x[10:]).tolist()
    cls_df = cls_df.drop(columns=['info','other'])

    cls_dict = cls_df.set_index('cls').T.to_dict('list')
    cls_dict = {k:v[0] for k,v in cls_dict.items()}
    return cls_dict

def dataset_divide(csv_pth):
    cls_df = pd.read_csv(csv_pth, header=0,index_col=0)
    cls_df.insert(1,'split',None)
    filenames = list(cls_df['filename'])
    random.shuffle(filenames)
    train_num,train_val_num = int(len(filenames)*0.7),int(len(filenames)*0.8)
    train_names = filenames[:train_num]
    val_names = filenames[train_num:train_val_num]
    test_names = filenames[train_val_num:]
    cls_df.loc[cls_df['filename'].isin(train_names),'split'] = 'train'
    cls_df.loc[cls_df['filename'].isin(val_names), 'split'] = 'val'
    cls_df.loc[cls_df['filename'].isin(test_names), 'split'] = 'test'
    cls_df.to_csv(csv_pth)

def draw_loss_acc(loss,acc,type='',save_path=None):
    assert len(acc) == len(loss)
    x = [epoch for epoch in range(len(acc))]
    plt.subplot(2, 1, 1)
    plt.plot(x, acc, 'o-')
    plt.title(type+'  accuracy vs. epochs')
    plt.ylabel('accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(x, loss, '.-')
    plt.xlabel(type+'  loss vs. epochs')
    plt.ylabel('loss')
    if save_path:
        plt.savefig(os.path.join(save_path, type + "_acc_loss.png"))
    plt.show()


if __name__ == '__main__':
    pass

 
