【深度學習】Grad-CAM 使用 MNIST + LeNet 基於 tensorflow 生成分類器對於數據的位置權重(熱圖 HeatMap)

grad-CAM 使用 MNIST 基於 tensorflow

前言

上一篇文章中,我們給出了在 MNIST + LeNet 上的 CAM 實現。但是使用 CAM 時需要更改模型的網絡結構,用 GAP 代替 FC layers;這樣的更改雖然(理論上)對準確率沒有太大影響,但會使網絡更難以收斂。並且對於某些本來就很難訓練的模型,更改網絡是一個巨大的工程。因此我們引入更方便的 Grad-CAM:不需要對網絡做任何改動,就可以直接得到數據的 heat map。

Grad-CAM

Grad-CAM 可以在不改變網絡模型的前提下,利用梯度信息得到 heat map。
在這裏插入圖片描述
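首先取最後一個全連接層的輸出 $y$(未經過激活函數),設 $c$ 爲該數據的類別,將 $y^c$ 對 $A$(最後一層卷積層的特徵圖)求梯度 $\partial y^c / \partial A^k$;然後再將梯度進行 GAP,平均成一個向量 $\alpha^c$,其中每個通道 $k$ 的權重爲 $\alpha_k^c = \frac{1}{Z}\sum_i\sum_j \frac{\partial y^c}{\partial A_{ij}^k}$;接著與 $A$ 的各通道對應相乘並求和,通過 relu 層得到 $L^c = \mathrm{ReLU}\left(\sum_k \alpha_k^c A^k\right)$,最後 resize 成原圖大小,再疊加到原圖上。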
並且像往常一樣,一個代碼訓練模型,一個代碼輸出熱圖。

效果

在這裏插入圖片描述

訓練代碼

#!/usr/bin/env python
# coding: utf-8

# In[ ]:


import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.system("rm -r logs")
import tensorflow as tf
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt 
from PIL import Image
import multiprocessing


# In[ ]:



# Absolute, machine-specific locations of the TFRecord files used as the
# training and test inputs.
TrainPath = '/home/winsoul/disk/MNIST/data/tfrecord/train.tfrecords'
TestPath = '/home/winsoul/disk/MNIST/data/tfrecord/test.tfrecords'
# Path prefix that checkpoint file names are appended to when saving.
model_path = '/home/winsoul/disk/MNIST/Grad-CAM/model/'


# In[ ]:


def read_tfrecord(TFRecordPath):
    """Build graph ops that read one (image, label) example from a TFRecord file.

    Only graph construction happens here, so no session is opened: the ops
    are added to the current default graph and are evaluated later by the
    caller's session once the queue runners have been started.

    Args:
        TFRecordPath: path of the TFRecord file. Each record is parsed as an
            'image' bytes feature and a 'label' int64 feature.

    Returns:
        A tuple (image, label): `image` is the raw bytes decoded as float32
        and reshaped to [28, 28, 1]; `label` is the label cast to int32.
    """
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    # No num_epochs is given, so the producer is not limited to a fixed
    # number of passes over the file.
    filename_queue = tf.train.string_input_producer([TFRecordPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [28, 28, 1])
    label = tf.cast(features['label'], tf.int32)
    return image, label


# In[ ]:


def conv_layer(X, k, s, channels_in, channels_out, name = 'CONV'):
    """Add a k x k convolution + bias + ReLU layer and return its activations.

    Args:
        X: input tensor fed to tf.nn.conv2d.
        k: spatial size of the square kernel.
        s: stride applied along both spatial dimensions.
        channels_in: number of input channels.
        channels_out: number of output channels (filters).
        name: name scope under which the layer's ops are created.

    Returns:
        The ReLU-activated output tensor ('SAME' padding).
    """
    kernel_shape = [k, k, channels_in, channels_out]
    stride_spec = [1, s, s, 1]
    with tf.name_scope(name):
        # The kernel is created before the bias, as in the original layer;
        # neither variable is explicitly named, so creation order is kept.
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev = 0.1))
        bias = tf.Variable(tf.constant(0.1, shape = [channels_out]))
        convolved = tf.nn.conv2d(X, kernel, strides = stride_spec, padding = 'SAME')
        activated = tf.nn.relu(convolved + bias)
        # Histogram summaries for inspection in TensorBoard.
        for tag, tensor in (('weights', kernel),
                            ('biases', bias),
                            ('activations', activated)):
            tf.summary.histogram(tag, tensor)
        return activated


# In[ ]:


def pool_layer(X, k, s, strr = 'SAME', pool_type = 'MAX', name = 'pool1'):
    """Apply k x k max or average pooling with stride s.

    Args:
        X: input tensor.
        k: pooling window size along both spatial dimensions.
        s: stride along both spatial dimensions.
        strr: padding mode passed through to the pooling op.
        pool_type: 'MAX' selects max pooling; any other value selects
            average pooling.
        name: name given to the pooling op.

    Returns:
        The pooled tensor.
    """
    pool_op = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    return pool_op(X,
                   ksize = [1, k, k, 1],
                   strides = [1, s, s, 1],
                   padding = strr,
                   name = name)


# In[ ]:


def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Add a fully connected layer and return its output.

    Args:
        X: 2-D input tensor multiplied by the weight matrix.
        neurons_in: number of input features.
        neurons_out: number of output units.
        last: when it compares equal to False the output goes through ReLU;
            otherwise the raw affine output (no activation) is returned.
        name: name scope under which the layer's ops are created.

    Returns:
        The layer output tensor.
    """
    with tf.name_scope(name):
        weights = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.1))
        bias = tf.Variable(tf.constant(0.1, shape = [neurons_out]))
        tf.summary.histogram('weights', weights)
        tf.summary.histogram('biases', bias)
        affine = tf.matmul(X, weights) + bias
        # The equality test (rather than `not last`) is kept on purpose so
        # that non-bool arguments select the same branch as before.
        output = tf.nn.relu(affine) if last == False else affine
        tf.summary.histogram('activations', output)
        return output


# In[ ]:


def Network(BatchSize, learning_rate):
    """Build the convolutional MNIST classifier and train it until stopped.

    The graph reads examples from TrainPath and TestPath, batches them with
    shuffle queues and picks one of the two batches through the boolean
    `in_training` placeholder. Every 10 steps the loss and accuracy of a
    training batch and of a test batch are printed and written as summaries
    to ./logs/train and ./logs/test; every 100 steps a checkpoint is saved
    under model_path.

    Args:
        BatchSize: number of examples per batch for both input queues.
        learning_rate: learning rate passed to the Adam optimizer.
    """
    with tf.Session() as sess:
        in_training = tf.placeholder(dtype = tf.bool, shape=())
        keep_prob = tf.placeholder('float32', name = 'keep_prob')

        image_train, label_train = read_tfrecord(TrainPath)
        image_val, label_val = read_tfrecord(TestPath)

        image_train_Batch, label_train_Batch = tf.train.shuffle_batch([image_train, label_train],
                                                     batch_size = BatchSize,
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        image_val_Batch, label_val_Batch = tf.train.shuffle_batch([image_val, label_val],
                                                     batch_size = BatchSize,
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)

        # NOTE(review): the batch tensors are created outside tf.cond, so
        # presumably both queues are dequeued on every run regardless of
        # in_training -- confirm if input throughput matters.
        image_Batch = tf.cond(in_training, lambda: image_train_Batch, lambda: image_val_Batch)
        label_Batch = tf.cond(in_training, lambda: label_train_Batch, lambda: label_val_Batch)

        label_Batch = tf.one_hot(label_Batch, depth = 10)

        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)

        with tf.name_scope('input_reshape'):
            tf.summary.image('input', X, 32)

        conv1 = conv_layer(X, 3, 1, 1, 32, name = "conv1")
        conv1_2 = conv_layer(conv1, 3, 1, 32, 32, name = "conv1_2")
        conv1_3 = conv_layer(conv1_2, 3, 1, 32, 32, name = "conv1_3")
        pool1 = pool_layer(conv1_3, 2, 2, "SAME", "MAX", name =  "pool1")

        conv2 = conv_layer(pool1, 3, 1, 32, 64, name = 'conv2')
        conv2_2 = conv_layer(conv2, 3, 1, 64, 64, name = 'conv2_2')
        # NOTE(review): this layer reuses the scope name 'conv2_2'. It is
        # left as is because renaming the scope would presumably change the
        # saved variable names and break checkpoints restored by other
        # scripts -- confirm before renaming.
        conv2_3 = conv_layer(conv2_2, 3, 1, 64, 64, name = 'conv2_2')
        pool2 = pool_layer(conv2_3, 2, 2, "SAME", "MAX", name = "pool2")
        print(pool2.shape)

        drop1 = tf.nn.dropout(pool2, keep_prob)
        # The flattened pool2 output is fed straight into the 10-way output
        # layer; last=True requests the un-activated output.
        y_result = fc_layer(tf.reshape(drop1, [-1, 7 * 7 * 64]), 7 * 7 * 64, 10, True)

        with tf.name_scope('summaries'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_result, labels = y))
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
            corrent_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_result, 1))
            accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
            tf.summary.scalar("loss", cross_entropy)
            tf.summary.scalar("accuracy", accuracy)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)

        merge_summary = tf.summary.merge_all()
        summary_train_writer = tf.summary.FileWriter("./logs/train" , sess.graph)
        summary_val_writer = tf.summary.FileWriter("./logs/test")

        saver = tf.train.Saver()
        # Make sure the checkpoint directory exists before the first save.
        checkpoint_dir = os.path.dirname(model_path)
        if checkpoint_dir and not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)

        try:
            batch_index = 1
            while not coord.should_stop():
                sess.run([train_step], feed_dict = {keep_prob: 0.5, in_training: True})
                if batch_index % 10 == 0:
                    # Evaluation only: train_step is deliberately not fetched
                    # here, so logging does not apply an extra optimizer
                    # update with dropout disabled.
                    summary_train, acc, loss = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, in_training: True})
                    summary_train_writer.add_summary(summary_train, batch_index)
                    print(str(batch_index) + 'train:' + '  ' + str(acc) + ' ' + str(loss))
                    summary_val, acc, loss = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, in_training: False})
                    summary_val_writer.add_summary(summary_val, batch_index)
                    print(str(batch_index) + ' val: ' + '  ' + str(acc) + ' ' + str(loss))
                if batch_index % 100 == 0:
                    saver.save(sess, model_path + 'Model__Step_{:08d}'.format(batch_index))
                batch_index += 1

        except tf.errors.OutOfRangeError:
            print("OutofRangeError!")
        finally:
            # Stop the queue-runner threads and flush the summaries on every
            # exit path, including interruption by the user. The session
            # itself is closed by the enclosing `with`.
            coord.request_stop()
            coord.join(threads)
            summary_train_writer.close()
            summary_val_writer.close()
            print("Finish")


# In[ ]:


def main():
    """Train the network with a batch size of 512 and a 1e-4 learning rate."""
    batch_size, learning_rate = 512, 0.0001
    Network(batch_size, learning_rate)


# In[ ]:


# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    main()



Grad-CAM 代碼

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.system("rm -r logs")
import tensorflow as tf
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt 
from PIL import Image
import multiprocessing
import matplotlib.colors as col
import cv2


# In[2]:


# Anchor colors of a custom red -> green -> blue linear colormap.
startcolor = '#ff0000'   # red
midcolor = '#00ff00'     # green
endcolor = '#0000ff'    # blue
# NOTE(review): `heat` does not appear to be referenced anywhere else in the
# visible source -- confirm before removing it.
heat = col.LinearSegmentedColormap.from_list('own2',[startcolor,midcolor,endcolor])


# In[3]:



# Absolute, machine-specific locations of the TFRecord files read as the
# training and validation inputs.
TrainPath = '/home/winsoul/disk/MNIST/data/tfrecord/train.tfrecords'
ValPath = '/home/winsoul/disk/MNIST/data/tfrecord/test.tfrecords'
# BatchSize = 64
# Path prefix that the checkpoint file name is appended to when restoring.
model_path = '/home/winsoul/disk/MNIST/Grad-CAM/model/'


# In[4]:


def read_tfrecord(TFRecordPath):
    """Build graph ops that read one (image, label) example from a TFRecord file.

    Only graph construction happens here, so no session is opened: the ops
    are added to the current default graph and are evaluated later by the
    caller's session once the queue runners have been started.

    Args:
        TFRecordPath: path of the TFRecord file. Each record is parsed as an
            'image' bytes feature and a 'label' int64 feature.

    Returns:
        A tuple (image, label): `image` is the raw bytes decoded as float32
        and reshaped to [28, 28, 1]; `label` is the label cast to int32.
    """
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    # No num_epochs is given, so the producer is not limited to a fixed
    # number of passes over the file.
    filename_queue = tf.train.string_input_producer([TFRecordPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [28, 28, 1])
    label = tf.cast(features['label'], tf.int32)
    return image, label


# In[5]:


def conv_layer(X, k, s, channels_in, channels_out, name = 'CONV'):
    """Add a k x k convolution + bias + ReLU layer and return its activations.

    Args:
        X: input tensor fed to tf.nn.conv2d.
        k: spatial size of the square kernel.
        s: stride applied along both spatial dimensions.
        channels_in: number of input channels.
        channels_out: number of output channels (filters).
        name: name scope under which the layer's ops are created.

    Returns:
        The ReLU-activated output tensor ('SAME' padding).
    """
    kernel_shape = [k, k, channels_in, channels_out]
    stride_spec = [1, s, s, 1]
    with tf.name_scope(name):
        # The kernel is created before the bias, as in the original layer;
        # neither variable is explicitly named, so creation order is kept.
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev = 0.1))
        bias = tf.Variable(tf.constant(0.1, shape = [channels_out]))
        convolved = tf.nn.conv2d(X, kernel, strides = stride_spec, padding = 'SAME')
        activated = tf.nn.relu(convolved + bias)
        # Histogram summaries for inspection in TensorBoard.
        for tag, tensor in (('weights', kernel),
                            ('biases', bias),
                            ('activations', activated)):
            tf.summary.histogram(tag, tensor)
        return activated


# In[6]:


def pool_layer(X, k, s, strr = 'SAME', pool_type = 'MAX', name = 'pool'):
    """Apply k x k max or average pooling with stride s.

    Args:
        X: input tensor.
        k: pooling window size along both spatial dimensions.
        s: stride along both spatial dimensions.
        strr: padding mode passed through to the pooling op.
        pool_type: 'MAX' selects max pooling; any other value selects
            average pooling.
        name: name given to the pooling op.

    Returns:
        The pooled tensor.
    """
    pool_op = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    return pool_op(X,
                   ksize = [1, k, k, 1],
                   strides = [1, s, s, 1],
                   padding = strr,
                   name = name)


# In[7]:


def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Add a fully connected layer and return its output.

    For the last layer the raw, un-activated scores (logits) are returned
    instead of softmax probabilities. The result is consumed as `logits` by
    tf.nn.softmax_cross_entropy_with_logits, which applies softmax itself,
    and Grad-CAM differentiates the class score before the activation;
    returning probabilities here would apply softmax twice.

    Args:
        X: 2-D input tensor multiplied by the weight matrix.
        neurons_in: number of input features.
        neurons_out: number of output units.
        last: when it compares equal to False the output goes through ReLU;
            otherwise the raw affine output (logits) is returned.
        name: name scope under which the layer's ops are created.

    Returns:
        The layer output tensor.
    """
    with tf.name_scope(name):
        W = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.1))
        b = tf.Variable(tf.constant(0.1, shape = [neurons_out]))
        tf.summary.histogram('weights', W)
        tf.summary.histogram('biases', b)
        affine = tf.matmul(X, W) + b
        if last == False:
            result = tf.nn.relu(affine)
        else:
            # Logits only: no softmax here (see docstring).
            result = affine
        tf.summary.histogram('activations', result)
        return result


# In[8]:


def _grad_cam_heatmap(feature_maps, gradients):
    """Combine one sample's feature maps and gradients into a Grad-CAM map.

    Args:
        feature_maps: array of shape (H, W, C) with the layer activations.
        gradients: array of shape (H, W, C) with the gradient of the class
            signal with respect to those activations.

    Returns:
        An (H, W) array: the channel-weighted sum of the feature maps,
        clipped at zero and divided by its maximum (plus 1e-5 to avoid
        dividing by zero).
    """
    # Global average pooling of the gradients gives one weight per channel.
    channel_weights = gradients.mean(axis = (0, 1))
    heatmap = np.tensordot(feature_maps, channel_weights, axes = ([2], [0]))
    heatmap = np.maximum(heatmap, 0)
    return heatmap / (np.max(heatmap) + 1e-5)


def Network(BatchSize, learning_rate):
    """Rebuild the classifier, restore a checkpoint and plot Grad-CAM overlays.

    The same graph as in training is constructed, the checkpoint
    'Model__Step_00000500' under model_path is restored, and then validation
    batches are pulled in an endless loop. For the first sample of each batch
    the Grad-CAM heat map is drawn on top of the input image with matplotlib.
    The loop only ends when it is interrupted or an error is raised; the
    queue-runner threads and summary writers are shut down in either case.

    Args:
        BatchSize: number of examples per batch; only the first example of
            each batch is visualised.
        learning_rate: learning rate passed to the Adam optimizer, which is
            built but never run here.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        in_training = tf.placeholder(tf.bool, name = 'in_training')
        keep_prob = tf.placeholder('float32', name = 'keep_prob')

        image_train, label_train = read_tfrecord(TrainPath)
        image_val, label_val = read_tfrecord(ValPath)

        image_train_Batch, label_train_Batch = tf.train.shuffle_batch([image_train, label_train],
                                                     batch_size = BatchSize,
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)
        image_val_Batch, label_val_Batch = tf.train.shuffle_batch([image_val, label_val],
                                                     batch_size = BatchSize,
                                                     capacity = BatchSize*3 + 200,
                                                     min_after_dequeue = BatchSize)

        image_Batch = tf.cond(in_training, lambda: image_train_Batch, lambda: image_val_Batch)
        label_Batch = tf.cond(in_training, lambda: label_train_Batch, lambda: label_val_Batch)

        label_Batch = tf.one_hot(label_Batch, depth = 10)

        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)

        with tf.name_scope('input_reshape'):
            tf.summary.image('input', X, 32)

        conv1 = conv_layer(X, 3, 1, 1, 32, name = "conv1")
        conv1_2 = conv_layer(conv1, 3, 1, 32, 32, name = "conv1_2")
        conv1_3 = conv_layer(conv1_2, 3, 1, 32, 32, name = "conv1_3")
        pool1 = pool_layer(conv1_3, 2, 2, "SAME", "MAX", name =  "pool1")

        conv2 = conv_layer(pool1, 3, 1, 32, 64, name = 'conv2')
        conv2_2 = conv_layer(conv2, 3, 1, 64, 64, name = 'conv2_2')
        # NOTE(review): this layer reuses the scope name 'conv2_2'. It is
        # left as is because renaming the scope would presumably change the
        # variable names and no longer match the restored checkpoint --
        # confirm before renaming.
        conv2_3 = conv_layer(conv2_2, 3, 1, 64, 64, name = 'conv2_2')
        pool2 = pool_layer(conv2_3, 2, 2, "SAME", "MAX", name = "pool2")
        print(pool2.shape)

        drop1 = tf.nn.dropout(pool2, keep_prob)
        y_result = fc_layer(tf.reshape(drop1, [-1, 7 * 7 * 64]), 7 * 7 * 64, 10, True)

        with tf.name_scope('summaries'):
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_result, labels = y))
            # Never run in this script. NOTE(review): presumably kept so the
            # set of variables seen by the Saver matches the training graph
            # -- confirm before removing.
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
            corrent_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_result, 1))
            accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
            tf.summary.scalar("loss", cross_entropy)
            tf.summary.scalar("accuracy", accuracy)

        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)

        summary_writer = tf.summary.FileWriter("./logs/train" , sess.graph)
        summary_writer_test = tf.summary.FileWriter("./logs/test")

        try:
            saver = tf.train.Saver()
            saver.restore(sess, model_path + 'Model__Step_00000500')

            # Class signal: the network output masked by the one-hot label,
            # so only the true class contributes to the gradient.
            signal = tf.multiply(y_result, y)
            signal = tf.reduce_mean(signal)
            # NOTE(review): the gradient is taken at conv2, not at the last
            # convolution (conv2_3) -- confirm this is the intended layer.
            gradient_y_image = tf.gradients(signal, conv2)[0]
            # Scale the gradients by their maximum; 1e-5 avoids dividing by
            # an exact zero.
            gradient_y_image = tf.div(gradient_y_image, tf.reduce_max(gradient_y_image) + tf.constant(1e-5))

            prediction = tf.argmax(y_result, 1)
            # True class index, read from the one-hot label rather than from
            # the network output.
            label = tf.argmax(y, 1)

            while True:
                T, w, loss, predic, label1, image = sess.run(
                    [conv2, gradient_y_image, cross_entropy, prediction, label, X],
                    feed_dict = {keep_prob: 1.0, in_training: False})
                print(loss, predic)

                # Visualise the first sample of the batch only.
                T = np.array(T[0])
                w = np.array(w[0])
                print("T:", T.shape)
                print("w:", w.shape)

                heatmap = _grad_cam_heatmap(T, w)
                heatmap = cv2.resize(heatmap, (28, 28), interpolation = cv2.INTER_LINEAR)

                # NOTE(review): the rescaling assumes pixel values stored in
                # [-0.5, 0.5] -- confirm against the TFRecord writer.
                image = image[0].reshape(28, 28)
                image = (image + 0.5) * 255
                image = image.astype(np.uint8)

                plt.title(str(predic) + str(label1))
                print('image:', image.shape)
                plt.imshow(image)
                plt.imshow(heatmap, cmap = plt.cm.jet, alpha = 0.5, interpolation='bilinear')
                plt.colorbar()
                plt.show()
        finally:
            # Reached when the loop is interrupted or an error is raised:
            # stop the queue-runner threads and flush the writers. The
            # session itself is closed by the enclosing `with`.
            coord.request_stop()
            coord.join(threads)
            summary_writer.close()
            summary_writer_test.close()


# In[9]:


def main():
    """Run the Grad-CAM visualisation one image at a time (batch size 1)."""
    batch_size, learning_rate = 1, 0.0001
    Network(batch_size, learning_rate)


# In[10]:


# Run the visualisation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


# In[ ]:





發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章