TensorFlow's LSTM modules

tf_version: 1.9.0

Cell implementations:

'''
The two LSTM cell implementations:
tf.contrib.rnn.BasicLSTMCell
tf.contrib.rnn.LSTMCell
Both are used the same way: pass in NUM_UNITS (the number of hidden units) and an LSTM cell is returned.
'''

NUM_UNITS = 128
basic_lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=NUM_UNITS)
lstm_cell = tf.contrib.rnn.LSTMCell(num_units=NUM_UNITS)
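Both constructors take the same core argument, but tf.contrib.rnn.LSTMCell additionally accepts options such as use_peepholes and cell_clip that BasicLSTMCell does not. A minimal sketch continuing from the cells above (the option values are only illustrative):

# LSTMCell with extra options (the values here are illustrative, not recommendations)
peephole_lstm_cell = tf.contrib.rnn.LSTMCell(num_units=NUM_UNITS,
                                             use_peepholes=True,  # peephole connections
                                             cell_clip=10.0)      # clip the cell state

# Both cells expose the same interface that the run functions below rely on
print(basic_lstm_cell.state_size)   # LSTMStateTuple(c=128, h=128)
print(basic_lstm_cell.output_size)  # 128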

Run functions:

'''
tf.nn.dynamic_rnn
tf.nn.bidirectional_dynamic_rnn
tf.contrib.rnn.static_bidirectional_rnn  (statically unrolled, bidirectional)
These are the functions that actually run the LSTM; each takes the LSTM cell(s) plus the input INPUT.
'''

# pass in a single LSTM cell
# inputs:  [batch, max_time, input_size]  (time_major=False)
# outputs: [batch, max_time, NUM_UNITS]
outputs, final_state = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=image, dtype=tf.float32)


# pass in two LSTM cells (one forward, one backward)
# inputs:  [batch, max_time, input_size]  (time_major=False)
# outputs: [batch, max_time, fw_NUM_UNITS + bw_NUM_UNITS] after concatenation
(outputs_fw, outputs_bw), (outputs_state_fw, outputs_state_bw) = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, im, dtype=tf.float32)
outputs = tf.concat((outputs_fw, outputs_bw), 2)
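If a downstream layer needs one fixed-size vector per example rather than per time step, the two final hidden states can be concatenated as well; a small sketch, assuming both cells are LSTM cells so each final state is an LSTMStateTuple:

# final hidden state of each direction has shape [batch, NUM_UNITS]
final_h = tf.concat((outputs_state_fw.h, outputs_state_bw.h), axis=1)  # [batch, fw_NUM_UNITS + bw_NUM_UNITS]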

# statically unrolled bidirectional LSTM: pass in two LSTM cells
# input_list: a Python list of length max_time; each element has shape [batch, input_dim]
# outputs: a list of length max_time; each element has shape [batch, fw_NUM_UNITS + bw_NUM_UNITS]
bilstm_Output, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(cell_fw, cell_bw, input_list, dtype=tf.float32)
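Because max_time is the length of the longest sequence in a batch, shorter sequences are normally zero-padded; tf.nn.dynamic_rnn accepts a sequence_length argument so computation stops at each sequence's true length. A minimal self-contained sketch (the placeholder names and sizes below are made up for illustration):

import tensorflow as tf

BATCH, MAX_TIME, INPUT_DIM, NUM_UNITS = 4, 10, 8, 128   # illustrative sizes

inputs = tf.placeholder(tf.float32, [BATCH, MAX_TIME, INPUT_DIM])  # zero-padded batch
seq_len = tf.placeholder(tf.int32, [BATCH])                        # true length of each sequence

cell = tf.contrib.rnn.LSTMCell(num_units=NUM_UNITS)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                         sequence_length=seq_len,  # stop at each true length
                                         dtype=tf.float32)
# outputs:     [BATCH, MAX_TIME, NUM_UNITS], zero beyond each sequence's true length
# final_state: LSTMStateTuple(c, h), taken at each sequence's last valid step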

 

________________some of my examples________________

def dynamicRNN(rnn_cell, image):
    '''
    :input
        max_time is the longest sequence length in the batch; e.g. for a batch of three
        sentences, max_time is the word count of the longest sentence
        cell: the RNN cell to run
        inputs: the input data, shape [batch, max_time, input_size]
        initial_state: the initial state (None lets dynamic_rnn build a zero state from dtype)
        time_major: False or True
    :return
        final_state:
            for an LSTM cell, cell.state_size is a tuple whose contents are (c, h)
            in general the state has shape [batch_size, cell.state_size],
            but when the cell is a BasicLSTMCell the state is an LSTMStateTuple, i.e. shape
            [2, batch_size, NUM_UNITS], where the 2 corresponds to the LSTM cell state and hidden state
        outputs:
            for an LSTM cell the state is a tuple (c, h), and h equals the output of the
            last time step in outputs: viewing state as [2, batch_size, NUM_UNITS] and
            outputs as [batch_size, max_time, NUM_UNITS],
            state[1] == outputs[:, -1, :] (the sketch after this function checks this);
            for a GRU cell, similarly, state == outputs[:, -1, :]

            if time_major == False (default), outputs has shape [batch_size, max_time, NUM_UNITS]
            if time_major == True, outputs has shape [max_time, batch_size, NUM_UNITS]
            (the RNN input and output must then use the same time-major layout)
    '''
    outputs, final_state = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=image, initial_state=None, dtype=tf.float32, time_major=False)
    print('||moyans log -->||  dynamic_rnn, input im.shape: {}, outputs.shape: {}'.format(image.shape, outputs.shape))
    return outputs, final_state
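A quick check of the relationship described in the docstring above, i.e. that for an LSTM the h part of final_state equals the last-time-step slice of outputs (a small sketch with made-up sizes, reusing the dynamicRNN helper):

import numpy as np
import tensorflow as tf

check_cell = tf.contrib.rnn.BasicLSTMCell(num_units=64)
x = tf.random_normal([2, 5, 10])                     # [batch, max_time, input_size]
outs, st = dynamicRNN(check_cell, x)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    h, last_out = sess.run([st.h, outs[:, -1, :]])
    print(np.allclose(h, last_out))                  # True: h equals the last-step output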
def bidirectional_dynamicRNN(cell_fw, cell_bw, im):
    # im: [batch, max_time, input_size]; the forward and backward outputs are concatenated
    # along the last axis -> [batch, max_time, fw_NUM_UNITS + bw_NUM_UNITS]
    (outputs_fw, outputs_bw), (outputs_state_fw, outputs_state_bw) = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, im, dtype=tf.float32)
    outputs = tf.concat((outputs_fw, outputs_bw), 2)
    print('||moyans log -->||  bidirectional_dynamic_rnn, input im.shape: {}, outputs.shape: {}'.format(im.shape, outputs.shape))
    return outputs
def static_bidirectionalRNN(cell_fw, cell_bw, im):
    '''
    :param cell_fw: forward LSTM cell
    :param cell_bw: backward LSTM cell
    :param im:
        [batch, max_time, embedding_dim], unstacked along the time axis into
        a list [[batch, embedding_dim], [batch, embedding_dim], ...]
    :return:
        A tuple (outputs, output_state_fw, output_state_bw)
        outputs is a list of length MAX_TIME; each element has shape (batch, fw_NUM_UNITS + bw_NUM_UNITS),
        i.e. the depth-concatenated forward and backward outputs.
        output_state_fw is the final state of the forward RNN, output_state_bw is the final state of the backward RNN.
    '''
    print('||moyans log -->||  input im.shape: {}'.format(im.shape))
    # MAX_TIME is assumed to be defined globally as the fixed sequence length
    input_list = tf.unstack(im, MAX_TIME, axis=1)
    print('||moyans log -->||  im --> input_list, input_list length is: {}, input_list[0].shape is: {}'.format(len(input_list), input_list[0].shape))
    bilstm_Output, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(cell_fw, cell_bw, input_list, dtype=tf.float32)
    print('||moyans log -->||  static_bidirectional_rnn, outputs length is: {}, outputs[0].shape is: {}'.format(len(bilstm_Output), bilstm_Output[0].shape))
    return bilstm_Output, output_state_fw, output_state_bw
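A hypothetical usage sketch tying the three helpers together (MAX_TIME, the placeholder shape and the unit counts are assumptions for illustration; static_bidirectionalRNN reads the global MAX_TIME defined here):

import tensorflow as tf

MAX_TIME, INPUT_DIM, NUM_UNITS = 28, 28, 128            # e.g. a 28x28 MNIST image as a 28-step sequence

image = tf.placeholder(tf.float32, [None, MAX_TIME, INPUT_DIM])

# unidirectional dynamic RNN
cell = tf.contrib.rnn.LSTMCell(NUM_UNITS)
outputs, final_state = dynamicRNN(cell, image)

# bidirectional dynamic RNN (separate variable scope to avoid variable-name clashes)
with tf.variable_scope('bi_dynamic'):
    cell_fw = tf.contrib.rnn.LSTMCell(NUM_UNITS)
    cell_bw = tf.contrib.rnn.LSTMCell(NUM_UNITS)
    bi_outputs = bidirectional_dynamicRNN(cell_fw, cell_bw, image)

# statically unrolled bidirectional RNN
with tf.variable_scope('bi_static'):
    cell_fw2 = tf.contrib.rnn.LSTMCell(NUM_UNITS)
    cell_bw2 = tf.contrib.rnn.LSTMCell(NUM_UNITS)
    bilstm_out, state_fw, state_bw = static_bidirectionalRNN(cell_fw2, cell_bw2, image)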

 


An example using static_bidirectional_rnn for MNIST classification:
#!/usr/bin/env python
# name:  mnist_lstm_bidrectional_classify.py
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.contrib import rnn

# Data Dimension
num_input = 28          # MNIST data input (image shape: 28x28)
timesteps = 28          # Timesteps
n_classes = 10          # Number of classes, one class per digit

def load_data(mode='train'):
    """
    Function to (download and) load the MNIST data
    :param mode: train or test
    :return: images and the corresponding labels
    """
    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    if mode == 'train':
        x_train, y_train, x_valid, y_valid = mnist.train.images, mnist.train.labels, \
                                             mnist.validation.images, mnist.validation.labels
        return x_train, y_train, x_valid, y_valid
    elif mode == 'test':
        x_test, y_test = mnist.test.images, mnist.test.labels
    return x_test, y_test

def randomize(x, y):
    """ Randomizes the order of data samples and their corresponding labels"""
    permutation = np.random.permutation(y.shape[0])
    shuffled_x = x[permutation, :]
    shuffled_y = y[permutation]
    return shuffled_x, shuffled_y

def get_next_batch(x, y, start, end):
    x_batch = x[start:end]
    y_batch = y[start:end]
    return x_batch, y_batch


x_train, y_train, x_valid, y_valid = load_data(mode='train')
print("Size of:")
print("- Training-set:\t\t{}".format(len(y_train)))
print("- Validation-set:\t{}".format(len(y_valid)))

# Hyperparameters
learning_rate = 0.001 # The optimization initial learning rate
epochs = 10           # Total number of training epochs
batch_size = 100      # Training batch size
display_freq = 100    # Frequency of displaying the training results
num_hidden_units = 128  # Number of hidden units of the RNN


# Helper functions for creating new variables
# weight and bias wrappers
def weight_variable(shape):
    """
    Create a weight variable with appropriate initialization
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W',
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)

def bias_variable(shape):
    """
    Create a bias variable with appropriate initialization
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b',
                           dtype=tf.float32,
                           initializer=initial)


def BiRNN(x, weights, biases, timesteps, num_hidden):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, 1)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get BiRNN cell output
    outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                 dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights) + biases

# Placeholders for inputs (x) and outputs(y)
x = tf.placeholder(tf.float32, shape=[None, timesteps, num_input], name='X')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='Y')

# create weight matrix initialized randomly from N~(0, 0.01)
W = weight_variable(shape=[2 * num_hidden_units, n_classes])

# create bias vector initialized as zero
b = bias_variable(shape=[n_classes])

output_logits = BiRNN(x, W, b, timesteps, num_hidden_units)
y_pred = tf.nn.softmax(output_logits)

# Model predictions
cls_prediction = tf.argmax(output_logits, axis=1, name='predictions')

# Define the loss function, optimizer, and accuracy
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output_logits), name='loss')
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)
correct_prediction = tf.equal(tf.argmax(output_logits, 1), tf.argmax(y, 1), name='correct_pred')
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

# Creating the op for initializing all variables
init = tf.global_variables_initializer()

# Train
sess = tf.InteractiveSession()
sess.run(init)
global_step = 0
# Number of training iterations in each epoch
num_tr_iter = int(len(y_train) / batch_size)
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    x_train, y_train = randomize(x_train, y_train)
    for iteration in range(num_tr_iter):
        global_step += 1
        start = iteration * batch_size
        end = (iteration + 1) * batch_size
        x_batch, y_batch = get_next_batch(x_train, y_train, start, end)
        x_batch = x_batch.reshape((batch_size, timesteps, num_input))
        # Run optimization op (backprop)
        feed_dict_batch = {x: x_batch, y: y_batch}
        sess.run(optimizer, feed_dict=feed_dict_batch)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            loss_batch, acc_batch = sess.run([loss, accuracy],
                                             feed_dict=feed_dict_batch)

            print("iter {0:3d}:\t Loss={1:.2f},\tTraining Accuracy={2:.01%}".
                  format(iteration, loss_batch, acc_batch))

    # Run validation after every epoch

    feed_dict_valid = {x: x_valid[:1000].reshape((-1, timesteps, num_input)), y: y_valid[:1000]}
    loss_valid, acc_valid = sess.run([loss, accuracy], feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.2f}, validation accuracy: {2:.01%}".
          format(epoch + 1, loss_valid, acc_valid))
    print('---------------------------------------------------------')


# Test
def plot_images(images, cls_true, cls_pred=None, title=None):
    """
    Create figure with 3x3 sub-plots.
    :param images: array of images to be plotted, (9, img_h*img_w)
    :param cls_true: corresponding true labels (9,)
    :param cls_pred: corresponding predicted labels (9,)
    """
    fig, axes = plt.subplots(3, 3, figsize=(9, 9))
    fig.subplots_adjust(hspace=0.3, wspace=0.3)
    for i, ax in enumerate(axes.flat):
        # Plot image.
        ax.imshow(np.squeeze(images[i]).reshape(28, 28), cmap='binary')

        # Show true and predicted classes.
        if cls_pred is None:
            ax_title = "True: {0}".format(cls_true[i])
        else:
            ax_title = "True: {0}, Pred: {1}".format(cls_true[i], cls_pred[i])

        ax.set_title(ax_title)

        # Remove ticks from the plot.
        ax.set_xticks([])
        ax.set_yticks([])

    if title:
        plt.suptitle(title, size=20)
    plt.show(block=False)

def plot_example_errors(images, cls_true, cls_pred, title=None):
    """
    Function for plotting examples of images that have been mis-classified
    :param images: array of all images, (#imgs, img_h*img_w)
    :param cls_true: corresponding true labels, (#imgs,)
    :param cls_pred: corresponding predicted labels, (#imgs,)
    """
    # Negate the boolean array.
    incorrect = np.logical_not(np.equal(cls_pred, cls_true))

    # Get the images from the test-set that have been
    # incorrectly classified.
    incorrect_images = images[incorrect]

    # Get the true and predicted classes for those images.
    cls_pred = cls_pred[incorrect]
    cls_true = cls_true[incorrect]

    # Plot the first 9 images.
    plot_images(images=incorrect_images[0:9],
                cls_true=cls_true[0:9],
                cls_pred=cls_pred[0:9],
                title=title)

# Test the network (only on 1000 samples) after training
x_test, y_test = load_data(mode='test')
feed_dict_test = {x: x_test[:1000].reshape((-1, timesteps, num_input)), y: y_test[:1000]}
loss_test, acc_test = sess.run([loss, accuracy], feed_dict=feed_dict_test)
print('---------------------------------------------------------')
print("Test loss: {0:.2f}, test accuracy: {1:.01%}".format(loss_test, acc_test))
print('---------------------------------------------------------')

# Plot some of the correct and misclassified examples
cls_pred = sess.run(cls_prediction, feed_dict=feed_dict_test)
cls_true = np.argmax(y_test, axis=1)
plot_images(x_test[:1000], cls_true[:1000], cls_pred, title='Correct Examples')
plot_example_errors(x_test[:1000], cls_true[:1000], cls_pred, title='Misclassified Examples')
plt.show()

# close the session after you are done with testing
sess.close()

 

References:

https://zhuanlan.zhihu.com/p/28196873

https://sthsf.github.io/2017/08/31/Tensorflow%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86-bidirectional-rnn/index.html

https://blog.csdn.net/github_39611196/article/details/81066295

http://www.easy-tensorflow.com/tf-tutorials/recurrent-neural-networks/bidirectional-rnn-for-classification
