Implementing Recurrent Neural Networks in TensorFlow

Plain RNNs suffer from the long-term dependency problem; the LSTM architecture was designed to address it.

For background on LSTM, see http://www.jianshu.com/p/9dc9f41f0b29

1. A basic skeleton for using an LSTM in TensorFlow

#coding:utf-8

import tensorflow as tf

# This snippet is a skeleton: lstm_hidden_size, batch_size, num_steps,
# current_input, fully_connected, calc_loss and expected_output are placeholders.

lstm = tf.contrib.rnn.BasicLSTMCell(lstm_hidden_size)  # lstm_hidden_size: number of hidden units

state = lstm.zero_state(batch_size, tf.float32)  # initialize the state to all zeros

loss = 0.0

# To limit vanishing gradients, unroll the network for at most num_steps time steps
for i in range(num_steps):
    if i > 0:
        # the variables inside the LSTM are created at the first time step;
        # every later time step reuses those same variables
        tf.get_variable_scope().reuse_variables()

    lstm_output, state = lstm(current_input, state)

    # pass the LSTM output through a fully connected layer to get the prediction
    final_output = fully_connected(lstm_output)

    loss += calc_loss(final_output, expected_output)
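
To make the skeleton concrete, here is a runnable sketch that fills in the placeholders. The hyperparameter values, the input placeholder, the fully connected projection and the squared-error loss are illustrative assumptions, not part of the skeleton above:

#coding:utf-8
import tensorflow as tf

# hypothetical hyperparameters, chosen only for illustration
lstm_hidden_size = 128
num_steps = 20
batch_size = 32
input_size = 10

# inputs: [batch_size, num_steps, input_size]; targets: one value per step
inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_size])
targets = tf.placeholder(tf.float32, [batch_size, num_steps, 1])

lstm = tf.contrib.rnn.BasicLSTMCell(lstm_hidden_size)
state = lstm.zero_state(batch_size, tf.float32)

loss = 0.0
with tf.variable_scope("rnn"):
    for i in range(num_steps):
        if i > 0:
            tf.get_variable_scope().reuse_variables()
        lstm_output, state = lstm(inputs[:, i, :], state)
        # project the hidden state to a scalar prediction for this step
        final_output = tf.contrib.layers.fully_connected(
            lstm_output, 1, activation_fn=None)
        loss += tf.reduce_mean(tf.square(final_output - targets[:, i, :]))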

2. A deep (stacked) LSTM

#coding:utf-8
import tensorflow as tf

# Deep (stacked) recurrent neural network.
# number_of_layers is the number of stacked LSTM layers; build a fresh cell
# per layer so the layers do not share parameters.
stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(lstm_size) for _ in range(number_of_layers)])

state = stacked_lstm.zero_state(batch_size, tf.float32)

loss = 0.0
for i in range(num_steps):
    if i > 0:
        tf.get_variable_scope().reuse_variables()

    stacked_lstm_output, state = stacked_lstm(current_input, state)

    final_output = fully_connected(stacked_lstm_output)

    loss += calc_loss(final_output, expected_output)
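
As an aside, the explicit time-step loop can also be replaced by tf.nn.dynamic_rnn, which threads the state through time internally. A minimal sketch, with illustrative shapes and sizes:

import tensorflow as tf

lstm_size = 128
number_of_layers = 2
feature_dim = 10

stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [tf.contrib.rnn.BasicLSTMCell(lstm_size) for _ in range(number_of_layers)])

# both batch and time dimensions may be left as None;
# dynamic_rnn unrolls the network at run time
inputs = tf.placeholder(tf.float32, [None, None, feature_dim])
outputs, final_state = tf.nn.dynamic_rnn(stacked_lstm, inputs, dtype=tf.float32)
# outputs: [batch, time, lstm_size]; final_state: one state tuple per layer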

3. Adding dropout

#coding:utf-8
# Build a deep LSTM with dropout
import tensorflow as tf

# Wrap each layer's cell in a DropoutWrapper; output_keep_prob=0.5 keeps
# half of each cell's output activations during training
def make_dropout_cell():
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    return tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=0.5)

stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [make_dropout_cell() for _ in range(number_of_layers)])

state = stacked_lstm.zero_state(batch_size, tf.float32)

loss = 0.0
for i in range(num_steps):
    if i > 0:
        tf.get_variable_scope().reuse_variables()

    output, state = stacked_lstm(current_input, state)
    final_output = fully_connected(output)
    loss += calc_loss(final_output, expected_output)
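
One practical detail not shown above: dropout should only be active during training. A common pattern (a sketch, assuming lstm_size and number_of_layers are defined as before) is to feed the keep probability through a placeholder so it can be set to 1.0 at evaluation time:

import tensorflow as tf

# feed 0.5 while training, 1.0 while evaluating
keep_prob = tf.placeholder(tf.float32)

def make_cell():
    lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    return tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

stacked_lstm = tf.contrib.rnn.MultiRNNCell(
    [make_cell() for _ in range(number_of_layers)])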

4. A simple application: predicting the sine function

#coding:utf-8

import tensorflow as tf
import numpy as np 
import matplotlib as mpl 
mpl.use('Agg')

from matplotlib import pyplot as plt

learn = tf.contrib.learn

HIDDEN_SIZE = 30          # number of hidden units per LSTM layer
NUM_LAYERS = 2            # number of stacked LSTM layers

TIMESTEPS = 10            # length of each input sequence
TRAINING_STEPS = 10000    # number of training steps

BATCH_SIZE = 32

TRAIN_EXAMPLES = 10000    # number of training samples

TESTING_EXAMPLES = 1000   # number of test samples

SAMPLE_GAP = 0.01         # sampling interval

def generate_data(seq):
    X = []
    y = []

    # the TIMESTEPS points starting at position i form the input,
    # and the point at position i+TIMESTEPS is the label
    for i in range(len(seq)-TIMESTEPS-1):
        X.append([seq[i:i+TIMESTEPS]])
        y.append([seq[i+TIMESTEPS]])

    return np.array(X,dtype=np.float32),np.array(y,dtype=np.float32)
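
# Shape check (illustrative): with TIMESTEPS = 10, a sequence of 100 points
# yields X of shape (89, 1, 10) and y of shape (89, 1); the length-1 axis is
# the single 'time step' that lstm_model below unstacks along axis 1.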

def lstm_model(X,y):
    # stack NUM_LAYERS LSTM layers, one fresh cell per layer
    cell = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.BasicLSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])

    # split the input along axis 1 into a list of per-time-step tensors, e.g.
    # tf.unstack([[1,2,3],[1,25,6]], axis=1) gives
    # [array([1, 1]), array([2, 25]), array([3, 6])]
    x_ = tf.unstack(X, axis=1)

    output, _ = tf.contrib.rnn.static_rnn(cell, x_, dtype=tf.float32)
    output = output[-1]  # keep only the output at the last time step

    # linear regression on top of the final LSTM output
    prediction, loss = learn.models.linear_regression(output, y)

    # the tf.contrib.layers way of defining the training op
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.contrib.framework.get_global_step(),
        optimizer="Adagrad", learning_rate=0.1)

    return prediction, loss, train_op


regressor = learn.Estimator(model_fn = lstm_model)

test_start = TRAIN_EXAMPLES*SAMPLE_GAP
test_end = (TRAIN_EXAMPLES+TESTING_EXAMPLES)*SAMPLE_GAP

train_X,train_y = generate_data(np.sin(np.linspace(0,test_start,TRAIN_EXAMPLES,dtype=np.float32)))

test_X,test_y = generate_data(np.sin(np.linspace(test_start,test_end,TESTING_EXAMPLES,dtype=np.float32)))

# a single call to fit() runs the whole training loop
regressor.fit(train_X, train_y, batch_size=BATCH_SIZE, steps=TRAINING_STEPS)

predicted = [[pred] for pred in regressor.predict(test_X)]

rmse = np.sqrt(((np.array(predicted)-test_y)**2).mean(axis=0))

print "Root mean square error: %f" % rmse[0]

fig = plt.figure()
plot_predicted, = plt.plot(predicted, label="predicted")
plot_test, = plt.plot(test_y, label="real_sin")
plt.legend([plot_predicted, plot_test], ["predicted", "real_sin"])

fig.savefig("sin.png")

5. In practice, recurrent neural networks are used heavily in NLP: because they capture contextual information, they are a natural fit for language modeling.
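
To illustrate the language-modeling use, here is a minimal sketch of the usual embed-then-LSTM-then-softmax pattern; vocab_size, embedding_dim and hidden_size are hypothetical values:

#coding:utf-8
import tensorflow as tf

vocab_size = 10000   # hypothetical vocabulary size
embedding_dim = 128
hidden_size = 256

word_ids = tf.placeholder(tf.int32, [None, None])  # [batch, time] token ids
targets = tf.placeholder(tf.int32, [None, None])   # the next token at each position

# map token ids to dense vectors
embedding = tf.get_variable("embedding", [vocab_size, embedding_dim])
inputs = tf.nn.embedding_lookup(embedding, word_ids)

cell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

# project every hidden state onto the vocabulary and score the true next token
hidden = tf.reshape(outputs, [-1, hidden_size])
softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
softmax_b = tf.get_variable("softmax_b", [vocab_size])
logits = tf.matmul(hidden, softmax_w) + softmax_b
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.reshape(targets, [-1]), logits=logits))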
