keras進行情感分析彙總

經過一段時間的梳理,今天有時間就把做項目時使用的demo寫成博客,爲研究自然語言處理的讀者提供一些思路。

代碼都能運行,若有問題,請留言。

# -*- coding: utf-8 -*-
# @Time    : 2019/7/3 9:05
# @Author  : hejipei
# @File    : keras_sentiment.py
""" """

'''好的博客和github'''
# https://github.com/ShawnyXiao/TextClassification-Keras/tree/master/model
# http://www.tensorflownews.com/2018/05/10/keras_gru/
# https://my.oschina.net/u/3800567/blog/2965731
# http://www.voidcn.com/article/p-alhbnusv-bon.html
# https://blog.csdn.net/shu15121856/article/category/8840507
import numpy as np
import pandas as pd
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense,Embedding,Dropout,Flatten
from keras.layers import LSTM,SimpleRNN,Bidirectional,GRU
from keras.layers import GlobalAveragePooling1D
from keras.datasets import imdb
from keras.callbacks import EarlyStopping

def input_data():
    """Load the IMDB review dataset and pad every review to a fixed length.

    Reads the module-level settings ``max_features`` (passed to
    ``imdb.load_data`` as ``num_words``) and ``maxlen`` (passed to
    ``sequence.pad_sequences``). Progress and basic statistics are printed
    along the way.

    Returns:
        A 4-tuple ``(x_train, y_train, x_test, y_test)`` where both ``x_*``
        values are the outputs of ``sequence.pad_sequences`` and both
        ``y_*`` values are returned exactly as loaded.
    """
    print('Loading data...')
    train_split, test_split = imdb.load_data(num_words=max_features)
    x_train, y_train = train_split
    x_test, y_test = test_split

    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')

    # Report the mean review length (as an integer) before padding.
    train_lengths = [len(review) for review in x_train]
    test_lengths = [len(review) for review in x_test]
    print('Average train sequence length: {}'.format(np.mean(train_lengths, dtype=int)))
    print('Average test sequence length: {}'.format(np.mean(test_lengths, dtype=int)))

    print('Pad sequences(samples x time)')
    x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    return x_train, y_train, x_test, y_test

def LSTM_model():
    """Build and compile a single-direction LSTM binary classifier.

    Reads the module-level settings ``max_features``, ``embed_size`` and
    ``maxlen`` to size the embedding layer.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build LSTM model...')
    stack = [
        # The embedding has to be the first layer: it turns each of the
        # `maxlen` token ids of a review into an `embed_size`-dim vector.
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.2),
        LSTM(32),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def BILSTM_model():
    """Build and compile a bidirectional LSTM binary classifier.

    The bidirectional layer returns the full output sequence (forward and
    backward outputs concatenated), which is flattened before the final
    sigmoid unit. Reads the module-level ``max_features``, ``embed_size``
    and ``maxlen``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build BILSTM model...')
    model = Sequential()
    layer_stack = (
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.5),
        Bidirectional(LSTM(32, return_sequences=True), merge_mode='concat'),
        Dropout(0.5),
        Flatten(),
        Dense(1, activation='sigmoid'),
    )
    for layer in layer_stack:
        model.add(layer)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def RNN_model():
    """Build and compile a plain (single-direction) SimpleRNN classifier.

    Reads the module-level ``max_features``, ``embed_size`` and ``maxlen``
    to size the embedding layer.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build RNN model...')
    model = Sequential([
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.5),
        SimpleRNN(16),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)
    return model

def DRNN_model():
    """Build and compile a bidirectional SimpleRNN binary classifier.

    The bidirectional layer returns the full output sequence (forward and
    backward outputs concatenated), which is flattened before the final
    sigmoid unit. Reads the module-level ``max_features``, ``embed_size``
    and ``maxlen``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    # The progress message names this builder; it previously duplicated the
    # DBRNN_model message, so the two models were indistinguishable in logs.
    print('Build DRNN model...')
    model = Sequential()
    model.add(Embedding(max_features, embed_size, input_length=maxlen))
    model.add(Dropout(0.5))
    model.add(Bidirectional(SimpleRNN(16, return_sequences=True), merge_mode='concat'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def DBRNN_model():
    """Build and compile a stacked classifier: bidirectional RNN + RNN.

    A bidirectional SimpleRNN that returns its full output sequence feeds a
    second, single-direction SimpleRNN, followed by dropout and a sigmoid
    unit. Reads the module-level ``max_features``, ``embed_size`` and
    ``maxlen``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build DBRNN_ model...')
    model = Sequential()
    layer_stack = (
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.5),
        # return_sequences=True so the next recurrent layer gets a sequence.
        Bidirectional(SimpleRNN(16, return_sequences=True), merge_mode='concat'),
        SimpleRNN(8),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    for layer in layer_stack:
        model.add(layer)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def GRU_model():
    """Build and compile a single-direction GRU binary classifier.

    Reads the module-level ``max_features``, ``embed_size`` and ``maxlen``
    to size the embedding layer.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build GRU model...')
    stack = [
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.2),
        GRU(32),
        Dense(256, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def BIGRU_model():
    """Build and compile a bidirectional GRU binary classifier.

    The bidirectional layer returns the full output sequence (forward and
    backward outputs concatenated), which is flattened before the final
    sigmoid unit. Reads the module-level ``max_features``, ``embed_size``
    and ``maxlen``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build BIGRU model...')
    model = Sequential([
        Embedding(max_features, embed_size, input_length=maxlen),
        Dropout(0.5),
        Bidirectional(GRU(32, return_sequences=True), merge_mode='concat'),
        Dropout(0.5),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ])
    compile_options = {
        'loss': 'binary_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)
    return model


# 構建模型
def Fast_text_model():
    """Build and compile a FastText-style classifier.

    The model averages the word embeddings of a review over the sequence
    axis and feeds the result to a single sigmoid unit. The layer summary
    is printed before the model is compiled. Reads the module-level
    ``max_features``, ``embed_size`` and ``maxlen``.

    Returns:
        A ``Sequential`` model compiled with binary cross-entropy loss, the
        Adam optimizer and the accuracy metric.
    """
    print('Build Fast_text model...')
    model = Sequential()
    layer_stack = (
        Embedding(max_features, embed_size, input_length=maxlen),
        GlobalAveragePooling1D(),
        Dense(1, activation='sigmoid'),
    )
    for layer in layer_stack:
        model.add(layer)
    model.summary()
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

def fit_evaluate(model, x_train, y_train, x_test, y_test ):
    """Train ``model`` with early stopping, then print its test metrics.

    Reads the module-level settings ``batch_size`` and ``epochs``. The test
    split is used both as the validation data during training and for the
    final evaluation.

    Args:
        model: A compiled Keras model.
        x_train: Training inputs.
        y_train: Training labels.
        x_test: Test inputs (also used as validation inputs).
        y_test: Test labels (also used as validation labels).

    Returns:
        None. The test loss and accuracy are printed.
    """
    # Stop when validation accuracy has not improved for 5 epochs.
    # NOTE(review): the metric key 'val_acc' depends on the installed Keras
    # version (some releases report 'val_accuracy' instead) - confirm.
    stopper = EarlyStopping(monitor='val_acc', patience=5)
    fit_options = {
        'batch_size': batch_size,
        'epochs': epochs,
        'callbacks': [stopper],
        'validation_data': (x_test, y_test),
    }
    model.fit(x_train, y_train, **fit_options)

    test_loss, test_accuracy = model.evaluate(x_test, y_test, batch_size=batch_size)
    print('Test score:', test_loss)
    print('Test accuracy:', test_accuracy)

if __name__ == "__main__":
    # Pipeline:
    #   1. input_data   : load the preprocessed data
    #   2. xxx_model    : build and compile a model
    #   3. fit_evaluate : train the model and report its test accuracy

    # Settings below are module-level names read by the functions above.
    max_features = 25000  # vocabulary size
    maxlen = 400  # maximum sequence length
    batch_size = 32  # mini-batch size
    embed_size = 50  # word-vector dimension
    epochs = 10  # number of training epochs

    x_train, y_train, x_test, y_test = input_data()

    # Model builders in execution order; the trailing comments keep the
    # accuracy figures recorded by the original author for each model.
    model_builders = (
        Fast_text_model,  # accuracy: 0.889
        LSTM_model,       # accuracy: 0.868
        BILSTM_model,     # accuracy: 0.88936
        RNN_model,        # accuracy: 0.65
        DRNN_model,       # accuracy: 0.8718
        DBRNN_model,      # accuracy: 0.75036
        GRU_model,        # accuracy: 0.86364
        BIGRU_model,      # accuracy: 0.8748
    )
    for build_model in model_builders:
        model = build_model()
        fit_evaluate(model, x_train, y_train, x_test, y_test)

    # `model` is the last model trained above (the BIGRU one).
    model.summary()
#

#
# from keras.preprocessing import sequence
# from keras.models import Sequential
# from keras.layers import Dense,Embedding
# from keras.layers import LSTM
# from keras.datasets import imdb
#
# max_features = 20000
# maxlen = 80
# batch_size = 32
#
# print('Loading data...')
# (x_train,y_train),(x_test,y_test) = imdb.load_data(num_words= max_features )
# print(len(x_train),'train sequences')
# print(len(x_test),'test sequences')
# print('Pad sequences(samples x time)')
# x_train = sequence .pad_sequences(x_train ,maxlen= maxlen )
# x_test = sequence .pad_sequences(x_test ,maxlen= maxlen )
#
# print('x_train shape:',x_train .shape )
# print('x_test shape:',x_test .shape )
#
# print('Build model...')
# model = Sequential()
# model.add(Embedding (max_features ,128))#嵌入層將正整數下標轉換爲固定大小的向量。只能作爲模型的第一層
# model.add(LSTM (128,dropout= 0.2,recurrent_dropout= 0.2))
# model.add(Dense(1,activation= 'sigmoid'))
# model.compile(loss= 'binary_crossentropy',optimizer= 'adam',metrics= ['accuracy'])
#
# print('Train...')
#
# model.fit(x_train ,y_train ,batch_size= batch_size ,epochs= 5,validation_data= (x_test ,y_test ))
#
# score,acc = model.evaluate(x_test ,y_test ,batch_size= batch_size )
# print('Test score:',score)
# print('Test accuracy:', acc)

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章