LSTM處理多時間序列數據

待處理的數據格式如下所示，這些數據是用 blktrace 抓取的磁盤 I/O 信息。

0.001742812,30893,G,R,1180470464,16,[mapkeeper_rocks],1
0.001927242,30893,G,WS,800359816,16,[mapkeeper_rocks],4
0.002208006,30893,G,WS,800359824,8,[mapkeeper_rocks],1
0.002395960,30893,G,WS,800359824,8,[mapkeeper_rocks],1
0.002560709,30893,G,R,170181608,16,[mapkeeper_rocks],1
0.002771388,30893,G,WS,800359824,8,[mapkeeper_rocks],2
0.003061567,30893,G,R,549982936,16,[mapkeeper_rocks],1
0.003267269,30893,G,R,261945208,16,[mapkeeper_rocks],1
0.003524796,30893,G,WS,800359824,8,[mapkeeper_rocks],3
0.003692649,30893,G,R,647880544,16,[mapkeeper_rocks],1
0.003922432,30893,G,WS,800359824,8,[mapkeeper_rocks],4
0.004295463,30893,G,R,1036282136,16,[mapkeeper_rocks],1
0.004456723,30893,G,WS,800359824,8,[mapkeeper_rocks],5
0.004650597,30893,G,R,533219392,16,[mapkeeper_rocks],1
0.004879987,30893,G,WS,800359824,8,[mapkeeper_rocks],6

其中第一列是時間戳。先按設定的時間窗口把數據重新整理成序列格式，再輸入模型進行訓練。LSTM 代碼如下：

import pandas as pd
import os
import numpy as np
from keras.layers import LSTM,Dense,Activation,Dropout
from keras.models import Sequential
from keras.utils import to_categorical
import time
from sklearn.preprocessing import StandardScaler,LabelEncoder,MinMaxScaler
def load_data(filename, seq_len, normalise_window):
    """Load a blktrace CSV and build sliding-window train/test arrays.

    The file is read without a header row. Columns 0, 1, 2, 4 and 6 are
    dropped, leaving three columns per row; for the sample row
    ``0.004456723,30893,G,WS,800359824,8,[mapkeeper_rocks],5`` these are
    ``WS``, ``8`` and ``5``. The first remaining column is label-encoded,
    then all three columns are min-max scaled together.

    Every window consists of ``seq_len + 1`` consecutive rows flattened into
    one vector. The first two values of each flattened window are discarded,
    the last value becomes the target ``y`` and everything in between
    becomes the feature vector ``x``.

    The first 90% of the windows form the training set (shuffled in place
    with NumPy's global RNG); the remainder is the test set, left in order.

    NOTE(review): the target column is min-max scaled along with the
    features, so ``y`` lies in [0, 1] rather than holding the raw integer
    from the CSV. Confirm this is what callers that one-hot encode ``y``
    expect.

    Side effect: sets the ``CUDA_VISIBLE_DEVICES`` environment variable
    to ``"0"``.

    Args:
        filename: path of the CSV file to read.
        seq_len: number of rows per window, not counting the final row that
            supplies the target.
        normalise_window: when truthy, every window is additionally passed
            through ``normalise_windows``.

    Returns:
        ``(x_train, y_train, x_test, y_test)``; the ``x`` arrays have shape
        ``(samples, 1, features)`` and the ``y`` arrays shape ``(samples,)``.

    Raises:
        ValueError: if the file has fewer than ``seq_len + 1`` rows, so that
            not even one window can be built.
    """
    dataset = pd.read_csv(filename, header=None)
    values = dataset.values
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # Column selection used when processing resulttocsv.csv.
    data = np.delete(values, (0, 1, 2, 4, 6), axis=1)
    print(data.shape[0])

    # The first kept column is categorical text; turn it into integer codes
    # before the whole array is converted to floats.
    encoder = LabelEncoder()
    data[:, 0] = encoder.fit_transform(data[:, 0])
    data = data.astype('float32')

    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)

    sequence_length = seq_len + 1
    # "+ 1" so the window ending on the very last row is included as well.
    n_windows = len(data) - sequence_length + 1
    if n_windows < 1:
        raise ValueError(
            "need at least %d rows to build one window, got %d"
            % (sequence_length, len(data)))
    result = [data[start:start + sequence_length].flatten()
              for start in range(n_windows)]

    print(len(result), len(result[0]))
    # Debug preview; slicing keeps this safe when fewer than 20 windows exist.
    for window in result[:20]:
        print(window)

    if normalise_window:
        result = normalise_windows(result)

    result = np.array(result)
    # Drop the first two values of every flattened window.
    result = np.delete(result, (0, 1), axis=1)

    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # Only the training windows are shuffled; the test split keeps file order.
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Keras LSTM input is 3-D: (samples, timesteps, features), with one timestep.
    x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
    x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
    return x_train, y_train, x_test, y_test

def normalise_windows(window_data):
    """Rescale every window relative to its own first value.

    Each value ``p`` of a window is replaced by ``p / window[0] - 1``, so a
    normalised window always starts at ``0.0``.

    A window whose first value is zero cannot be rescaled. It is reported on
    stdout together with its 1-based position and left out of the result, so
    the returned list may be shorter than the input.

    Args:
        window_data: iterable of non-empty numeric sequences.

    Returns:
        A list of lists of floats, one per window that could be normalised,
        in the original order.
    """
    normalised_data = []
    for count, window in enumerate(window_data, start=1):
        base = float(window[0])
        if base == 0:
            # Dividing by a zero base would raise; report and skip instead.
            print(str(count), ":window[0] is zero")
            continue
        normalised_data.append([(float(p) / base) - 1 for p in window])
    return normalised_data

def build_model(layers):
    """Build and compile a two-layer LSTM classifier.

    Architecture: LSTM (returning sequences) -> Dropout(0.2) -> LSTM ->
    Dropout(0.2) -> Dense -> softmax.

    Args:
        layers: indexable holding at least four integers, read as
            ``layers[0]``: timesteps of the input,
            ``layers[1]``: features per timestep (also used as the unit count
            of the first LSTM),
            ``layers[2]``: unit count of the second LSTM,
            ``layers[3]``: size of the softmax output layer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    # Plain ints so NumPy integer scalars (e.g. from an ndarray) are accepted.
    timesteps = int(layers[0])
    n_features = int(layers[1])
    hidden_units = int(layers[2])
    n_outputs = int(layers[3])

    model = Sequential()
    # Units are passed positionally: the keyword was renamed from
    # ``output_dim`` to ``units`` between Keras versions.
    model.add(LSTM(
        n_features,
        input_shape=(timesteps, n_features),
        return_sequences=True))
    model.add(Dropout(0.2))

    model.add(LSTM(
        hidden_units,
        return_sequences=False))
    model.add(Dropout(0.2))

    model.add(Dense(n_outputs))
    model.add(Activation("softmax"))

    start = time.time()
    # A softmax output trained against one-hot targets needs a categorical
    # loss; mean absolute error gives almost no useful gradient here.
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print("> Compilation Time : ", time.time() - start)
    return model
# Training script: load the windowed blktrace data, one-hot encode the targets,
# build the LSTM classifier and fit it.
path = os.getcwd()
windows = 100       # rows per input window passed to load_data as seq_len
n_classes = 140     # number of target classes / width of the softmax layer
filename = path + '/smallData/resultToCsv.csv'
x_train, y_train, x_test, y_test = load_data(filename, windows, False)
# NOTE(review): load_data takes y from the min-max scaled array, so the values
# handed to to_categorical lie in [0, 1] -- confirm the labels are meant to be
# one-hot encoded in that form.
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)
print("load sucess")
# Debug preview; slicing avoids an IndexError when there are fewer than 20 samples.
for sample in x_train[:20]:
    print(sample)
# [timesteps, features, units of the second LSTM, output classes]; the output
# width reuses n_classes so it cannot drift from the one-hot encoding above.
layers = np.array([x_train.shape[1], x_train.shape[2], 100, n_classes])
model = build_model(layers)
print("build model sucess")
model.fit(x_train, y_train, epochs=20, batch_size=20, verbose=1, validation_data=(x_test, y_test))

測試過程：