待處理的數據格式如下所示,這些數據是用 blktrace 抓取的磁盤 I/O 信息。
0.001742812,30893,G,R,1180470464,16,[mapkeeper_rocks],1
0.001927242,30893,G,WS,800359816,16,[mapkeeper_rocks],4
0.002208006,30893,G,WS,800359824,8,[mapkeeper_rocks],1
0.002395960,30893,G,WS,800359824,8,[mapkeeper_rocks],1
0.002560709,30893,G,R,170181608,16,[mapkeeper_rocks],1
0.002771388,30893,G,WS,800359824,8,[mapkeeper_rocks],2
0.003061567,30893,G,R,549982936,16,[mapkeeper_rocks],1
0.003267269,30893,G,R,261945208,16,[mapkeeper_rocks],1
0.003524796,30893,G,WS,800359824,8,[mapkeeper_rocks],3
0.003692649,30893,G,R,647880544,16,[mapkeeper_rocks],1
0.003922432,30893,G,WS,800359824,8,[mapkeeper_rocks],4
0.004295463,30893,G,R,1036282136,16,[mapkeeper_rocks],1
0.004456723,30893,G,WS,800359824,8,[mapkeeper_rocks],5
0.004650597,30893,G,R,533219392,16,[mapkeeper_rocks],1
0.004879987,30893,G,WS,800359824,8,[mapkeeper_rocks],6
其中第一列表示時間戳。按設定的時間窗口重新整理數據格式之後,將數據輸入模型進行訓練,LSTM 代碼如下:
import pandas as pd
import os
import numpy as np
from keras.layers import LSTM,Dense,Activation,Dropout
from keras.models import Sequential
from keras.utils import to_categorical
import time
from sklearn.preprocessing import StandardScaler,LabelEncoder,MinMaxScaler
def load_data(filename, seq_len, normalise_window):
    """Load a header-less CSV and build sliding-window train/test arrays.

    Columns 0, 1, 2, 4 and 6 of the CSV are dropped.  The first remaining
    column is label-encoded, everything is cast to float32 and min-max
    scaled, and consecutive rows are flattened into windows of
    ``seq_len + 1`` rows.  The first two values of every flattened window
    are removed; the last remaining value is the target and the rest are
    the features.

    Args:
        filename: Path of the CSV file to read (no header row).
        seq_len: Number of history rows per window; each window holds
            ``seq_len + 1`` rows, the last one supplying the target.
        normalise_window: If true, windows are passed through
            ``normalise_windows`` and the target stays in that normalised
            form.  If false, the target is returned unscaled (see the note
            in the body).

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.  The first 90% of the
        windows (shuffled in place) form the training set, the rest the
        test set.  ``x_*`` have shape ``(samples, 1, features)``; ``y_*``
        are 1-D.

    Raises:
        ValueError: If the file has too few rows to build a single window.
    """
    dataset = pd.read_csv(filename, header=None)
    values = dataset.values
    # Restrict CUDA to device 0 (kept from the original implementation).
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Example row: 0.004456723,30893,G,WS,800359824,8,[mapkeeper_rocks],5
    # After the delete only columns 3, 5 and 7 remain ('WS', 8 and 5 above).
    data = np.delete(values, (0, 1, 2, 4, 6), axis=1)
    print(data.shape[0])

    encoder = LabelEncoder()
    data[:, 0] = encoder.fit_transform(data[:, 0])
    data = data.astype('float32')

    # Keep an unscaled copy of the last column: it supplies the target, and
    # a min-max-scaled target lies in [0, 1], which is useless as a class id.
    raw_targets = data[:, -1].copy()

    scaler = MinMaxScaler()
    data = scaler.fit_transform(data)

    sequence_length = seq_len + 1
    # NOTE(review): this stops one window short of the maximum possible
    # (the window starting at len(data) - sequence_length is not produced);
    # kept as in the original so the sample count does not change.
    n_windows = len(data) - sequence_length
    if n_windows <= 0:
        raise ValueError(
            "need more than %d rows to build a window, got %d"
            % (sequence_length, len(data))
        )
    result = [
        data[index: index + sequence_length].flatten()
        for index in range(n_windows)
    ]
    print(len(result), len(result[0]))
    # Debug preview; slicing avoids an IndexError with fewer than 20 windows.
    for window in result[:20]:
        print(window)

    if normalise_window:
        result = normalise_windows(result)
    result = np.array(result)
    result = np.delete(result, (0, 1), axis=1)

    if not normalise_window:
        # The target of window i is the last column of its last row, i.e.
        # row i + sequence_length - 1 of the data.  Put the unscaled value
        # back so integer labels survive for one-hot encoding.
        first = sequence_length - 1
        result[:, -1] = raw_targets[first: first + n_windows]

    row = int(round(0.9 * result.shape[0]))
    train = result[:row, :]
    # Shuffles whole rows, so features and target stay paired.
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    x_test = result[row:, :-1]
    y_test = result[row:, -1]

    # Present every sample as a single timestep holding all features.
    x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
    x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))
    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)
    return x_train, y_train, x_test, y_test
def normalise_windows(window_data):
    """Express every window relative to its own first value.

    Each value ``p`` of a window becomes ``p / window[0] - 1``.  A window
    whose first value is zero cannot be normalised this way; it is reported
    on stdout and left out of the result, so the returned list may be
    shorter than ``window_data``.

    Args:
        window_data: Iterable of non-empty numeric sequences.

    Returns:
        List of lists of floats, one per window that could be normalised.
    """
    normalised_data = []
    for count, window in enumerate(window_data, start=1):
        print(count)
        base = float(window[0])
        if base == 0.0:
            # Previously this message sat in a for/else clause and was
            # printed unconditionally, while a real zero raised
            # ZeroDivisionError.
            print(str(count), ":window[0] is zero")
            continue
        normalised_data.append([(float(p) / base) - 1 for p in window])
    return normalised_data
def build_model(layers):
    """Build and compile a two-layer stacked LSTM classifier.

    Args:
        layers: Indexable of four integers:
            ``layers[0]`` - timesteps per sample,
            ``layers[1]`` - features per timestep (also used as the number
            of units of the first LSTM layer),
            ``layers[2]`` - units of the second LSTM layer,
            ``layers[3]`` - size of the final Dense/softmax layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # ``units`` is the Keras 2 name of the former ``output_dim`` argument.
    model.add(LSTM(
        units=int(layers[1]),
        input_shape=(int(layers[0]), int(layers[1])),
        return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(
        int(layers[2]),
        return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(int(layers[3])))
    model.add(Activation("softmax"))

    start = time.time()
    # A softmax output trained against one-hot targets needs a
    # classification loss; mean absolute error was used here before.
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    print("> Compilation Time : ", time.time() - start)
    return model
# Training driver: load the windowed data, one-hot encode the targets,
# build the LSTM and fit it.  Kept at module level so that ``model``,
# ``x_test`` etc. remain available to any code that follows.
path = os.getcwd()
windows = 100
n_classes = 140
filename = os.path.join(path, 'smallData', 'resultToCsv.csv')
x_train, y_train, x_test, y_test = load_data(filename, windows, False)
# to_categorical expects integer class ids in the range [0, n_classes).
y_train = to_categorical(y_train, n_classes)
y_test = to_categorical(y_test, n_classes)
print("load sucess")
# Debug preview; slicing avoids an IndexError with fewer than 20 samples.
for sample in x_train[:20]:
    print(sample)
# [timesteps, features, units of the second LSTM, number of output classes]
layers = np.array([x_train.shape[1], x_train.shape[2], 100, n_classes])
model = build_model(layers)
print("build model sucess")
model.fit(x_train, y_train, epochs=20, batch_size=20, verbose=1, validation_data=(x_test, y_test))
測試過程: