單變量LSTM預測模型(2)
數據準備
1、時間序列轉監督學習數據
# coding=utf-8
from pandas import read_csv
from pandas import datetime
from pandas import DataFrame
from pandas import concat
#load data
def parser(x):
    """Parse a date string in ``YYYY/MM/DD`` form into a ``datetime``.

    Passed to ``read_csv`` as the ``date_parser`` callback when the sales
    CSV is loaded.

    Args:
        x: Date text such as ``'2001/01/05'``.

    Returns:
        The ``datetime`` produced by ``datetime.strptime``.

    Raises:
        ValueError: If ``x`` does not match the ``'%Y/%m/%d'`` format.
    """
    # NOTE(review): `datetime` here is the name this script imports with
    # `from pandas import datetime`; that alias was deprecated in pandas 1.0,
    # so on newer pandas import it from the stdlib `datetime` module instead.
    return datetime.strptime(x, '%Y/%m/%d')
# Load the shampoo sales data: the first row is the header, and the first
# column is parsed as dates (via `parser`) and used as the index.
# squeeze=True asks pandas for a Series when only one data column remains.
series = read_csv(
    'data_set/shampoo-sales.csv',
    header=0,
    index_col=0,
    parse_dates=[0],
    date_parser=parser,
    squeeze=True,
)
def timeseries_to_supervised(data, lag=1):
    """Frame a time series as a supervised-learning table.

    Supervised data consists of input/output pairs, and training looks for
    the relationship between them. Here the input is the observation from
    the previous time step(s) and the output is the observation that
    follows. The table is built by shifting the whole series down and
    joining the shifted copies next to the original.

    Args:
        data: Sequence of observations (anything ``DataFrame`` accepts).
        lag: Number of previous time steps used as input columns, i.e. the
            current value is assumed to depend on the last ``lag`` values.

    Returns:
        A DataFrame whose leading columns are ``data`` shifted by 1..``lag``
        steps (the inputs) and whose trailing columns are the unshifted
        ``data`` (the output). Gaps created by the shift are filled with 0.
    """
    df = DataFrame(data)
    # One shifted copy per lag step: shift(i) moves every value i rows down.
    columns = [df.shift(i) for i in range(1, lag + 1)]
    columns.append(df)
    # Join side by side so each row reads: input(s) | output.
    framed = concat(columns, axis=1)
    # The first `lag` rows have no history, so the shift left NaN there.
    return framed.fillna(0)
# Frame the raw observations as (previous value, current value) rows and
# show the first few of them.
X = series.values
supervised = timeseries_to_supervised(X, lag=1)
print(supervised.head())
2、時間序列轉換成穩定數據
# coding=utf-8
from pandas import read_csv
from pandas import datetime
from pandas import Series
#load data
def parser(x):
    """Parse a date string in ``YYYY/MM/DD`` form into a ``datetime``.

    Passed to ``read_csv`` as the ``date_parser`` callback when the sales
    CSV is loaded.

    Args:
        x: Date text such as ``'2001/01/05'``.

    Returns:
        The ``datetime`` produced by ``datetime.strptime``.

    Raises:
        ValueError: If ``x`` does not match the ``'%Y/%m/%d'`` format.
    """
    # NOTE(review): `datetime` here is the name this script imports with
    # `from pandas import datetime`; that alias was deprecated in pandas 1.0,
    # so on newer pandas import it from the stdlib `datetime` module instead.
    return datetime.strptime(x, '%Y/%m/%d')
# Load the shampoo sales data: the first row is the header, and the first
# column is parsed as dates (via `parser`) and used as the index.
# squeeze=True asks pandas for a Series when only one data column remains.
series = read_csv(
    'data_set/shampoo-sales.csv',
    header=0,
    index_col=0,
    parse_dates=[0],
    date_parser=parser,
    squeeze=True,
)
def difference(dataset, interval=1):
    """Difference a series to remove its trend.

    Each output value is the observation at step ``t`` minus the
    observation at step ``t - interval``.

    Args:
        dataset: Ordered observations (list, array or pandas Series).
        interval: Gap, in time steps, between the two values subtracted.

    Returns:
        A ``Series`` holding one difference for every position from
        ``interval`` to the end of ``dataset``.
    """
    # Plain [] on a pandas Series looks up index *labels*, which is wrong
    # (or an error) when the index is dates or non-default integers; .iloc
    # is always positional. Lists and arrays are already positional.
    at = dataset.iloc if hasattr(dataset, 'iloc') else dataset
    return Series(
        [at[i] - at[i - interval] for i in range(interval, len(dataset))]
    )
def inverse_difference(history, yhat, interval=1):
    """Undo differencing to turn a predicted difference into a real value.

    Args:
        history: Ordered original observations (list, array or Series).
        yhat: Differenced (predicted) value to convert back.
        interval: How far back from the end of ``history`` the reference
            observation sits; the value at position ``-interval`` is added.

    Returns:
        ``yhat`` plus the observation at position ``-interval`` of
        ``history``.
    """
    # Plain [] on a pandas Series looks up index *labels*; .iloc guarantees
    # the positional (count-from-the-end) access this function relies on.
    at = history.iloc if hasattr(history, 'iloc') else history
    return yhat + at[-interval]
# Data processing: make the series stationary by differencing it.
differenced = difference(series, 1)
print(differenced.head())
# Invert the differencing to get the real values back.
# The i-th difference was taken against the observation at position i, which
# sits len(series) - i steps back from the end of the original series.
inverted = Series([
    inverse_difference(series, diff, len(series) - i)
    for i, diff in enumerate(differenced)
])
print(inverted.head())
3、觀測值縮放
# coding=utf-8
from pandas import read_csv
from pandas import datetime
from pandas import Series
from sklearn.preprocessing import MinMaxScaler
#load data
def parser(x):
    """Parse a date string in ``YYYY/MM/DD`` form into a ``datetime``.

    Passed to ``read_csv`` as the ``date_parser`` callback when the sales
    CSV is loaded.

    Args:
        x: Date text such as ``'2001/01/05'``.

    Returns:
        The ``datetime`` produced by ``datetime.strptime``.

    Raises:
        ValueError: If ``x`` does not match the ``'%Y/%m/%d'`` format.
    """
    # NOTE(review): `datetime` here is the name this script imports with
    # `from pandas import datetime`; that alias was deprecated in pandas 1.0,
    # so on newer pandas import it from the stdlib `datetime` module instead.
    return datetime.strptime(x, '%Y/%m/%d')
# Load the shampoo sales data: the first row is the header, and the first
# column is parsed as dates (via `parser`) and used as the index.
# squeeze=True asks pandas for a Series when only one data column remains.
series = read_csv(
    'data_set/shampoo-sales.csv',
    header=0,
    index_col=0,
    parse_dates=[0],
    date_parser=parser,
    squeeze=True,
)
print(series.head())

# Scaling
X = series.values
# MinMaxScaler expects a 2-D matrix, so reshape to one column.
X = X.reshape(len(X), 1)
# Target range for the scaled values.
scaler = MinMaxScaler(feature_range=(-1, 1))
# fit() learns the data's min/max; transform() applies the scaling.
scaler = scaler.fit(X)
scalered_X = scaler.transform(X)
scalered_series = Series(scalered_X[:, 0])
print(scalered_series.head())

# Inverse scaling: map the scaled values back to the original range.
inverted_X = scaler.inverse_transform(scalered_X)
inverted_series = Series(inverted_X[:, 0])
print(inverted_series.head())