Python時間序列LSTM預測系列教程(10)-多步預測

Multi-Step LSTM預測(1)




數據集



數據準備與模型評估


1、拆分成訓練和測試數據。
訓練數據=前兩年的洗髮水銷售數據
測試數據=剩下一年的洗髮水銷售數據

2、Multi-Step 預測
假設需要預測3個月的銷售數據

3、模型評估
用rolling-forecast(walk-forward)方式進行模型驗證
在測試數據上逐個時間步向前滑動:每一步先進行一次預測,之後把測試數據中該時間步的真實觀測值加入模型可用的數據,再進行下一步預測

用RMSE評估

持久模型(Persistence Model)


它是時間序列預測中很好的基準(baseline)
是最簡單的預測

原理:
用當前值作爲之後的預測值

靜態模型完整過程


1、轉換成有監督數據
2、切分成訓練和測試數據
3、訓練模型
4、預測
5、評估
6、輸出

靜態模型預測代碼解析


#coding=utf-8                                                                                                          
from pandas import read_csv      
from pandas import DataFrame     
from pandas import concat
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot    
from pandas import datetime      
 
def parser(x):
    """Parse a ``YYYY/MM/DD`` date string into a datetime object."""
    date_format = '%Y/%m/%d'
    return datetime.strptime(x, date_format)
 
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a series as a supervised-learning table of lagged and future columns.

    Args:
        data: A list, or an object exposing ``shape[1]`` (number of variables).
        n_in: Number of lag steps (t-n_in .. t-1) placed first as input columns.
        n_out: Number of steps (t .. t+n_out-1) placed after them as output columns.
        dropnan: When true, drop the rows that contain NaN after shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.
    """
    if type(data) is list:
        n_vars = 1
    else:
        n_vars = data.shape[1]
    frame = DataFrame(data)  # wrapping the data adds row and column labels
    var_ids = range(1, n_vars + 1)

    shifted = []
    labels = []
    # Input columns, oldest lag first: t-n_in, ..., t-1.
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        labels.extend('var%d(t-%d)' % (v, lag) for v in var_ids)
    # Output columns: t, t+1, ..., t+n_out-1.
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            labels.extend('var%d(t)' % v for v in var_ids)
        else:
            labels.extend('var%d(t+%d)' % (v, step) for v in var_ids)

    agg = concat(shifted, axis=1)
    agg.columns = labels
    if dropnan:
        agg.dropna(inplace=True)
    return agg
	
# Split the supervised data into training and test sets
def prepare_data(series, n_test, n_lay, n_seq):
    """Convert a series to supervised rows and split off the last ``n_test`` rows.

    Args:
        series: Object whose ``.values`` is reshaped into a single column.
        n_test: Number of trailing supervised rows returned as the test set.
        n_lay: Number of lag (input) steps passed to ``series_to_supervised``.
        n_seq: Number of output steps passed to ``series_to_supervised``.

    Returns:
        A ``(train, test)`` tuple of arrays of supervised rows.
    """
    column = series.values
    column = column.reshape(len(column), 1)

    framed = series_to_supervised(column, n_lay, n_seq).values

    split = -n_test
    return framed[0:split], framed[split:]
 
# Persistence-model forecast:
# the last observation is used as the prediction for each of the next n_seq steps
def persistence(last_ob, n_seq):
    """Return a list holding ``last_ob`` repeated ``n_seq`` times."""
    return [last_ob] * n_seq
 
# Run the persistence model over the test set
def make_forcast(train, test, n_lay, n_seq):
    """Produce one persistence forecast per row of the test set.

    Args:
        train: Training rows; not used by this function.
        test: 2-D array of supervised rows, indexed as ``test[row, columns]``.
        n_lay: Number of leading columns in each row that are lag inputs.
        n_seq: Number of steps to forecast for each row.

    Returns:
        A list with one forecast (a list of length ``n_seq``) per test row.
    """
    forcasts = list()
    for i in range(len(test)):
        # Slice with the n_lay argument rather than a module-level global.
        x = test[i, 0:n_lay]
        # The most recent input observation is the value that gets repeated.
        forcasts.append(persistence(x[-1], n_seq))
    return forcasts
 
# Forecast evaluation
def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the RMSE of the forecasts separately for each forecast step.

    Args:
        test: 2-D array of supervised rows; column ``n_lag + i`` holds the
            actual value for forecast step ``i``.
        forcasts: Sequence of forecasts, each indexable by step ``0 .. n_seq-1``.
        n_lag: Number of leading input columns in ``test``.
        n_seq: Number of forecast steps to evaluate.
    """
    for i in range(n_seq):
        # Everything below belongs to the loop body so each step gets its own RMSE.
        actual = test[:, (n_lag + i)]
        predicted = [forcast[i] for forcast in forcasts]
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # RMSE for each forecast length 1 .. n_seq
        print('t+%d RMSE:%f' % ((i + 1), rmse))
def plot_forcasts(series, forcasts, n_test):
    """Plot the original series and overlay each forecast as a red segment.

    Args:
        series: Object with ``.values`` and a length; plotted as the baseline.
        forcasts: Sequence of forecasts, each a list of predicted values.
        n_test: Offset from the end of the series used to position forecast
            ``i`` at index ``len(series) - n_test + i - 1``.
    """
    values = series.values
    # Original data
    pyplot.plot(values)
    # Forecast data
    for i, forcast in enumerate(forcasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forcast) + 1
        xaxis = list(range(off_s, off_e))
        # Start each segment at the observed value so it connects to the series.
        yaxis = [values[off_s]] + forcast
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(values[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()
 
# Load the dataset; the first column is parsed as the date index.
# NOTE(review): squeeze= and date_parser= target an older pandas API and are
# not accepted by pandas 2.x - confirm the pandas version before running.
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0], index_col=0, squeeze=True,
                  date_parser=parser)

# One lag step as input, forecast three steps ahead
n_lag = 1
n_seq = 3
# Given the last 12 months and a 3-month horizon, 10 forecasts
# (10 three-month windows) can be made
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forcast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
evaluate_forcasts(test, forecasts, n_lag, n_seq)
# NOTE(review): n_test + 2 (= 12) presumably matches the 12 held-out months
# mentioned above - confirm.
plot_forcasts(series, forecasts, n_test + 2)








發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章