#coding=utf-8
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_squared_error
from math import sqrt
from matplotlib import pyplot
from pandas import datetime
def parser(x):
    """Parse a date string written as ``YYYY/MM/DD`` into a datetime.

    Args:
        x: Date text such as ``'2001/03/01'``.

    Returns:
        The ``datetime`` produced by ``datetime.strptime`` for that text.
    """
    date_format = '%Y/%m/%d'
    return datetime.strptime(x, date_format)
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged observations followed by ``n_out``
    observations starting at time ``t``.

    Args:
        data: Observations accepted by the ``DataFrame`` constructor: a flat
            list, a list of rows, a 1-D/2-D array or a Series.
        n_in: Number of lag steps (t-n_in ... t-1) used as input columns.
        n_out: Number of steps (t ... t+n_out-1) used as output columns.
        dropnan: When true, drop the rows made incomplete by shifting.

    Returns:
        A DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)``
        and ``var<j>(t+<i>)``.
    """
    df = DataFrame(data)  # wrapping the data adds row and column labels
    # Count the variables on the frame itself: a flat list, a 1-D array or a
    # Series has no usable ``shape[1]``, and a list of rows may hold more
    # than one variable.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: t-n_in, ..., t-1.
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Forecast sequence: t, t+1, ..., t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        # Shifting leaves NaN at the edges; those rows are incomplete samples.
        agg.dropna(inplace=True)
    return agg
# Split the series into training and test data.
def prepare_data(series, n_test, n_lay, n_seq):
    """Turn a series into supervised rows and split off the last ``n_test``.

    Args:
        series: Object exposing ``.values`` (the script passes the series
            loaded from CSV).
        n_test: Number of trailing supervised rows kept as the test set.
        n_lay: Number of lag observations per row, forwarded to
            ``series_to_supervised`` as ``n_in``.
        n_seq: Number of forecast steps per row, forwarded as ``n_out``.

    Returns:
        A ``(train, test)`` tuple of arrays sliced from the supervised values.
    """
    column = series.values
    # Reshape to a single column so the values form a 2-D table.
    column = column.reshape(len(column), 1)
    frame = series_to_supervised(column, n_lay, n_seq)
    rows = frame.values
    return rows[0:-n_test], rows[-n_test:]
# Persistence-model forecast:
# reuse the last observation as the prediction for each of the next n_seq steps.
def persistence(last_ob, n_seq):
    """Return a list that repeats ``last_ob`` ``n_seq`` times."""
    return [last_ob] * n_seq
# Run the persistence model over every test row.
def make_forcast(train, test, n_lay, n_seq):
    """Produce one persistence forecast per row of ``test``.

    Args:
        train: Training rows; accepted for interface symmetry but not used by
            the persistence model.
        test: Iterable of rows whose first ``n_lay`` entries are the lag
            observations.
        n_lay: Number of lag observations at the start of each row.
        n_seq: Number of steps to forecast for each row.

    Returns:
        A list with one ``n_seq``-long forecast list per test row.
    """
    forcasts = []
    for row in test:
        # Use the n_lay parameter rather than a module-level n_lag, so the
        # function works when called outside this script.
        lags = row[0:n_lay]
        # The most recent lag observation is the persistence forecast.
        forcasts.append(persistence(lags[-1], n_seq))
    return forcasts
# Forecast evaluation.
def evaluate_forcasts(test, forcasts, n_lag, n_seq):
    """Print the RMSE of the forecasts separately for each forecast step.

    Args:
        test: 2-D array indexed as ``test[:, col]``; column ``n_lag + i``
            holds the actual values for step ``i``.
        forcasts: Sequence of forecasts, each indexable by step.
        n_lag: Number of lag columns preceding the actual values in ``test``.
        n_seq: Number of forecast steps to evaluate.

    Returns:
        None. The results are written to standard output.
    """
    for i in range(n_seq):
        actual = test[:, (n_lag + i)]
        predicted = [forcast[i] for forcast in forcasts]
        # Single-argument print(...) calls run on both Python 2 and Python 3.
        print('predicted')
        print(predicted)
        rmse = sqrt(mean_squared_error(actual, predicted))
        # One RMSE per forecast horizon, 1..n_seq.
        print('t+%d RMSE:%f' % ((i + 1), rmse))
def plot_forcasts(series, forcasts, n_test):
    """Plot the series and overlay every forecast as a red line.

    Each forecast line starts at the observation just before its first
    predicted step, so it visually connects to the original curve.

    Args:
        series: Object exposing ``.values`` and ``len()``.
        forcasts: List of forecast lists (each must be a ``list`` because it
            is concatenated to the anchoring observation).
        n_test: Distance from the end of ``series`` at which the first
            forecast begins.

    Returns:
        None. The figure is shown with ``pyplot.show()``.
    """
    values = series.values
    # Original data.
    pyplot.plot(values)
    # Forecast data.
    for i, forcast in enumerate(forcasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forcast) + 1
        xaxis = list(range(off_s, off_e))
        # Prepend the real observation at off_s to anchor the forecast line.
        yaxis = [values[off_s]] + forcast
        # Single-argument print(...) calls run on both Python 2 and Python 3.
        print('xaxis')
        print(xaxis)
        print('yaxis')
        print(yaxis)
        print('series.values[off_s]')
        print(values[off_s])
        pyplot.plot(xaxis, yaxis, color='red')
    pyplot.show()
# Load the data set; the first column holds dates parsed by parser().
# NOTE(review): `squeeze` and `date_parser` are deprecated or removed in recent
# pandas releases -- confirm the pandas version this script targets.
series = read_csv('data_set/shampoo-sales.csv', header=0, parse_dates=[0],
                  index_col=0, squeeze=True, date_parser=parser)
# Use one lag step as input and forecast 3 steps ahead.
n_lag = 1
n_seq = 3
# With the last 12 months held out and a 3-month horizon, 10 forecasts
# (ten 3-month windows) can be made.
n_test = 10
train, test = prepare_data(series, n_test, n_lag, n_seq)
print('train data')
print(train)
print('test data')
print(test)
forecasts = make_forcast(train, test, n_lag, n_seq)
print('forecasts')
print(forecasts)
evaluate_forcasts(test, forecasts, n_lag, n_seq)
# n_test + 2: the supervised framing drops the last n_seq - 1 = 2 rows, so the
# test window starts n_test + 2 observations from the end of the series.
plot_forcasts(series, forecasts, n_test + 2)