數據簡介
數據是常見的氣溫預測數據
先導入數據
# Load the daily temperature dataset; each row is one day's observations.
features = pd.read_csv('temps.csv')
# Peek at the first rows to sanity-check the columns.
features.head()
- year,month,day,week 分別表示具體的時間
- temp_2:前天的最高溫度值
- temp_1:昨天的最高溫度值
- average:在歷史中,每年這一天的平均最高溫度值
- actual:這就是我們的標籤值了,當天的真實最高溫度
- friend:這一列可能是湊熱鬧的,你的朋友猜測的可能值,咱們不管它就好了
我們需要先對時間數據進行簡單的處理
# Parse the year/month/day columns into datetime objects.
import datetime

years = features['year']
months = features['month']
days = features['day']

# Build datetime objects directly from the numeric columns — no need to
# format a 'Y-m-d' string and parse it back with strptime.
dates = [datetime.datetime(int(y), int(m), int(d))
         for y, m, d in zip(years, months, days)]
dates[:5]
[datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 1, 2, 0, 0),
datetime.datetime(2016, 1, 3, 0, 0),
datetime.datetime(2016, 1, 4, 0, 0),
datetime.datetime(2016, 1, 5, 0, 0)]
對星期進行獨熱編碼
# One-hot encode the categorical column(s) (the 'week' names);
# get_dummies leaves the numeric columns untouched.
features = pd.get_dummies(features)
features.head(5)
構建預測數據集,並標準化
# Target values: the actual max temperature for each day.
labels = np.array(features['actual'])

# Remove the target column so it is not fed back in as an input feature.
features = features.drop(columns='actual')

# Keep the column names around for later inspection.
feature_list = list(features.columns)

# Convert to a plain numpy array and standardize every column
# (zero mean, unit variance).
features = np.array(features)
from sklearn import preprocessing
input_features = preprocessing.StandardScaler().fit_transform(features)
構建預測網絡-基礎版
# Convert inputs/targets to tensors (float64, matching numpy's default dtype).
x = torch.tensor(input_features, dtype=float)
y = torch.tensor(labels, dtype=float)

# Derive the input width from the data instead of hard-coding 14 —
# the one-hot step may produce a different number of columns.
num_features = input_features.shape[1]
hidden_units = 128

# Randomly initialized parameters for a one-hidden-layer network.
weights = torch.randn((num_features, hidden_units), dtype=float, requires_grad=True)
biases = torch.randn(hidden_units, dtype=float, requires_grad=True)
weights2 = torch.randn((hidden_units, 1), dtype=float, requires_grad=True)
biases2 = torch.randn(1, dtype=float, requires_grad=True)

learning_rate = 0.001
losses = []  # training-loss history, one entry per iteration
進行計算
for i in range(1000):
    # Hidden layer: linear transform followed by ReLU.
    hidden = x.mm(weights) + biases
    hidden = torch.relu(hidden)

    # Output layer: one predicted temperature per sample, shape (n, 1).
    predictions = hidden.mm(weights2) + biases2

    # MSE loss. Reshape the targets to (n, 1) so they align with the
    # predictions — otherwise (predictions - y) broadcasts (n, 1)
    # against (n,) into an (n, n) matrix and the loss pairs every
    # prediction with every target.
    loss = torch.mean((predictions - y.reshape(-1, 1)) ** 2)
    losses.append(loss.data.numpy())

    # Report progress every 100 iterations.
    if i % 100 == 0:
        print('loss:', loss)

    # Backpropagation.
    loss.backward()

    # Gradient-descent update: step against the gradient.
    weights.data.add_(- learning_rate * weights.grad.data)
    biases.data.add_(- learning_rate * biases.grad.data)
    weights2.data.add_(- learning_rate * weights2.grad.data)
    biases2.data.add_(- learning_rate * biases2.grad.data)

    # Zero the gradients so they do not accumulate across iterations.
    weights.grad.data.zero_()
    biases.grad.data.zero_()
    weights2.grad.data.zero_()
    biases2.grad.data.zero_()
loss: tensor(4505.6704, dtype=torch.float64, grad_fn=)
loss: tensor(151.8154, dtype=torch.float64, grad_fn=)
loss: tensor(146.2549, dtype=torch.float64, grad_fn=)
loss: tensor(144.3417, dtype=torch.float64, grad_fn=)
loss: tensor(143.3004, dtype=torch.float64, grad_fn=)
loss: tensor(142.5979, dtype=torch.float64, grad_fn=)
loss: tensor(142.0784, dtype=torch.float64, grad_fn=)
loss: tensor(141.6847, dtype=torch.float64, grad_fn=)
loss: tensor(141.3721, dtype=torch.float64, grad_fn=)
loss: tensor(141.1166, dtype=torch.float64, grad_fn=)
簡單版
利用pytorch下nn模塊
# Hyper-parameters for the nn-module version of the model.
input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16

# Two-layer network: Linear -> Sigmoid -> Linear.
hidden_layer = torch.nn.Linear(input_size, hidden_size)
activation = torch.nn.Sigmoid()
output_layer = torch.nn.Linear(hidden_size, output_size)
my_nn = torch.nn.Sequential(hidden_layer, activation, output_layer)

# Mean-squared-error loss and an Adam optimizer over the network parameters.
cost = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(my_nn.parameters(), lr = 0.001)
訓練
# Train the network with mini-batch gradient descent.
losses = []
for i in range(1000):
    batch_loss = []
    for start in range(0, len(input_features), batch_size):
        end = min(start + batch_size, len(input_features))
        # Inputs and targets do not need gradients themselves; only the
        # network parameters are optimized.
        xx = torch.tensor(input_features[start:end], dtype=torch.float)
        # Reshape targets to (batch, 1) to match the network output —
        # a (batch,) target silently broadcasts inside MSELoss and
        # pairs every prediction with every target.
        yy = torch.tensor(labels[start:end], dtype=torch.float).reshape(-1, 1)
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        optimizer.zero_grad()
        # No retain_graph needed: a fresh graph is built every batch.
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.data.numpy())

    # Record and report the mean batch loss every 100 iterations.
    if i % 100 == 0:
        losses.append(np.mean(batch_loss))
        print(i, np.mean(batch_loss))
0 3950.7627
100 37.9201
200 35.654438
300 35.278366
400 35.116814
500 34.986076
600 34.868954
700 34.75414
800 34.637356
900 34.516705
數據預測
# Predict over the full standardized dataset. Inference only, so run
# under no_grad and avoid the deprecated .data attribute.
x = torch.tensor(input_features, dtype=torch.float)
with torch.no_grad():
    predict = my_nn(x).numpy()