基于LSTM的多变量多输出温湿度预测

1、模块导入

import tensorflow as tf
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
from tensorflow.keras import utils,losses,layers,Sequential
from tensorflow.keras.callbacks import ModelCheckpoint,TensorBoard


# Global matplotlib defaults for every figure created below:
# 10x8 inch canvas, 150 dpi, grid lines switched off.
mpl.rcParams.update({
    'figure.figsize': (10, 8),
    'figure.dpi': 150,
    'axes.grid': False,
})

2、加载数据集、预处理

使用 Max Planck Institute for Biogeochemistry 的天气时间序列数据集。该数据集包含14个不同的特征，例如气温，大气压力和湿度。从2003年开始，每10分钟收集一次。为了提高效率，本文仅使用2009年至2016年之间收集的数据。

下载数据集：

# Fetch the zipped Jena climate CSV through the Keras download helper;
# extract=True asks the helper to unpack the archive as well.
DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip'
zip_path = tf.keras.utils.get_file(
    fname='jena_climate_2009_2016.csv.zip',
    origin=DATA_URL,
    extract=True,
)
# Stripping the trailing ".zip" extension yields the path used for the CSV.
csv_path = os.path.splitext(zip_path)[0]

加载数据集：

# Load the CSV and use the parsed "Date Time" column as the index.
# The Jena file writes timestamps day-first ("DD.MM.YYYY HH:MM:SS"), so
# dayfirst=True is needed: without it an ambiguous date such as 02.01.2009
# can be read as February 1st, which would corrupt the month feature
# derived from the index later on.
df = pd.read_csv(
    csv_path,
    parse_dates=['Date Time'],
    dayfirst=True,
    index_col=['Date Time'],
)
df.head()

	p (mbar)	T (degC)	Tpot (K)	Tdew (degC)	rh (%)	VPmax (mbar)	VPact (mbar)	VPdef (mbar)	sh (g/kg)	H2OC (mmol/mol)	rho (g/m**3)	wv (m/s)	max. wv (m/s)	wd (deg)
Date Time
2009-01-01 00:10:00	996.52	-8.02	265.40	-8.90	93.3	3.33	3.11	0.22	1.94	3.12	1307.75	1.03	1.75	152.3
2009-01-01 00:20:00	996.57	-8.41	265.01	-9.28	93.4	3.23	3.02	0.21	1.89	3.03	1309.80	0.72	1.50	136.1
2009-01-01 00:30:00	996.53	-8.51	264.91	-9.31	93.9	3.21	3.01	0.20	1.88	3.02	1310.24	0.19	0.63	171.6
2009-01-01 00:40:00	996.51	-8.31	265.12	-9.07	94.2	3.26	3.07	0.19	1.92	3.08	1309.19	0.34	0.50	198.0
2009-01-01 00:50:00	996.51	-8.27	265.15	-9.04	94.1	3.27	3.08	0.19	1.92	3.09	1309.00	0.32	0.63	214.3

如上所示，每10分钟记录一次观测值，一个小时内有6个观测值，一天有144（6x24）个观测值。后文会根据Date Time索引为数据集插入新列（年、月、小时）。

3、数据可视化

画图看相关性，提取有效特征集

这里选择p (mbar)、Tdew (degC)、max. wv (m/s)作为候选特征，观察它们与T (degC)和rh (%)的关系（注意：后文实际用于建模的特征列表中并不包含Tdew (degC)）。

# Seaborn line plots of temperature against each candidate feature.
# Each entry is (feature column, number of leading rows to plot).
for feature_col, n_rows in (
    ('p (mbar)', 10000),
    ('Tdew (degC)', 10000),
    ('max. wv (m/s)', 50000),
):
    plt.figure(figsize=(16, 8))
    sns.lineplot(x=feature_col, y='T (degC)', data=df[:n_rows])
    plt.show()

以上是T (degC)与p (mbar)、Tdew (degC)、max. wv (m/s)之间的关系

给数据集插入新列，取值为Date Time索引对应的年、小时和月份

# Copy calendar components of the datetime index into ordinary columns
# (same insertion order as before: year, hour, month).
for part in ('year', 'hour', 'month'):
    df[part] = getattr(df.index, part)

时间与温度的点图

# Point plot of temperature by hour of day, one hue per month,
# drawn from the first 50000 rows only.
hourly_sample = df.iloc[:50000]
plt.figure(figsize=(16, 8))
sns.pointplot(data=hourly_sample, x='hour', y='T (degC)', hue='month')
plt.show()

时间与湿度的点图

# Point plot of relative humidity by hour of day, one hue per month,
# drawn from the first 50000 rows only.
hourly_sample = df.iloc[:50000]
plt.figure(figsize=(16, 8))
sns.pointplot(data=hourly_sample, x='hour', y='rh (%)', hue='month')
plt.show()

由于温度与一天中的小时有关，而且0-23构成一个循环，所以用三角函数提取周期信息。同时使用sin和cos，是因为单独使用其中一个时，不同的小时会得到相同的取值；两者组合后，24小时周期内的每个小时都有唯一的编码，并且23点与0点在编码上相邻。

# Encode the hour of day on a 24-hour circle: one full turn (2*pi) per day.
# The angle is computed once as a plain array and reused for both columns.
hour_angle = df['hour'].to_numpy() * (2 * np.pi / 24)
df['sin(h)'] = np.sin(hour_angle)
df['cos(h)'] = np.cos(hour_angle)
df

4、数据预处理

切分数据集

# Sliding-window builder: x holds the selected feature columns, y the label
# column(s), e.g. x = df[future], y = df[['T (degC)', 'rh (%)']].
# Example call:
#   train_dataset, train_labels = multivariate_data(x_train, y_train, 0, 100000, 3, 1, 1, True)
def multivariate_data(x, y, start_index, end_index, history_size, target_size, step, single_step):
    """Cut aligned feature/label tables into sliding-window samples.

    For every position ``i`` in ``range(start_index + history_size, end_index)``
    one sample is produced from the feature rows
    ``i - history_size, i - history_size + step, ...`` (all strictly below ``i``).

    Args:
        x: Feature table (DataFrame or array-like), one row per time step.
        y: Label table aligned row-for-row with ``x``.
        start_index: Row at which the first history window starts.
        end_index: Exclusive upper bound for ``i``. ``None`` means
            ``len(x) - target_size``, the last position that still has a label.
        history_size: Number of rows spanned by each history window.
        target_size: Offset of the label row from ``i`` (single-step) or
            number of label rows taken starting at ``i`` (multi-step).
        step: Stride used when sampling rows inside the history window.
        single_step: If true, each label is the single row ``i + target_size``;
            otherwise it is the slice ``i : i + target_size``.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays with one entry per window.

    Raises:
        IndexError: If a window or a single-step label index falls outside
            the tables (e.g. ``end_index`` too close to the end).
    """
    # Convert once up front; indexing a pandas object on every iteration is
    # far slower than indexing the underlying array, and the values are equal.
    x_values = np.asarray(x)
    y_values = np.asarray(y)

    start_index = start_index + history_size

    if end_index is None:
        # Previously this read len(dataset), an undefined name, so passing
        # end_index=None raised NameError.
        end_index = len(x_values) - target_size

    data = []
    labels = []
    for i in range(start_index, end_index):
        # Row positions of the history window, sampled every `step` rows.
        window = np.arange(i - history_size, i, step)
        data.append(x_values[window])

        if single_step:
            labels.append(y_values[i + target_size])
        else:
            labels.append(y_values[i:i + target_size])

    return np.array(data), np.array(labels)

数据归一化

# Columns fed to the model; the two targets are included as inputs as well.
future = ['sin(h)', 'cos(h)', 'month', 'max. wv (m/s)', 'p (mbar)', 'T (degC)', 'rh (%)']

# Min-max scale every feature except the sin/cos encodings, which already
# lie in [-1, 1]. Each column gets its own freshly fitted scaler.
# NOTE(review): the scalers are fitted on the full frame, i.e. before the
# train/test split that follows later in this file.
_already_bounded = ('sin(h)', 'cos(h)')
for col in future:
    scaler = MinMaxScaler()
    if col in _already_bounded:
        continue
    column_2d = df[[col]].to_numpy()
    df[col] = scaler.fit_transform(column_2d)

格式转化与分组和打乱

# Model inputs are the `future` columns; targets are temperature and humidity.
label = ['T (degC)', 'rh (%)']
x, y = df[future], df[label]

# 70/30 train/test split; shuffle=False keeps the rows in their original order.
x_train, x_test, y_train, y_test = train_test_split(
    x, y,
    test_size=0.3,
    shuffle=False,
    random_state=13,
)

# Window both splits with identical settings: every sample holds 3 consecutive
# rows of the 7 feature columns, and every label is one row of the 2 targets.
# Only positions below 100000 of each split are used.
window_kwargs = dict(
    start_index=0,
    end_index=100000,
    history_size=3,
    target_size=1,
    step=1,
    single_step=True,
)
train_dataset, train_labels = multivariate_data(x_train, y_train, **window_kwargs)
test_dataset, test_labels = multivariate_data(x_test, y_test, **window_kwargs)

# Wraps windowed sample/label arrays in a tf.data pipeline.
def create_batch_dataset(x, y, train=True, buffer_size=1000, batch_size=128):
    """Turn sample and label arrays into a batched ``tf.data.Dataset``.

    Args:
        x: Sample array; sliced along its first axis.
        y: Label array with the same first-axis length as ``x``.
        train: When true, the pipeline is cached and shuffled before
            batching; when false it is only batched.
        buffer_size: Shuffle buffer size (used only when ``train`` is true).
        batch_size: Number of samples per batch.

    Returns:
        The batched dataset of ``(x, y)`` pairs.
    """
    pairs = tf.data.Dataset.from_tensor_slices((tf.constant(x), tf.constant(y)))
    if not train:
        return pairs.batch(batch_size)
    return pairs.cache().shuffle(buffer_size).batch(batch_size)
        
# Build the two pipelines: the training one uses the default train=True path,
# the test one is only batched.
train_batch_dataset = create_batch_dataset(x=train_dataset, y=train_labels)
test_batch_dataset = create_batch_dataset(x=test_dataset, y=test_labels, train=False)

5、模型搭建、编译、训练

# Stacked-LSTM regressor: three LSTM layers (dropout after the first two)
# followed by a 2-unit dense output, one unit per target column.
model = Sequential([
    # input_shape is the last two axes of the windowed training array
    # (rows per window, feature columns); the batch axis is not included.
    layers.LSTM(256, return_sequences=True, input_shape=train_dataset.shape[-2:]),
    layers.Dropout(0.4),
    layers.LSTM(128, return_sequences=True),
    layers.Dropout(0.3),
    layers.LSTM(32),
    layers.Dense(2),
])

# Adam optimizer with mean-squared-error loss.
model.compile(loss='mse', optimizer='adam')

# Render a diagram of the model architecture.
utils.plot_model(model)

# Checkpoint: keep only the weights of the epoch with the lowest training loss.
checkpoint_file = 'test_model.hdf5'
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_file,
    monitor='loss',
    mode='min',  # was misspelled "moode", so the intended mode was never applied
    save_best_only=True,
    save_weights_only=True,
)

# Train for 30 epochs, validating against the test pipeline.
history = model.fit(
    train_batch_dataset,
    epochs=30,
    validation_data=test_batch_dataset,
    callbacks=[checkpoint_callback],
)

通过history获取模型每步训练取得的结果loss和val_loss

# Training and validation loss per epoch, read from the fit history.
plt.figure(figsize=(8, 8), dpi=200)
for metric_key in ('loss', 'val_loss'):
    plt.plot(history.history[metric_key])
plt.title('model train vs validation loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='best')
plt.show()

6、模型验证

# Run the model over the windowed test samples and peek at the first rows.
test_preds = model.predict(test_dataset, verbose=1)
test_preds[:10]

# Split predictions and labels per target. Column order follows `label`:
# column 0 is temperature, column 1 is relative humidity.
TEMP_preds, HUM_preds = test_preds[:, 0], test_preds[:, 1]
TEMP_labels, HUM_labels = test_labels[:, 0], test_labels[:, 1]

温度预测

# R^2 score of the temperature predictions against the true values.
score = r2_score(y_true=TEMP_labels, y_pred=TEMP_preds)
print(score)

0.991836296750627

# Overlay true and predicted temperature for every test sample.
plt.figure(figsize=(16, 8))
for series, legend_name in ((TEMP_labels, "True value"), (TEMP_preds, "Pred value")):
    plt.plot(series, label=legend_name)
plt.legend(loc='best')
plt.show()

湿度预测

# R^2 score of the humidity predictions against the true values.
score = r2_score(y_true=HUM_labels, y_pred=HUM_preds)
print(score)

0.9854786099464197

# Overlay true and predicted humidity. The full series are plotted;
# no subset of samples is selected here.
plt.figure(figsize=(16, 8))
for series, legend_name in ((HUM_labels, "True value"), (HUM_preds, "Pred value")):
    plt.plot(series, label=legend_name)
plt.legend(loc='best')
plt.show()

基于LSTM的多变量多输出温湿度预测

1、模块导入

2、加载数据集、预处理

3、数据可视化

4、数据预处理

切分数据集

数据归一化

格式转化与分组和打乱

5、模型搭建、编译、训练

6、模型验证

温度预测

湿度预测

Golang爬虫代理接入的技术与实践

牛客SQL-大廠面試真題

Flink CDC

數據庫流轉工具—Maxwell

騰訊音樂SQL題

森林火災模擬軟件--FlamMap

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結