Tensorflow2.x：使用RNNs預測溫度時間序列（時間序列數據轉爲tf輸入流）

原創

十里清风

2020-05-21 15:36

本文使用RNNs預測溫度，數據集使用weather time series dataset，該數據集包含14種不同特徵，如氣溫、氣壓、溼度等，數據的統計間隔爲10分鐘，涵蓋2009年至2016年，共計約42萬條數據。

根據輸入特徵數、輸出序列長度不同，本文使用三種實現：

Simple LSTM Model: 單一特徵（溫度）預測未來單一時刻的溫度；
多特徵（溫度、溼度和氣壓）預測未來單一時刻的溫度
多特徵（溫度、溼度和氣壓）預測未來多個時刻的溫度；

本文爲Tensorflow官方指南學習筆記，自己整理的完整代碼見github。

基本依賴包和全局變量

import tensorflow as tf

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

# Plot defaults shared by every figure: 8x6 figure size, grid lines off.
mpl.rcParams.update({
    'figure.figsize': (8, 6),
    'axes.grid': False,
})

# Seed TensorFlow's global random generator.
tf.random.set_seed(13)

# Row index where the series is cut: rows before it train, the rest validate.
TRAIN_SPLIT = 300000
# Shuffle buffer size applied to the training pipeline.
BUFFER_SIZE = 10000
# Passed to `model.fit` as `steps_per_epoch`.
EVALUATION_INTERVAL = 200
# Number of epochs each model is trained for.
EPOCHS = 20
# Number of samples per batch in both training and validation pipelines.
BATCH_SIZE = 256

載入數據集

# Download the Jena climate archive and unpack it.
zip_path = tf.keras.utils.get_file(
    origin='https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip',
    fname='jena_climate_2009_2016.csv.zip',
    extract=True)
if os.path.isdir(zip_path):
    # NOTE(review): some Keras releases appear to return the extraction
    # directory (not the archive path) when `extract=True`; in that case the
    # CSV is looked up inside that directory.
    csv_path = os.path.join(zip_path, 'jena_climate_2009_2016.csv')
else:
    # Archive path returned: the CSV is expected next to it, named like the
    # archive without its trailing ".zip".
    csv_path, _ = os.path.splitext(zip_path)
df = pd.read_csv(csv_path)
# Quick look at the raw temperature series, indexed by timestamp.
df[['T (degC)', 'Date Time']].set_index('Date Time').plot(subplots=True)

公共函數：數據集轉爲模型輸入

def get_dataset(data, target, history_size, target_size, step=1, single=True, training=False):
    """Build a batched, repeating ``tf.data.Dataset`` of (history, label) pairs.

    For every window start ``i`` one sample is produced:

    * input: rows ``i`` to ``i + history_size - 1`` of ``data``, keeping every
      ``step``-th row and all columns;
    * label when ``single`` is true: column ``target`` of the single row
      ``i + history_size + target_size - 1`` (a length-1 vector);
    * label when ``single`` is false: column ``target`` of the ``target_size``
      rows that directly follow the history window.

    Args:
        data: 2-D array indexed as ``data[row, column]``; rows are time steps.
        target: Column index of the variable to predict.
        history_size: Number of raw rows spanned by each input window.
        target_size: Prediction horizon (``single=True``) or number of future
            rows to predict (``single=False``).
        step: Sub-sampling stride applied inside each input window.
        single: Selects single-point labels (True) or label sequences (False).
        training: When true the pipeline is cached and shuffled with
            ``BUFFER_SIZE`` before batching.

    Returns:
        A ``tf.data.Dataset`` of float32 ``(x, y)`` batches of ``BATCH_SIZE``
        samples. ``repeat()`` is applied in both modes, so consumers have to
        bound iteration themselves (e.g. ``steps_per_epoch`` or ``take``).

    Raises:
        ValueError: If ``data`` has fewer than ``history_size + target_size``
            rows, i.e. not even one complete sample can be built.
    """
    n = data.shape[0]
    range_size = history_size + target_size
    n_windows = n - range_size + 1
    if n_windows <= 0:
        # Without this guard the arrays below would be empty and the failure
        # would only surface later as an obscure shape error during training.
        raise ValueError(
            'data has {} rows but at least history_size + target_size = {} '
            'are required to build one sample'.format(n, range_size))

    # Inputs and labels are both indexed from the window start so that they
    # stay aligned one-to-one.
    starts = range(n_windows)
    x = [data[i:i + history_size:step] for i in starts]
    if single:
        y = [data[i + range_size - 1:i + range_size, target] for i in starts]
    else:
        y = [data[i + history_size:i + range_size, target] for i in starts]
    x = np.array(x, dtype=np.float32)
    y = np.array(y, dtype=np.float32)

    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    if training:
        dataset = dataset.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
    else:
        dataset = dataset.batch(BATCH_SIZE).repeat()
    return dataset

公共函數：打印過程曲線、預測值

def plot_data(history, future, predict, target_size, step, title, figure=False, show=False):
    """Plot one sample: its history, the true future and the model prediction.

    Args:
        history: Past values of the target variable; flattened before plotting
            and drawn at x positions ``-len(history) .. -1``.
        future: True future value(s); flattened before plotting.
        predict: Predicted future value(s); flattened before plotting and
            drawn at the same x positions as ``future``.
        target_size: Used only for a single future point with ``step != 1``,
            where the point is drawn at ``target_size // step``.
        step: Sub-sampling stride of the history; future positions are scaled
            by it.
        title: Title shown above the axes.
        figure: When true, a new figure is opened before plotting.
        show: When true, ``plt.show()`` is called after plotting.
    """
    if figure:
        plt.figure()
    history = np.array(history).flatten()
    future = np.array(future).flatten()
    predict = np.array(predict).flatten()
    if future.shape[0] == 1:
        # Single point
        delta = [0] if step == 1 else [target_size // step]
    else:
        # Sequence of points, expressed in units of the history's stride
        delta = np.arange(future.shape[0]) / step
    plt.plot(range(-history.shape[0], 0), history, '.-', label='History')
    plt.plot(delta, future, 'rx', label='True Future')
    plt.plot(delta, predict, 'go', alpha=0.5, label='Model Prediction')
    # The title argument used to be accepted but silently dropped.
    plt.title(title)
    plt.legend()
    plt.xlim([-history.shape[0], (max(delta) + 5) * 2])
    plt.xlabel('Time-Step')
    if show:
        plt.show()


def plot_history(history, title):
    """Draw training and validation loss per epoch on a log-scaled y-axis.

    Args:
        history: Object whose ``history`` attribute maps ``'loss'`` and
            ``'val_loss'`` to per-epoch values (as returned by ``model.fit``
            in this file).
        title: Title shown above the axes.
    """
    plt.figure()
    record = history.history
    epoch_axis = range(len(record['loss']))
    curves = (
        ('loss', 'b', 'Training Loss'),
        ('val_loss', 'r--', 'Validation Loss'),
    )
    for key, line_format, legend_label in curves:
        plt.semilogy(epoch_axis, record[key], line_format, label=legend_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel('Log loss')
    plt.legend()
    plt.show()

模型一：Simple LSTM Model

# Univariate input: the temperature column only, as a 2-D (rows, 1) array.
dataset = df[['T (degC)']].to_numpy()

# Standard normalization using statistics of the training rows only.
# The result is bound to a new array rather than written back in place:
# an in-place write through the DataFrame's own buffer would silently
# normalize `df` as well, and fails when pandas hands out a read-only array.
train_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
train_std = dataset[:TRAIN_SPLIT].std(axis=0)
dataset = (dataset - train_mean) / train_std

# Split into training and validation rows
train_data, valid_data = dataset[:TRAIN_SPLIT], dataset[TRAIN_SPLIT:]

# tf.data.Dataset input pipeline: 10 past steps -> the next single step
target, history_size, target_size, step = 0, 10, 1, 1
train_dataset = get_dataset(train_data, target, history_size, target_size, training=True)
valid_dataset = get_dataset(valid_data, target, history_size, target_size, training=False)

# Recurrent neural network: one LSTM layer followed by a scalar regression head
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(8, input_shape=(history_size, dataset.shape[1])),
    tf.keras.layers.Dense(1)])
model.compile(optimizer='adam', loss='mse')
# Both datasets repeat forever, so epochs are bounded by explicit step counts.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=valid_dataset,
    validation_steps=50)

# Show the loss curves
plot_history(history, 'Simple Model Training and Validation Loss')

# Show the prediction for the first sample of three validation batches
for batch_x, batch_y in valid_dataset.take(3):
    plt.figure(figsize=(10, 5))
    past = batch_x[0, :, target]
    truth = batch_y[0]
    predicted = model.predict(batch_x)[0]
    plot_data(past, truth, predicted, target_size, step, 'Simple LSTM model')

模型二：Single Variate Model

# Three input features: pressure, temperature and air density.
# NOTE: despite the "Single-Variate" wording in the plot titles below, this
# model consumes all three features and predicts one future temperature value.
dataset = df[['p (mbar)', 'T (degC)', 'rho (g/m**3)']].to_numpy()

# Standard normalization using statistics of the training rows only.
# Bound to a new array instead of written in place, which fails when pandas
# hands out a read-only array.
train_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
train_std = dataset[:TRAIN_SPLIT].std(axis=0)
dataset = (dataset - train_mean) / train_std

# Split into training and validation rows
train_data, valid_data = dataset[:TRAIN_SPLIT], dataset[TRAIN_SPLIT:]

# tf.data.Dataset input pipeline: 720 past rows sampled every 6th row,
# label is the temperature (column 1) 72 rows after the window ends
history_size, target, step, target_size = 720, 1, 6, 72
train_dataset = get_dataset(train_data, target, history_size, target_size, step, training=True)
valid_dataset = get_dataset(valid_data, target, history_size, target_size, step, training=False)

# NOTE: no TPU strategy object is defined in this listing. To train on a TPU,
# create a distribution strategy and build/compile the model inside its scope.
# Recurrent neural network
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1)])
model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0))
# Both datasets repeat forever, so epochs are bounded by explicit step counts.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=valid_dataset,
    validation_steps=50)

# Show the loss curves
plot_history(history, 'Single-Variate Model Training and Validation Loss')

# Show the prediction for the first sample of three validation batches
for batch_x, batch_y in valid_dataset.take(3):
    plt.figure(figsize=(10, 5))
    past = batch_x[0, :, target]
    truth = batch_y[0]
    predicted = model.predict(batch_x)[0]
    plot_data(past, truth, predicted, target_size, step, 'Single-Variate LSTM model')

模型三：Multi Variate Model

# Three input features: pressure, temperature and air density.
dataset = df[['p (mbar)', 'T (degC)', 'rho (g/m**3)']].to_numpy()

# Standard normalization using statistics of the training rows only.
# Bound to a new array instead of written in place, which fails when pandas
# hands out a read-only array.
train_mean = dataset[:TRAIN_SPLIT].mean(axis=0)
train_std = dataset[:TRAIN_SPLIT].std(axis=0)
dataset = (dataset - train_mean) / train_std

# Split into training and validation rows
train_data, valid_data = dataset[:TRAIN_SPLIT], dataset[TRAIN_SPLIT:]

# tf.data.Dataset input pipeline: 720 past rows sampled every 6th row,
# labels are the temperature (column 1) over the following 72 rows
history_size, target, step, target_size = 720, 1, 6, 72
train_dataset = get_dataset(train_data, target, history_size, target_size, step, single=False, training=True)
valid_dataset = get_dataset(valid_data, target, history_size, target_size, step, single=False, training=False)

# NOTE: no TPU strategy object is defined in this listing. To train on a TPU,
# create a distribution strategy and build/compile the model inside its scope.
# Recurrent neural network: stacked LSTMs and a dense head with one output
# per predicted future step
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32, activation='tanh'),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(200, activation='relu'),
    tf.keras.layers.Dropout(0.1),
    tf.keras.layers.Dense(target_size)])
model.compile(optimizer='adam', loss='mse')
# Both datasets repeat forever, so epochs are bounded by explicit step counts.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=EVALUATION_INTERVAL,
    validation_data=valid_dataset,
    validation_steps=50)

# Show the loss curves
plot_history(history, 'Multi-Variate Model Training and Validation Loss')

# Show the prediction for the first sample of three validation batches
for batch_x, batch_y in valid_dataset.take(3):
    plt.figure(figsize=(10, 5))
    past = batch_x[0, :, target]
    truth = batch_y[0]
    predicted = model.predict(batch_x)[0]
    plot_data(past, truth, predicted, target_size, step, 'Multi-Variate LSTM model')

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

Tensorflow2.x：使用RNNs預測溫度時間序列（時間序列數據轉爲tf輸入流）

基本依賴包和全局變量

載入數據集

公共函數：數據集轉爲模型輸入

公共函數：打印過程曲線、預測值

模型一：Simple LSTM Model

模型二：Single Variate Model

模型三：Multi Variate Model

985 碩士程序員，空窗 4 個月沒有 Offer！

營銷系統黑名單優化：位圖的應用解析

一文搞懂 Spring 循環依賴

我真的從測試轉成了開發......

nginx添加相應配置，通過瀏覽器訪問或curl時返回客戶端對應公網IP

賽博鬥地主——使用大語言模型扮演Agent智能體玩牌類遊戲。

python內置函數——sorted

[oeasy]python020在遊戲中體驗數值自由_勇闖地下城_終端文字遊戲

爲何我建議你學會抄代碼

抖音面試：說說延遲任務的調度算法？

變分自編碼器（VAE：Auto-Encoding Variational Bayes）

深度學習：生成對抗網絡（Generative Adversarial Nets, GANs）

依存句法解析：基於深層雙仿射注意力的神經網絡依存解析（Deep Biaffine Attention for Neural Dependency Parsing）

自然語言處理：機器翻譯模型（MT、NMT、Seq2Seq with Attention）

深度學習：正則化防止過擬合（L1、L2、Dropout）

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結