Autoencoder 簡介
機器學習中,處理大量數據會導致需要大量的計算; 有時候,我們會需要選擇對我們機器學習模型的學習貢獻最大的重點數據部分,從而減少計算量;而選擇數據重要部分的過程,這是自動編碼器(Autoencoder )的應用案例之一。
神經網絡是一種計算模型,用於查找描述數據特徵x及其值(迴歸任務)或標籤(分類任務)y之間的關係的函數,即y = f(x );自動編碼器也是一種神經網絡模型,如下圖所示:
從上面的圖示中,自動編碼器由兩個組件組成:(1)一個編碼器(encoder),它學習數據的表示,即數據的重要特徵;(2)一個解碼器(decoder),它根據所學到的數據表示重建原始數據特徵。
Encoder
第一個組件,編碼器(encoder),類似於常規的前饋網絡(feed-forward network), 但是,它並不負責預測值或標籤;相反,它的任務是學習數據的結構,即數據特徵x的表示過程。
class Encoder(tf.keras.layers.Layer):
    """Learn a compressed representation (code) of the input features.

    A small feed-forward stack: a ReLU hidden layer followed by a
    sigmoid output layer that emits the latent code.
    """

    def __init__(self, intermediate_dim):
        super(Encoder, self).__init__()
        # Hidden projection; He-uniform init pairs well with ReLU.
        self.hidden_layer = tf.keras.layers.Dense(
            units=intermediate_dim,
            activation=tf.nn.relu,
            kernel_initializer='he_uniform',
        )
        # Sigmoid keeps every code value in (0, 1).
        self.output_layer = tf.keras.layers.Dense(
            units=intermediate_dim,
            activation=tf.nn.sigmoid,
        )

    def call(self, input_features):
        hidden = self.hidden_layer(input_features)
        return self.output_layer(hidden)
Decoder
第二組件,解碼器(decoder),也類似於前饋網絡(feed-forward network);但是,它不是將數據縮小到較低維度,而是將數據從其較低維度表示z(即通過encoder後的z)重建爲其原始維度x。
class Decoder(tf.keras.layers.Layer):
    """Reconstruct the original features from a latent code.

    Mirrors the encoder: a ReLU hidden layer, then a sigmoid output
    layer that maps back to the original feature dimension.
    """

    def __init__(self, intermediate_dim, original_dim):
        super(Decoder, self).__init__()
        # Hidden projection; He-uniform init pairs well with ReLU.
        self.hidden_layer = tf.keras.layers.Dense(
            units=intermediate_dim,
            activation=tf.nn.relu,
            kernel_initializer='he_uniform',
        )
        # Sigmoid bounds reconstructed values in (0, 1).
        self.output_layer = tf.keras.layers.Dense(
            units=original_dim,
            activation=tf.nn.sigmoid,
        )

    def call(self, code):
        hidden = self.hidden_layer(code)
        return self.output_layer(hidden)
現在,我們可以通過實例化編碼器和解碼器層來構建自動編碼器模型。
class Autoencoder(tf.keras.Model):
    """Vanilla autoencoder: compress with Encoder, rebuild with Decoder."""

    def __init__(self, intermediate_dim, original_dim):
        super(Autoencoder, self).__init__()
        self.encoder = Encoder(intermediate_dim=intermediate_dim)
        self.decoder = Decoder(
            intermediate_dim=intermediate_dim, original_dim=original_dim
        )

    def call(self, input_features):
        # Encode then decode in a single forward pass.
        return self.decoder(self.encoder(input_features))
一、Tensorflow 2.0版本實現
Tensorflow 2.0安裝:
pip install tensorflow==2.0.0 (無GPU版)
pip install tensorflow-gpu==2.0.0 (帶GPU版)
"""Implementation of vanila autoencoder in TensorFlow 2.0 Subclassing API"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
import tensorflow as tf
# Factory for Dense layers sharing ReLU activation and He-normal init.
dense = partial(tf.keras.layers.Dense, activation=tf.nn.relu, kernel_initializer="he_normal")
class Encoder(tf.keras.Model):
    """Deep encoder: three ReLU layers (500-500-2000), then a sigmoid code layer.

    Args:
        code_dim: Dimensionality of the latent code emitted by `call`.
    """

    def __init__(self, code_dim, **kwargs):
        # Explicit keyword parameter instead of a raw kwargs["code_dim"]
        # lookup: a missing/misspelled argument now fails with a clear
        # TypeError at the call site. Callers already pass code_dim by
        # keyword, so this is backward compatible.
        super(Encoder, self).__init__(**kwargs)
        self.encoder_layer_1 = dense(units=500)
        self.encoder_layer_2 = dense(units=500)
        self.encoder_layer_3 = dense(units=2000)
        # Sigmoid bounds the code values in (0, 1).
        self.code_layer = tf.keras.layers.Dense(
            units=code_dim, activation=tf.nn.sigmoid
        )

    def call(self, features):
        activation = self.encoder_layer_1(features)
        activation = self.encoder_layer_2(activation)
        activation = self.encoder_layer_3(activation)
        return self.code_layer(activation)
class Decoder(tf.keras.Model):
    """Deep decoder: three ReLU layers (2000-500-500), then a sigmoid output.

    Args:
        input_shape: Dimensionality of the original (flattened) features to
            reconstruct; the parameter name matches the keyword callers use.
    """

    def __init__(self, input_shape, **kwargs):
        # Explicit keyword parameter instead of a raw kwargs["input_shape"]
        # lookup — fails fast on a missing/misspelled argument and is
        # backward compatible with keyword callers.
        super(Decoder, self).__init__(**kwargs)
        self.decoder_layer_1 = dense(units=2000)
        self.decoder_layer_2 = dense(units=500)
        self.decoder_layer_3 = dense(units=500)
        # Sigmoid keeps reconstructed pixel values in (0, 1).
        self.reconstruction_layer = tf.keras.layers.Dense(
            units=input_shape, activation=tf.nn.sigmoid
        )

    def call(self, code):
        activation = self.decoder_layer_1(code)
        activation = self.decoder_layer_2(activation)
        activation = self.decoder_layer_3(activation)
        return self.reconstruction_layer(activation)
class Autoencoder(tf.keras.Model):
    """Deep autoencoder composed of the deep Encoder and Decoder.

    Args:
        code_dim: Latent code dimensionality (forwarded to Encoder).
        input_shape: Original feature dimensionality (forwarded to Decoder).
    """

    def __init__(self, code_dim, input_shape, **kwargs):
        # Explicit parameters instead of raw kwargs lookups — backward
        # compatible with existing keyword callers, fails fast otherwise.
        super(Autoencoder, self).__init__(**kwargs)
        # NOTE(review): this shadows tf.keras.Model's own `loss` attribute;
        # kept for compatibility with any caller reading autoencoder.loss,
        # but consider renaming (e.g. loss_history) — confirm no such caller.
        self.loss = []
        self.encoder = Encoder(code_dim=code_dim)
        self.decoder = Decoder(input_shape=input_shape)

    def call(self, features):
        code = self.encoder(features)
        reconstructed = self.decoder(code)
        return reconstructed
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from autoencoder_tf import Autoencoder
import tensorflow as tf
import numpy as np
# Fix seeds for reproducible runs.
np.random.seed(1)
tf.random.set_seed(1)
# Training hyperparameters.
batch_size = 128
epochs = 10
learning_rate = 1e-2
intermediate_dim = 64   # latent code size
original_dim = 784      # flattened 28x28 MNIST image
def loss(model, original):
    """Mean squared reconstruction error of `model` on `original`."""
    reconstructed = model(original)
    return tf.reduce_mean(tf.square(reconstructed - original))
def train(loss, model, opt, original):
    """Run one optimization step.

    Records the forward pass under a gradient tape, then applies the
    resulting gradients to the model's trainable variables via `opt`.
    """
    with tf.GradientTape() as tape:
        step_loss = loss(model, original)
    gradients = tape.gradient(step_loss, model.trainable_variables)
    opt.apply_gradients(zip(gradients, model.trainable_variables))
# Load MNIST, scale pixels to [0, 1], and flatten each 28x28 image to 784.
(training_features, _), (test_features, _) = tf.keras.datasets.mnist.load_data()
training_features = training_features / np.max(training_features)
training_features = training_features.reshape(
    training_features.shape[0],
    training_features.shape[1] * training_features.shape[2])
training_features = training_features.astype('float32')

# Build the input pipeline. Fix: shuffle BEFORE batching so individual
# examples are re-shuffled; the original shuffled after batching, which
# only reorders whole batches of fixed composition.
training_dataset = tf.data.Dataset.from_tensor_slices(training_features)
training_dataset = training_dataset.shuffle(training_features.shape[0])
training_dataset = training_dataset.batch(batch_size)
training_dataset = training_dataset.prefetch(4)  # prefetch counts batches, not samples

# Fix: the imported autoencoder_tf.Autoencoder reads `code_dim` and
# `input_shape`; the original keywords (intermediate_dim/original_dim)
# would raise KeyError at construction.
autoencoder = Autoencoder(code_dim=intermediate_dim, input_shape=original_dim)
opt = tf.optimizers.Adam(learning_rate=learning_rate)

writer = tf.summary.create_file_writer('tmp')
with writer.as_default():
    with tf.summary.record_if(True):
        # Fix: use a monotonically increasing global step — the original
        # per-epoch `step` reset to 0 every epoch, overwriting summaries.
        global_step = 0
        for epoch in range(epochs):
            for batch_features in training_dataset:
                train(loss, autoencoder, opt, batch_features)
                loss_values = loss(autoencoder, batch_features)
                original = tf.reshape(
                    batch_features, (batch_features.shape[0], 28, 28, 1))
                reconstructed = tf.reshape(
                    autoencoder(batch_features),
                    (batch_features.shape[0], 28, 28, 1))
                tf.summary.scalar('loss', loss_values, step=global_step)
                tf.summary.image('original', original,
                                 max_outputs=10, step=global_step)
                tf.summary.image('reconstructed', reconstructed,
                                 max_outputs=10, step=global_step)
                global_step += 1
二、Pytorch版本實現
PyTorch 安裝:
pip install torch torchvision
"""Implementation of vanila autoencoder in TensorFlow 2.0 Subclassing API"""
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
# Fix seeds and force deterministic cuDNN kernels for reproducible runs.
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
class Encoder(nn.Module):
    """Map a flattened 784-d input to a 128-d code via two ReLU layers."""

    def __init__(self):
        super().__init__()
        self.encoder_hidden_layer = nn.Linear(784, 128)
        self.encoder_output_layer = nn.Linear(128, 128)

    def forward(self, features):
        # Two linear layers, each followed by ReLU; the second output
        # is the latent code.
        hidden = F.relu(self.encoder_hidden_layer(features))
        return F.relu(self.encoder_output_layer(hidden))
class Decoder(nn.Module):
    """Map a 128-d code back to a 784-d reconstruction via two ReLU layers."""

    def __init__(self):
        super().__init__()
        self.decoder_hidden_layer = nn.Linear(128, 128)
        self.decoder_output_layer = nn.Linear(128, 784)

    def forward(self, features):
        # Mirror of the encoder; final ReLU keeps outputs non-negative.
        hidden = F.relu(self.decoder_hidden_layer(features))
        return F.relu(self.decoder_output_layer(hidden))
class Autoencoder(nn.Module):
    """Compose Encoder and Decoder: reconstruct a flattened 784-d input."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

    def forward(self, features):
        # Encode to the latent code, then decode back to input space.
        return self.decoder(self.encoder(features))
# ToTensor() converts HxW uint8 images to float tensors scaled to [0, 1].
transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
train_dataset = torchvision.datasets.MNIST(
    root="./torch_datasets", train=True, transform=transform, download=True
)
test_dataset = torchvision.datasets.MNIST(
    root="./torch_datasets", train=False, transform=transform, download=True
)
# NOTE(review): num_workers > 0 in a module-level script (no
# `if __name__ == "__main__"` guard) can fail on platforms that spawn
# worker processes (e.g. Windows/macOS) — confirm the target OS.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=32, shuffle=False, num_workers=4
)
# Train on GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Autoencoder().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()  # mean squared reconstruction error
epochs = 20
for epoch in range(epochs):
    loss = 0  # running sum of per-batch losses for this epoch
    for batch_features, _ in train_loader:
        # Flatten each 28x28 image to a 784-d vector; labels are unused.
        batch_features = batch_features.view(-1, 784).to(device)
        optimizer.zero_grad()
        outputs = model(batch_features)
        # Reconstruction loss: compare output against the input itself.
        train_loss = criterion(outputs, batch_features)
        train_loss.backward()
        optimizer.step()
        loss += train_loss.item()
    # Report the mean per-batch loss for the epoch.
    loss = loss / len(train_loader)
    print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
# Visualize `number` test digits (top row) against their reconstructions
# (bottom row). no_grad: inference only, no autograd bookkeeping.
with torch.no_grad():
    number = 10
    plt.figure(figsize=(20, 4))
    for index in range(number):
        # display original
        ax = plt.subplot(2, number, index + 1)
        plt.imshow(test_dataset.data[index].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # display reconstruction
        ax = plt.subplot(2, number, index + 1 + number)
        test_data = test_dataset.data[index].to(device).float()
        # Fix: the model was trained on ToTensor()-normalized inputs in
        # [0, 1], but `test_dataset.data` holds raw uint8 pixels in
        # [0, 255]. Scale to match the training distribution.
        test_data = test_data / 255.0
        test_data = test_data.view(-1, 784)
        output = model(test_data)
        plt.imshow(output.cpu().reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()