算法簡介
GAN最直觀的解釋就是博弈,同時訓練兩個網絡(生成網絡和判別網絡),二者訓練都是爲了打敗彼此。生成網絡將隨機潛在向量轉換爲圖像,判別器試圖分辨真實圖像與生成圖像。
生成網絡:以一個隨機向量作爲輸入,將其解碼爲合成圖像
判別網絡:輸入圖像,輸出真或假的類別
實驗中搭建的是DCGAN深度卷積生成式對抗網絡,即生成網絡和判別網絡都是深度卷積網絡,具體實現流程如下:
(1)從潛在空間中抽取隨機噪聲
(2)生成網絡利用這些隨機噪聲生成圖像
(3)將生成圖像與真實圖像打上標籤,並混合
(4)利用混合後的圖像集去訓練判別網絡
(5)重新抽取隨機噪聲,在凍結判別網絡的情況下,以“真實圖像”的標籤訓練生成網絡,然後回到(1)
代碼實現
import keras
from keras import layers
import numpy as np
import os
from keras.preprocessing import image
# Hyperparameters shared by the generator and the discriminator.
latent_dim = 32  # length of the latent vector fed to the generator
height = 32  # image height expected by the discriminator input
width = 32  # image width expected by the discriminator input
channels = 3  # number of channels per image

# Generator: maps a latent vector of length `latent_dim` to an image tensor.
generator_input = keras.Input(shape=(latent_dim,))
# Project the latent vector and reshape it into a 16x16 feature map with
# 128 channels.
x = layers.Dense(128 * 16 * 16)(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((16, 16, 128))(x)
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
# Upsample with a stride-2 transposed convolution (16x16 -> 32x32).
x = layers.Conv2DTranspose(256, 4, strides=2, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
# Final convolution produces `channels` feature maps; tanh bounds every
# output value to [-1, 1].
# NOTE(review): the training images in this script are scaled to [0, 1] --
# confirm that the range mismatch with tanh is intended.
x = layers.Conv2D(channels, 7, activation='tanh', padding='same')(x)
# Instantiate the generator model (latent vector -> image).
generator = keras.models.Model(generator_input, x)
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
generator.summary()
# Discriminator: takes a (height, width, channels) image and outputs a single
# sigmoid score.
discriminator_input = layers.Input(shape=(height, width, channels))
# Layers are listed in the order they are applied to the input image.
discriminator_stack = [
    layers.Conv2D(128, 3),
    layers.LeakyReLU(),
    layers.Conv2D(128, 4, strides=2),
    layers.LeakyReLU(),
    layers.Conv2D(128, 4, strides=2),
    layers.LeakyReLU(),
    layers.Flatten(),
    layers.Dropout(0.4),
    layers.Dense(1, activation='sigmoid'),
]
score = discriminator_input
for stage in discriminator_stack:
    score = stage(score)
# Instantiate the discriminator model (image -> score) and show its layout.
discriminator = keras.models.Model(discriminator_input, score)
discriminator.summary()
# RMSprop with value clipping and a small learning-rate decay.
discriminator_optimizer = keras.optimizers.RMSprop(
    lr=0.0008,
    clipvalue=1.0,
    decay=1e-8,
)
discriminator.compile(
    optimizer=discriminator_optimizer,
    loss='binary_crossentropy',
)
# Mark the discriminator's weights as non-trainable before the combined
# model is built and compiled.
discriminator.trainable = False
# Combined model: latent vector -> generator -> discriminator -> score.
gan_input = keras.Input(shape=(latent_dim,))
decoded_images = generator(gan_input)
gan_output = discriminator(decoded_images)
gan = keras.models.Model(gan_input, gan_output)
# The combined model uses its own RMSprop instance with a lower learning
# rate than the discriminator's optimizer.
gan_optimizer = keras.optimizers.RMSprop(
    lr=0.0004,
    clipvalue=1.0,
    decay=1e-8,
)
gan.compile(
    optimizer=gan_optimizer,
    loss='binary_crossentropy',
)
# Load the CIFAR-10 training split; the second (test) tuple is discarded.
(x_train, y_train), (_, _) = keras.datasets.cifar10.load_data()
# Keep only the images whose label is 6 (the frog class).
frog_mask = y_train.flatten() == 6
x_train = x_train[frog_mask]
# Pin the array to (N, height, width, channels), convert to float32 and
# divide by 255 so that pixel values lie in [0, 1].
sample_count = x_train.shape[0]
x_train = x_train.reshape((sample_count, height, width, channels))
x_train = x_train.astype('float32') / 255.
# Training configuration.
iterations = 10000  # number of training steps
batch_size = 20  # images drawn per step (both generated and real)
save_dir = './gan_png'  # directory that receives the sample images
# Create the output directory up front: img.save() does not create missing
# directories, so the very first save (step 0) would otherwise fail.
os.makedirs(save_dir, exist_ok=True)

start = 0  # index of the first real image of the current batch
for step in range(iterations):
    # Sample random points in the latent space and decode them to images.
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    generated_images = generator.predict(random_latent_vectors)

    # Take the next batch of real images and stack it after the fakes.
    stop = start + batch_size
    real_images = x_train[start: stop]
    combined_images = np.concatenate([generated_images, real_images])

    # Label convention used here: 1 = generated image, 0 = real image.
    labels = np.concatenate([np.ones((batch_size, 1)),
                             np.zeros((batch_size, 1))])
    # Add a small amount of random noise to the labels.
    labels += 0.05 * np.random.random(labels.shape)

    # Train the discriminator on the mixed batch.
    d_loss = discriminator.train_on_batch(combined_images, labels)

    # Sample fresh latent points for the generator update.
    random_latent_vectors = np.random.normal(size=(batch_size, latent_dim))
    # All-zero targets claim "these are real images" (0 is the real label
    # above), which is deliberately false for generated samples.
    misleading_targets = np.zeros((batch_size, 1))
    # Train the generator through the combined model; the discriminator was
    # marked non-trainable before `gan` was built.
    a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)

    # Advance the real-image window, wrapping around when a full batch no
    # longer fits.
    start += batch_size
    if start > len(x_train) - batch_size:
        start = 0

    # Every 100 steps: checkpoint, report losses, and save one generated and
    # one real sample image for visual comparison.
    if step % 100 == 0:
        gan.save_weights('gan.h5')
        print('discriminator loss:', d_loss)
        print('adversarial loss:', a_loss)
        img = image.array_to_img(generated_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'generated_frog' + str(step) + '.png'))
        img = image.array_to_img(real_images[0] * 255., scale=False)
        img.save(os.path.join(save_dir, 'real_frog' + str(step) + '.png'))