Core code for DCGAN, WGAN, and SGAN

SGAN: training on your own images
# Import the required packages
from PIL import Image  # Image is used to read images
from skimage import io  # io can also read images, often more robustly than Image

import matplotlib
matplotlib.use('Agg')  # select a non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt  # used to plot the generated images
import numpy as np  # array handling for the image data
import os  # folder operations
import sys
import time  # timing

from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
class GAN():
    def __init__(self):
        self.img_rows = 64
        self.img_cols = 64
        self.channels = 3
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 100

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates imgs
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        validity = self.discriminator(img)

        # The combined model  (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)


    def build_generator(self):

        model = Sequential()

        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))

        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))


        model.add(Dense(np.prod(self.img_shape), activation='tanh'))
        model.add(Reshape(self.img_shape))

        model.summary()

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)

        return Model(noise, img)

    def build_discriminator(self):

        model = Sequential()

        model.add(Flatten(input_shape=self.img_shape))

        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))

        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        img = Input(shape=self.img_shape)
        validity = model(img)

        return Model(img, validity)

    def train(self, epochs, batch_size=128, sample_interval=50):

        # Load the dataset from a folder of images
        input_dir = "./papa_image"
        images = os.listdir(input_dir)
        image_len = len(images)

        # Allocate an empty array to hold the data
        data = np.empty((image_len, self.img_rows, self.img_rows, self.channels), dtype="float32")

        # Read the images one by one
        for i in range(image_len):
            # With skimage.io, the read would instead be img = io.imread(input_dir + "/" + images[i])
            img = Image.open(input_dir + "/" + images[i])  # open the image
            img = img.resize((self.img_rows, self.img_rows))  # resize, e.g. 256x256 -> 64x64
            arr = np.asarray(img, dtype="float32")  # convert to an np.array
            data[i, :, :, :] = arr  # store it in data

        X_train = data
        X_train = X_train / 127.5 - 1
        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new images
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            # Plot the progress
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5
        print("gen_imgs.shape:", gen_imgs.shape)# gen_imgs.shape: (25, 28, 28, 3)
        images = gen_imgs
        # Tile every image of the batch onto a single canvas and save it
        batch_size = len(images)
        n = int(np.sqrt(batch_size))

        # Read the image size and allocate the empty canvas
        image_size = np.shape(images)[2]
        n_channel = np.shape(images)[3]
        canvas = np.empty((n * image_size, n * image_size, n_channel))

        # Fill each tile of the canvas
        for i in range(n):
            for j in range(n):
                canvas[i * image_size:(i + 1) * image_size, j * image_size:(j + 1) * image_size, :] = images[
                    n * i + j].reshape(image_size, image_size, n_channel)

        # Plot the result and label the axes
        plt.figure(figsize=(5, 5))
        plt.imshow(canvas, cmap="gray")
        label = "Epoch: {0}".format(epoch + 1)
        plt.xlabel(label)
        # Save the plot
        os.makedirs("images", exist_ok=True)  # make sure the output folder exists
        plt.savefig("images/%d.png" % epoch)
        plt.close()





if __name__ == '__main__':
    gan = GAN()
    gan.train(epochs=30000, batch_size=32, sample_interval=200)
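After training, only the generator is needed to produce new images. A minimal sketch of saving it and sampling later (the file name "generator.h5" and the snippet below are illustrative, not part of the original script):

# Sketch: persist the trained generator, then sample from it in a fresh session.
# Assumes `gan` is the trained GAN instance from the __main__ block above;
# the file name "generator.h5" is illustrative.
gan.generator.save("generator.h5")

# Later:
from keras.models import load_model
import numpy as np

g = load_model("generator.h5")
z = np.random.normal(0, 1, (1, 100))          # latent_dim = 100, as above
img = g.predict(z)                            # tanh output in [-1, 1]
img = ((img[0] + 1) * 127.5).astype("uint8")  # rescale to [0, 255] for viewing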
SGAN: handwritten digit generation
from keras.layers import Input, Dense, Reshape, Flatten, Dropout
from keras.layers import BatchNormalization, Activation, ZeroPadding2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.convolutional import UpSampling2D, Conv2D
from keras.models import Sequential, Model
from keras.optimizers import Adam

import matplotlib
matplotlib.use('Agg')  # select a non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt
import os  # needed to create the output folder for samples
import sys

import numpy as np

class GAN():
    def __init__(self):
        self.img_rows = 28
        self.img_cols = 28
        self.channels = 1
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
        self.latent_dim = 100

        optimizer = Adam(0.0002, 0.5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates imgs
        z = Input(shape=(self.latent_dim,))
        img = self.generator(z)

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        validity = self.discriminator(img)

        # The combined model  (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self.combined = Model(z, validity)
        self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)


    def build_generator(self):

        model = Sequential()

        model.add(Dense(256, input_dim=self.latent_dim))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(1024))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Dense(np.prod(self.img_shape), activation='tanh'))
        model.add(Reshape(self.img_shape))

        model.summary()

        noise = Input(shape=(self.latent_dim,))
        img = model(noise)

        return Model(noise, img)

    def build_discriminator(self):

        model = Sequential()

        model.add(Flatten(input_shape=self.img_shape))
        model.add(Dense(512))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(256))
        model.add(LeakyReLU(alpha=0.2))
        model.add(Dense(1, activation='sigmoid'))
        model.summary()

        img = Input(shape=self.img_shape)
        validity = model(img)

        return Model(img, validity)

    def train(self, epochs, batch_size=128, sample_interval=50):

        # Load the dataset
        path = "./mnist.npz"
        f = np.load(path)
        X_train, y_train = f["x_train"], f["y_train"]

        # Rescale -1 to 1
        X_train = X_train / 127.5 - 1.
        print("X_train.shape:", X_train.shape)# X_train.shape: (60000, 28, 28)
        X_train = np.expand_dims(X_train, axis=3)# expand_X_train.shape: (60000, 28, 28, 1)
        print("expand_X_train.shape:", X_train.shape)

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, X_train.shape[0], batch_size)
            imgs = X_train[idx]

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Generate a batch of new images
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            noise = np.random.normal(0, 1, (batch_size, self.latent_dim))

            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch(noise, valid)

            # Plot the progress
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, self.latent_dim))
        gen_imgs = self.generator.predict(noise)

        # Rescale images 0 - 1
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i,j].imshow(gen_imgs[cnt, :,:,0], cmap='gray')
                axs[i,j].axis('off')
                cnt += 1
        os.makedirs("images", exist_ok=True)  # make sure the output folder exists
        fig.savefig("images/%d.png" % epoch)
        plt.close()


if __name__ == '__main__':
    gan = GAN()
    gan.train(epochs=30000, batch_size=32, sample_interval=200)
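If mnist.npz is not present locally, the same arrays can be fetched through the standard Keras helper instead (a sketch assuming network access on the first call):

# Alternative dataset loading via keras.datasets instead of a local mnist.npz.
from keras.datasets import mnist
import numpy as np

(X_train, y_train), (_, _) = mnist.load_data()  # X_train: (60000, 28, 28), uint8
X_train = X_train / 127.5 - 1.0                 # rescale to [-1, 1], as in train()
X_train = np.expand_dims(X_train, axis=3)       # -> (60000, 28, 28, 1)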
DCGAN: training on your own images
# Import the required packages
from PIL import Image  # Image is used to read images
from skimage import io  # io can also read images, often more robustly than Image

import tensorflow as tf  # used to build the network
import matplotlib.pyplot as plt  # used to plot the generated images
import numpy as np  # array handling for the image data
import os  # folder operations
import time  # timing

# General settings
is_training = True
input_dir = "./data/papa_image/"  # folder containing the raw training images

# Hyperparameters
batch_size = 64
image_width = 64
image_height = 64
image_channel = 3
data_shape = [64, 64, 3]
data_length = 64 * 64 * 3

z_dim = 100
learning_rate = 0.00005
beta1 = 0.5
epoch = 500
# Data-loading function
def prepare_data(input_dir):
    '''
    Reads the training data from the given image directory.
    :param input_dir: root directory of the image data, e.g. "./data/papa_image/"
    :return: the loaded training set
    '''

    # List the image files and count them
    images = os.listdir(input_dir)
    image_len = len(images)

    # Allocate an empty array to hold the data
    data = np.empty((image_len, image_width, image_height, image_channel), dtype="float32")

    # Read the images one by one
    for i in range(image_len):
        # With skimage.io, the read would instead be img = io.imread(input_dir + images[i])
        img = Image.open(input_dir + "/" + images[i])  # open the image
        img = img.resize((image_width, image_height))  # resize, e.g. 256x256 -> 64x64
        arr = np.asarray(img, dtype="float32")  # convert to an np.array
        data[i, :, :, :] = arr  # store it in data

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    data = tf.reshape(data, [-1, image_width, image_height, image_channel])
    train_data = data * 1.0 / 127.5 - 1.0  # rescale the data to [-1, 1]
    train_data = tf.reshape(train_data, [-1, data_length])  # flatten each image into a 1-D vector
    train_set = sess.run(train_data)
    sess.close()
    return train_set


# Generator definition
def Generator(z, is_training, reuse):
    '''
    Generates an image gen_img from the input noise z.
    :param z: the input data, normally noise
    :param is_training: whether this is the training phase
    :return: the generated image gen_img
    '''

    # The channel depth progresses 1024 -> 512 -> 256 -> 128 -> 3
    depths = [1024, 512, 256, 128] + [data_shape[2]]

    with tf.variable_scope("Generator", reuse=reuse):
        # Layer 1: fully connected
        with tf.variable_scope("g_fc1", reuse=reuse):
            output = tf.layers.dense(z, depths[0] * 4 * 4, trainable=is_training)
            output = tf.reshape(output, [batch_size, 4, 4, depths[0]])
            output = tf.nn.relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 2: transposed convolution, 512 channels
        with tf.variable_scope("g_dc1", reuse=reuse):
            output = tf.layers.conv2d_transpose(output, depths[1], [5, 5], strides=(2, 2),
                                                padding="SAME", trainable=is_training)
            output = tf.nn.relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 3: transposed convolution, 256 channels
        with tf.variable_scope("g_dc2", reuse=reuse):
            output = tf.layers.conv2d_transpose(output, depths[2], [5, 5], strides=(2, 2),
                                                padding="SAME", trainable=is_training)
            output = tf.nn.relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 4: transposed convolution, 128 channels
        with tf.variable_scope("g_dc3", reuse=reuse):
            output = tf.layers.conv2d_transpose(output, depths[3], [5, 5], strides=(2, 2),
                                                padding="SAME", trainable=is_training)
            output = tf.nn.relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 5: transposed convolution, 3 output channels
        with tf.variable_scope("g_dc4", reuse=reuse):
            output = tf.layers.conv2d_transpose(output, depths[4], [5, 5], strides=(2, 2),
                                                padding="SAME", trainable=is_training)
            gen_img = tf.nn.tanh(output)

    return gen_img


# Discriminator definition
def Discriminator(x, is_training, reuse):
    '''
    Decides whether the input image is real or fake.
    :param x: the input data
    :param is_training: whether this is the training phase
    :return: the discrimination result
    '''

    # The channel depth progresses 3 -> 64 -> 128 -> 256 -> 512
    depths = [data_shape[2]] + [64, 128, 256, 512]

    with tf.variable_scope("Discriminator", reuse=reuse):
        # Layer 1: convolution; note the leaky_relu activation
        with tf.variable_scope("d_cv1", reuse=reuse):
            output = tf.layers.conv2d(x, depths[1], [5, 5], strides=(2, 2),
                                      padding="SAME", trainable=is_training)
            output = tf.nn.leaky_relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 2: convolution; note the leaky_relu activation
        with tf.variable_scope("d_cv2", reuse=reuse):
            output = tf.layers.conv2d(output, depths[2], [5, 5], strides=(2, 2),
                                      padding="SAME", trainable=is_training)
            output = tf.nn.leaky_relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 3: convolution; note the leaky_relu activation
        with tf.variable_scope("d_cv3", reuse=reuse):
            output = tf.layers.conv2d(output, depths[3], [5, 5], strides=(2, 2),
                                      padding="SAME", trainable=is_training)
            output = tf.nn.leaky_relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 4: convolution; note the leaky_relu activation
        with tf.variable_scope("d_cv4", reuse=reuse):
            output = tf.layers.conv2d(output, depths[4], [5, 5], strides=(2, 2),
                                      padding="SAME", trainable=is_training)
            output = tf.nn.leaky_relu(tf.layers.batch_normalization(output, training=is_training))

        # Layer 5: fully connected
        with tf.variable_scope("d_fc1", reuse=reuse):
            output = tf.layers.flatten(output)
            disc_img = tf.layers.dense(output, 1, trainable=is_training)

    return disc_img


def plot_and_save(order, images):
    '''
    Plots the generator's results and saves them to disk.
    :param order: epoch index, or a custom file-name string
    :param images: a batch of generated images
    '''

    # Tile every image of the batch onto a single canvas and save it
    batch_size = len(images)
    n = int(np.sqrt(batch_size))

    # Read the image size and allocate the empty canvas
    image_size = np.shape(images)[2]
    n_channel = np.shape(images)[3]
    images = np.reshape(images, [-1, image_size, image_size, n_channel])
    canvas = np.empty((n * image_size, n * image_size, image_channel))

    # Fill each tile of the canvas
    for i in range(n):
        for j in range(n):
            canvas[i * image_size:(i + 1) * image_size, j * image_size:(j + 1) * image_size, :] = images[
                n * i + j].reshape(64, 64, 3)

    # Plot the result and label the axes
    plt.figure(figsize=(8, 8))
    plt.imshow(canvas, cmap="gray")
    label = "Epoch: {0}".format(order + 1)
    plt.xlabel(label)

    # Build the file name
    if type(order) is str:
        file_name = order
    else:
        file_name = "face_gen" + str(order)

    # Save the plot
    plt.savefig(file_name)
    print(os.getcwd())
    print("Image saved in file: ", file_name)
    plt.close()


# Training procedure
def training():
    '''
    Runs the DCGAN training loop.
    '''
    # Prepare the data from the input directory
    data = prepare_data(input_dir)

    # Build the network; this is the core of the program --------------------------
    x = tf.placeholder(tf.float32, shape=[None, data_length], name="Input_data")
    x_img = tf.reshape(x, [-1] + data_shape)
    z = tf.placeholder(tf.float32, shape=[None, z_dim], name="latent_var")

    G = Generator(z, is_training=True, reuse=False)
    D_fake_logits = Discriminator(G, is_training=True, reuse=False)
    D_true_logits = Discriminator(x_img, is_training=True, reuse=True)

    # Generator loss G_loss
    G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_fake_logits, labels=tf.ones_like(D_fake_logits)))

    # Discriminator loss D_loss
    D_loss_1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_true_logits, labels=tf.ones_like(D_true_logits)))
        
    D_loss_2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_fake_logits, labels=tf.zeros_like(D_fake_logits)))
    D_loss = D_loss_1 + D_loss_2

    # Collect the trainable variables
    total_vars = tf.trainable_variables()
    d_vars = [var for var in total_vars if "d_" in var.name]
    g_vars = [var for var in total_vars if "g_" in var.name]

    # Optimizers (run after the batch-norm update ops)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        g_optimization = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                                beta1=beta1).minimize(G_loss, var_list=g_vars)
        d_optimization = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                                beta1=beta1).minimize(D_loss, var_list=d_vars)
    print("we successfully make the network")

    start_time = time.time()
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for i in range(epoch):
        total_batch = int(len(data)/batch_size)
        d_value = 0
        g_value = 0
        for j in range(total_batch):
            batch_xs = data[j * batch_size: j * batch_size + batch_size]
            # Discriminator update
            z_sampled1 = np.random.uniform(low=-1.0, high=1.0, size=[batch_size, z_dim])
            Op_d, d_ = sess.run([d_optimization, D_loss], feed_dict={x: batch_xs, z: z_sampled1})
            # Generator update
            z_sampled2 = np.random.uniform(low=-1.0, high=1.0, size=[batch_size, z_dim])
            Op_g, g_ = sess.run([g_optimization, G_loss], feed_dict={x: batch_xs, z: z_sampled2})
            # Save the generated images (note: this runs every batch and overwrites the same file within an epoch)
            images_generated = sess.run(G, feed_dict={z: z_sampled2})
            d_value += d_ / total_batch
            g_value += g_ / total_batch
            plot_and_save(i, images_generated)

            # Report the elapsed time and the loss values
            elapsed = time.time() - start_time
            hour = int(elapsed / 3600)
            minute = int((elapsed - 3600 * hour) / 60)
            sec = int(elapsed - 3600 * hour - 60 * minute)
            print("Time: ", hour, "h", minute, "min", sec, "sec", "   Epoch: ",
                  i, "G_loss: ", g_value, "D_loss: ", d_value)



if __name__ == "__main__":
    training()
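The generator reaches 64x64 because each stride-2 conv2d_transpose with padding="SAME" doubles the spatial size, starting from the 4x4 feature map produced by g_fc1: 4 -> 8 -> 16 -> 32 -> 64. A quick sanity check of that arithmetic (pure Python, no TensorFlow needed):

# Spatial sizes through the generator's transposed convolutions.
size = 4  # after g_fc1, the tensor is reshaped to (batch, 4, 4, 1024)
for layer in ["g_dc1", "g_dc2", "g_dc3", "g_dc4"]:
    size *= 2  # padding="SAME" with stride 2 doubles the height and width
    print(layer, "->", size)  # prints 8, 16, 32, 64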
WGAN:
from skimage import io, transform  # used to read images
import tensorflow as tf  # build the network
import numpy as np
import matplotlib.pyplot as plt  # plot the results and save them
import os  # create folders

image_width = 128  # image width in pixels
image_height = 128  # image height in pixels
image_channel = 3  # number of image channels

input_dir = "./data/trainB/"
output_dir = "./data/result/"
batch_size = 64
z_dim = 128
lr_gen = 5e-5  # generator learning rate
lr_dis = 5e-5  # critic (discriminator) learning rate
epoch = 1000


# Data-loading function; compare with the earlier DCGAN code. The change here is
# that skimage is used for reading instead of PIL.
def process_data():
    '''
    Reads all images under input_dir and returns the training set train_set
    and the number of images image_len.
    '''
    images = os.listdir(input_dir)
    image_len = len(images)

    data = np.empty((image_len, image_width, image_height, image_channel), dtype="float32")

    for i in range(image_len):
        # Read the image with skimage.io; reading with PIL.Image would cause an error here
        img = io.imread(input_dir + images[i])
        print(img.shape)
        # Resize every image to 128x128
        img = transform.resize(img, (image_width, image_height))
        arr = (np.asarray(img, dtype="float32"))
        # Do not normalize the images again here, otherwise the results are pure noise
        # (transform.resize already rescales them to [0, 1])
        data[i, :, :, :] = arr

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        data = tf.reshape(data, [-1, image_width, image_height, image_channel])
        train_set = sess.run(data)

    return train_set, image_len

def leaky_relu(x, n, leak=0.2):
    return tf.maximum(x, leak * x, name=n)

def generator(input, random_dim, is_train, reuse=False):
    print("generator--is_train:", is_train)#generator--is_train: Tensor("input/is_train:0", dtype=bool)
    with tf.variable_scope('generator') as scope:
        if reuse:
            scope.reuse_variables()
        w1 = tf.get_variable('w1', shape=[random_dim, 4 * 4 * 512], dtype=tf.float32,
                             initializer=tf.truncated_normal_initializer(stddev=0.02))
        b1 = tf.get_variable('b1', shape=[512 * 4 * 4], dtype=tf.float32,
                             initializer=tf.constant_initializer(0.0))
        flat_conv1 = tf.add(tf.matmul(input, w1), b1, name='flat_conv1')
        # print("flat_conv1:", flat_conv1) flat_conv1: Tensor(" shape=(?, 8192), dtype=float32)

        # 4*4*512; the -1 lets TF infer the batch dimension
        conv1 = tf.reshape(flat_conv1, shape=[-1, 4, 4, 512], name='conv1')
        # print("conv1:", conv1)conv1: Tensor("generator/conv1:0", shape=(?, 4, 4, 512), dtype=float32)
        bn1 = tf.contrib.layers.batch_norm(conv1, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn1')
        act1 = tf.nn.relu(bn1, name='act1')

        # 8*8*256
        conv2 = tf.layers.conv2d_transpose(act1, 256, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                           name='conv2')
        bn2 = tf.contrib.layers.batch_norm(conv2, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn2')
        act2 = tf.nn.relu(bn2, name='act2')

        # 16*16*128
        conv3 = tf.layers.conv2d_transpose(act2, 128, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                           name='conv3')
        bn3 = tf.contrib.layers.batch_norm(conv3, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn3')
        act3 = tf.nn.relu(bn3, name='act3')

        # 32*32*64
        conv4 = tf.layers.conv2d_transpose(act3, 64, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                           name='conv4')
        bn4 = tf.contrib.layers.batch_norm(conv4, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn4')
        act4 = tf.nn.relu(bn4, name='act4')

        # 64*64*32
        conv5 = tf.layers.conv2d_transpose(act4, 32, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                           name='conv5')
        bn5 = tf.contrib.layers.batch_norm(conv5, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn5')
        act5 = tf.nn.relu(bn5, name='act5')

        # 128*128*3
        conv6 = tf.layers.conv2d_transpose(act5, image_channel, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                           name='conv6')

        act6 = tf.nn.tanh(conv6, name='act6')
        # print("act6:", act6) # act6: Tensor("generator/act6:0", shape=(?, 128, 128, 3), dtype=float32)
        return act6


def discriminator(input, is_train, reuse=False):
    # print("is_train:", is_train)
    with tf.variable_scope('discriminator') as scope:
        if reuse:
            scope.reuse_variables()

        # 64*64*64
        conv1 = tf.layers.conv2d(input, 64, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                 name='conv1')
        act1 = leaky_relu(conv1, n='act1')

        # 32*32*128
        conv2 = tf.layers.conv2d(act1, 128, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                 name='conv2')
        bn2 = tf.contrib.layers.batch_norm(conv2, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn2')
        act2 = leaky_relu(bn2, n='act2')

        # 16*16*256
        conv3 = tf.layers.conv2d(act2, 256, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                 name='conv3')
        bn3 = tf.contrib.layers.batch_norm(conv3, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None, scope='bn3')
        act3 = leaky_relu(bn3, n='act3')

        # 8*8*512
        conv4 = tf.layers.conv2d(act3, 512, kernel_size=[5, 5], strides=[2, 2], padding="SAME",
                                 kernel_initializer=tf.truncated_normal_initializer(stddev=0.02),
                                 name='conv4')
        bn4 = tf.contrib.layers.batch_norm(conv4, is_training=is_train, epsilon=1e-5, decay=0.9,
                                           updates_collections=None,
                                           scope='bn4')
        act4 = leaky_relu(bn4, n='act4')
        print("act4", act4)
        # start from act4
        dim = int(np.prod(act4.get_shape()[1:]))
        print("dim:", dim)
        fc1 = tf.reshape(act4, shape=[-1, dim], name='fc1')
        w2 = tf.get_variable('w2', shape=[fc1.shape[-1], 1], dtype=tf.float32,
                             initializer=tf.truncated_normal_initializer(stddev=0.02))
        b2 = tf.get_variable('b2', shape=[1], dtype=tf.float32,
                             initializer=tf.constant_initializer(0.0))
        # WGAN does not apply a sigmoid to the critic output
        logits = tf.add(tf.matmul(fc1, w2), b2, name='logits')
        print("logits:", logits)
        return logits


def plot_and_save(num, images):
    print("----",num, images.shape)# ---- 0 (64, 128, 128, 3)
    batch_size = len(images)
    n = int(np.sqrt(batch_size))

    image_size = np.shape(images)[2]
    n_channel = np.shape(images)[3]
    images = np.reshape(images, [-1, image_size, image_size, n_channel])
    canvas = np.empty((n * image_size, n * image_size, image_channel))

    for i in range(n):
        for j in range(n):
            canvas[i * image_size:(i + 1) * image_size, j * image_size:(j + 1) * image_size, :] = images[
                n * i + j].reshape(128, 128, 3)

    plt.figure(figsize=(8, 8))
    plt.imshow(canvas, cmap="gray")
    label = "Epoch: {0}".format(num + 1)
    plt.xlabel(label)

    if type(num) is str:
        file_name = num
    else:
        file_name = "pikachu_gen" + str(num)

    plt.savefig(file_name)
    print(output_dir)
    print("Image saved in file: ", file_name)
    plt.close()


def train():
    # Build the model
    with tf.variable_scope("input"):
        # Model inputs
        real_image = tf.placeholder(tf.float32, shape=[None, image_height, image_width, image_channel], name="real_image")
        random_input = tf.placeholder(tf.float32, shape=[None, z_dim], name="rand_input")
        is_train = tf.placeholder(tf.bool, name="is_train")
    # print("real_image:", real_image) #  Tensor("input/real_image:0", shape=(?, 128, 128, 3), dtype=float32)
    # # Define the WGAN
    # print("is_train:", is_train)
    """
    real_image: Tensor("input/real_image:0", shape=(?, 128, 128, 3), dtype=float32)
    random_input: Tensor("input/rand_input:0", shape=(?, 128), dtype=float32)
    fake_image: Tensor("generator/act6:0", shape=(?, 128, 128, 3), dtype=float32)
    is_train: Tensor("input/is_train:0", dtype=bool)
    """
    fake_image = generator(random_input, z_dim, is_train)
    real_result = discriminator(real_image, is_train)
    print("--real_result:", real_image)
    fake_result = discriminator(fake_image, is_train, reuse=True)

    # Loss functions; this is where WGAN differs from a standard GAN
    d_loss = tf.reduce_mean(fake_result) - tf.reduce_mean(real_result)
    g_loss = -tf.reduce_mean(fake_result)

    # Collect the trainable variables
    t_vars = tf.trainable_variables()

    d_vars = [var for var in t_vars if 'discriminator' in var.name]
    g_vars = [var for var in t_vars if 'generator' in var.name]

    # Optimizers: WGAN uses RMSProp rather than momentum-based optimizers
    trainer_d = tf.train.RMSPropOptimizer(learning_rate=lr_dis).minimize(d_loss, var_list=d_vars)
    trainer_g = tf.train.RMSPropOptimizer(learning_rate=lr_gen).minimize(g_loss, var_list=g_vars)
    # Clip the critic weights to [-0.01, 0.01]
    d_clip = [v.assign(tf.clip_by_value(v, -0.01, 0.01)) for v in d_vars]
    # Model construction done
    # Load the data
    image_batch, samples_num = process_data()
    # print(image_batch.shape, samples_num)  # (200, 128, 128, 3) 200
    # Data loading done
    batch_num = int(samples_num / batch_size)
    total_batch = 0
    # Create the session and initialize the variables
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    print('total training sample num:%d' % samples_num)
    print('batch size: %d, batch num per epoch: %d, epoch num: %d' % (batch_size, batch_num, epoch))
    print('start training...')
    # Train epoch by epoch


    for i in range(epoch):
        # Train batch by batch
        for j in range(batch_num):
            # Train the critic d_iters times, then the generator g_iters times
            d_iters = 5
            g_iters = 1
            # Random noise as the input data
            train_noise = np.random.uniform(-1.0, 1.0, size=[batch_size, z_dim]).astype(np.float32)
            # Critic (discriminator) updates
            for k in range(d_iters):
                # Take batch_size images for this update
                train_image = image_batch[j * batch_size:j * batch_size + batch_size]
                # Weight clipping
                sess.run(d_clip)
                # Update the discriminator
                _, dLoss = sess.run([trainer_d, d_loss],
                                    feed_dict={random_input: train_noise, real_image: train_image, is_train: True})
            # Generator updates
            for k in range(g_iters):
                _, gLoss = sess.run([trainer_g, g_loss],
                                    feed_dict={random_input: train_noise, is_train: True})
            # Print the generator and discriminator loss values
            print("train:[%d/%d], d_loss:%f, g_loss:%f" % (i, j, dLoss, gLoss))

        # Save the results every 10 epochs
        if i % 10 == 0:
            # Create the output folder if it does not exist yet
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            # Random noise as the input
            sample_noise = np.random.uniform(-1.0, 1.0, size=[batch_size, z_dim]).astype(np.float32)
            # Generate images with the generator
            imgtest = sess.run(fake_image, feed_dict={random_input: sample_noise, is_train: False})
            # Rescale the tanh output from [-1, 1] to [0, 1] for plotting
            imgtest = (imgtest + 1) / 2
            # Save the results
            plot_and_save(i, imgtest)
            print("train:[%d], d_loss%f, g_loss:%f" % (i, dLoss, gLoss))





if __name__ == "__main__":
    train()
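Because d_loss = E[f(fake)] - E[f(real)], the negative critic loss is the critic's estimate of the Wasserstein distance and should shrink as the generator improves. A toy illustration with hypothetical critic scores (the numbers are made up):

# Toy numbers only: hypothetical critic outputs on real and generated images.
import numpy as np

real_scores = np.array([0.9, 1.1, 1.0])
fake_scores = np.array([-0.8, -1.2, -1.0])

d_loss = fake_scores.mean() - real_scores.mean()  # what trainer_d minimizes
g_loss = -fake_scores.mean()                      # what trainer_g minimizes
print("d_loss:", d_loss, " Wasserstein estimate:", -d_loss)  # estimate = 2.0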
