圖片風格轉換(附TensorFlow代碼)

論文

A Neural Algorithm of Artistic Style. Gatys, Ecker, Bethge, 2015 (arXiv:1508.06576)


思路

(1)網絡結構解析

採用ImageNet數據集預訓練一個VGG19網絡出來,得到網絡結構如下圖:

 

其中紫色框中的5個layer代表圖像風格層,綠色框代表內容層。

·  對於輸入的風格圖片,通過前向傳播計算出5個風格層的特徵圖,將每個通道的特徵圖平鋪成向量(例如28*28*C的特徵圖平鋪爲784*C的矩陣),再對各通道兩兩求內積,得到C*C的格拉姆矩陣(Gram matrix)。

·  對於輸入的內容圖片,通過前向傳播計算內容層的特徵圖(tensor)。

 

訓練開始之前,先通過隨機的方式初始化一張合成圖,然後進行迭代,每次迭代中都優化損失函數,使得合成圖與5個格拉姆矩陣的差異性、與內容特徵圖的差異性最小。優化算法爲Adam算法。具體而言,損失函數包括2部分:內容損失、風格損失。其中α和β用於控制內容與風格的權重,當內容權重爲0時,得到的就是圖片的紋理特徵。

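(2)內容損失函數

設 $P^l$、$F^l$ 分別爲內容圖與合成圖在內容層 $l$ 的特徵圖,則

$$L_{content}=\frac{1}{2}\sum_{i,j}\left(F^l_{ij}-P^l_{ij}\right)^2$$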

(3)風格損失函數

(4)最終損失函數

$$L_{total}=\alpha L_{content}+\beta L_{style}$$

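單獨某層損失函數(其中 $G^l$、$A^l$ 分別爲合成圖與風格圖在第 $l$ 層的格拉姆矩陣,$N_l$ 爲通道數,$M_l$ 爲特徵圖的高乘寬):

$$E_l=\frac{1}{4N_l^2M_l^2}\sum_{i,j}\left(G^l_{ij}-A^l_{ij}\right)^2$$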

各層綜合損失函數(其中 $E_l$ 爲第 $l$ 層的風格損失,$w_l$ 爲該層的權重):

$$L_{style}=\sum_{l}w_l E_l$$


TensorFlow源碼

import numpy as np
import scipy.io
import tensorflow as tf
from PIL import Image

# Command-line flags: paths of the style and content images, the number of
# optimisation iterations, and the optimiser learning rate.
tf.app.flags.DEFINE_string('style_image', '', 'style image')
tf.app.flags.DEFINE_string('content_image','','content image')
tf.app.flags.DEFINE_integer('epochs',5000,'training epochs')
tf.app.flags.DEFINE_float('learning_rate',0.5,'learning rate')
FLAGS=tf.app.flags.FLAGS

# Hyper-parameters.
# Multipliers applied to the style and content terms of the total loss.
STYLE_WEIGHT=0.5
CONTENT_WEIGHT=0.5
# Names of the vgg19() outputs that are compared for style and for content.
STYLE_LAYERS=['relu1_1','relu2_1','relu3_1','relu4_1','relu5_1']
CONTENT_LAYERS=['relu4_2']
# Module-level cache for the loaded VGG19 parameters (filled by vgg_params()).
_vgg_params=None

def vgg_params():
    """Return the pre-trained VGG19 parameters, loading them on first use.

    The file 'imagenet-vgg-verydeep-19.mat' is read with ``scipy.io.loadmat``
    the first time this function is called and the result is cached in the
    module-level ``_vgg_params``; every later call returns the cached object.

    Returns:
        The object returned by ``scipy.io.loadmat`` for the weights file.
    """
    global _vgg_params
    if _vgg_params is None:
        _vgg_params=scipy.io.loadmat('imagenet-vgg-verydeep-19.mat')
    # The return must sit outside the ``if``: otherwise calls that hit the
    # cache fall through and return None.
    return _vgg_params

def vgg19(input_image):
    """Build the VGG19 convolutional stack on top of ``input_image``.

    Args:
        input_image: input fed to the first convolution. Callers in this file
            pass a one-element list wrapping a single image tensor.

    Returns:
        A dict mapping every name in ``layers`` to the tensor produced by that
        layer: 'convX_Y' is the biased convolution output (before ReLU),
        'reluX_Y' the rectified output, and 'poolX' the max-pooled output.
    """
    layers=(
        'conv1_1','relu1_1','conv1_2','relu1_2','pool1',
        'conv2_1','relu2_1','conv2_2','relu2_2','pool2',
        'conv3_1','relu3_1','conv3_2','relu3_2','conv3_3','relu3_3','conv3_4','relu3_4','pool3',
        'conv4_1','relu4_1','conv4_2','relu4_2','conv4_3','relu4_3','conv4_4','relu4_4','pool4',
        'conv5_1','relu5_1','conv5_2','relu5_2','conv5_3','relu5_3','conv5_4','relu5_4','pool5'
    )
    # NOTE(review): weights are looked up by position, so this assumes the
    # layer list in the .mat file is ordered exactly like ``layers`` - confirm.
    weights=vgg_params()['layers'][0]
    net=input_image
    network={}
    for i,name in enumerate(layers):
        layer_type=name[:4]
        if layer_type=='conv':
            kernels,bias=weights[i][0][0][0][0]
            # MatConvNet weights: [width,height,in_channels,out_channels]
            # TensorFlow weights: [height,width,in_channels,out_channels]
            kernels=np.transpose(kernels,(1,0,2,3))
            conv=tf.nn.conv2d(net,tf.constant(kernels),strides=(1,1,1,1),padding='SAME',name=name)
            net=tf.nn.bias_add(conv,bias.reshape(-1))
        elif layer_type=='relu':
            # Rectify in its own step so that the 'conv' entry keeps the
            # pre-activation output and the 'relu' entry the rectified one.
            net=tf.nn.relu(net)
        elif layer_type=='pool':
            net=tf.nn.max_pool(net,ksize=(1,2,2,1),strides=(1,2,2,1),padding='SAME')
        network[name]=net
    return network

def content_loss(target_features,content_features):
    """Content distance between the synthesised image and the content image.

    Computes ``tf.nn.l2_loss`` of the element-wise feature difference and
    divides it by height*width*channel of ``content_features``. A smaller
    value means the synthesised image is closer in content to the original.

    Args:
        target_features: feature map of the image being synthesised.
        content_features: feature map of the content image; its static shape
            must have four dimensions.

    Returns:
        A scalar tensor holding the normalised content loss.
    """
    dims=[dim.value for dim in content_features.get_shape()]
    _,height,width,channel=dims
    difference=target_features-content_features
    return tf.nn.l2_loss(difference)/(height*width*channel)

def style_loss(target_features,style_features):
    """Style distance between the synthesised image and the style image.

    Each feature map is flattened to (positions, channel) and turned into a
    Gram matrix (channel x channel inner products), normalised by that feature
    map's own height*width*channel. The loss is ``tf.nn.l2_loss`` of the Gram
    difference divided by the style feature map's size. A smaller value means
    the synthesised image carries more of the style image's texture.

    Args:
        target_features: feature map of the image being synthesised.
        style_features: feature map of the style image.
            Both must have fully known four-dimensional static shapes.

    Returns:
        A scalar tensor holding the style loss for this layer.
    """
    def _gram(features):
        # Normalised Gram matrix of one feature map, plus the element count
        # used for the normalisation.
        _,height,width,channel=[dim.value for dim in features.get_shape()]
        size=height*width*channel
        flat=tf.reshape(features,(-1,channel))
        return tf.matmul(tf.transpose(flat),flat)/size,size

    # Normalise each Gram matrix by its own size: the style and content images
    # may have different resolutions, and a Gram matrix grows with the number
    # of spatial positions summed over.
    target_gram,_=_gram(target_features)
    style_gram,style_size=_gram(style_features)
    return tf.nn.l2_loss(target_gram-style_gram)/style_size

def loss_function(content_image,style_image,target_image):
    """Total loss: weighted content terms plus weighted style terms.

    Runs the style, content and target images through ``vgg19`` (each wrapped
    in a one-element list), then accumulates ``CONTENT_WEIGHT*content_loss``
    over ``CONTENT_LAYERS`` followed by ``STYLE_WEIGHT*style_loss`` over
    ``STYLE_LAYERS``.

    Args:
        content_image: the content image.
        style_image: the style image.
        target_image: the image being synthesised.

    Returns:
        The accumulated loss.
    """
    style_net=vgg19([style_image])
    content_net=vgg19([content_image])
    target_net=vgg19([target_image])

    # (layer names, weight, per-layer loss, reference activations), processed
    # in this order: content terms first, then style terms.
    plan=(
        (CONTENT_LAYERS,CONTENT_WEIGHT,content_loss,content_net),
        (STYLE_LAYERS,STYLE_WEIGHT,style_loss,style_net),
    )
    total=0.0
    for layer_names,weight,layer_loss,reference_net in plan:
        for layer_name in layer_names:
            total=total+weight*layer_loss(target_net[layer_name],reference_net[layer_name])
    return total


def stylize(style_image,content_image,learning_rate=0.1,epochs=500):
    """Optimise a synthesised image towards the given content and style.

    The synthesised image is a variable initialised from random normal noise
    with the same shape as ``content_image``. It is updated with Adam to
    minimise ``loss_function``. The loss is printed every iteration, and every
    100 iterations the current image is written to 'neural_<iteration>.jpg'
    in the working directory.

    Args:
        style_image: array holding the style image.
        content_image: array holding the content image.
        learning_rate: learning rate passed to the Adam optimiser.
        epochs: number of optimisation iterations to run.

    Returns:
        None.
    """
    # Image being synthesised, initialised as random white noise.
    target=tf.Variable(tf.random_normal(content_image.shape),dtype=tf.float32)
    style_input=tf.constant(style_image,dtype=tf.float32)
    content_input=tf.constant(content_image,dtype=tf.float32)
    cost=loss_function(content_input,style_input,target)

    # Minimise the cost with the Adam optimiser.
    train_op=tf.train.AdamOptimizer(learning_rate).minimize(cost)
    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated in favour of this call.
        sess.run(tf.global_variables_initializer())
        for i in range(epochs):
            _,loss=sess.run([train_op,cost])
            print ("iter:%d, loss:%.9f" % (i,loss))
            if (i+1)%100==0:
                # Save a snapshot every 100 iterations. The variable is read
                # in a separate run so the saved image is the value after this
                # step's update, and it is only copied out when needed.
                target_image=sess.run(target)
                # Undo the -128 centring applied when the inputs were loaded.
                image = np.clip(target_image+128,0,255).astype(np.uint8)
                Image.fromarray(image).save("neural_%d.jpg" % (i+1))

if __name__=='__main__':
    # Pixel values are shifted by -128 on load so they are roughly
    # zero-centred, which can speed up convergence.
    # Images are converted to RGB first: greyscale, palette or RGBA files
    # would otherwise not have the three channels the first convolution needs.
    # The ``with`` blocks close the image files once they have been decoded.
    with Image.open(FLAGS.style_image) as style_file:
        style=np.array(style_file.convert('RGB')).astype(np.float32)-128.0
    with Image.open(FLAGS.content_image) as content_file:
        content=np.array(content_file.convert('RGB')).astype(np.float32)-128.0
    stylize(style,content,FLAGS.learning_rate,FLAGS.epochs)

 

 

 

 

 

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章