Plateauing loss in neural style transfer

Date: 2018-06-29 10:51:26

Tags: python tensorflow keras deep-learning

I am writing an implementation of style transfer by loading a VGG model from Keras and supplying it to a TensorFlow model.

I am using the Adam optimizer. The loss is decreasing, but very slowly, and it plateaus at around 10^8. Also, the style loss is huge (on the order of 10^8) while the content loss is much smaller (on the order of 10^5). This is odd, since the style transfer paper says to scale the content loss down by a factor of 100 or 1000 when computing the total loss.
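
For reference, this is how I read the weighting in the paper (a minimal sketch, assuming the paper in question is Gatys et al.; alpha, beta and the two loss names are illustrative placeholders, not the variables in my code below):

alpha = 1e-3  # content weight; the paper reports alpha/beta ratios of 1e-3 or 1e-4
beta = 1.0    # style weight
total = alpha * content_loss_value + beta * style_loss_value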

I have tried increasing the learning rate, but that only makes the gradient overshoot.

I suspect there must be a bug in my implementation, but despite endless searching I have not been able to find it.

The code is as follows:

# coding: utf-8
# In[1]:

from keras.applications.vgg16 import VGG16
from keras.models import Model
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
import matplotlib.pyplot as plt


# In[2]:


content_image_path = './skyline.jpg'
style_image_path = './starry_night.jpg'
output_image_path = './output.jpg'

# In[4]:

from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input

# In[5]:

content_image = image.load_img(content_image_path, target_size=(224, 224))
#plt.imshow(content_image)
content_arr = image.img_to_array(content_image)
content_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(content_arr, axis=0)), tf.float64)
print(content_arr.shape)  # sanity-check the preprocessed tensor shape (no session exists yet)

# In[6]:

style_image = image.load_img(style_image_path, target_size=(224, 224))
#plt.imshow(style_image)
style_arr = image.img_to_array(style_image)
style_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(style_arr, axis=0)), tf.float64)
print(style_arr.shape)  # sanity-check the preprocessed tensor shape

# In[7]:

# generate a random image with pixel values between 0 and 255
o_input = np.random.randint(low=0, high=256, size=(224, 224, 3)).astype('float64')
plt.imshow(o_input.astype('uint8'))  # imshow expects uint8 (or floats in [0, 1])
o_input_old = np.copy(o_input)
o_input = preprocess_input(np.expand_dims(o_input, axis=0))
print(o_input_old)

o_input_var = tf.Variable(o_input, name="gen_img_vector", trainable=True)

# In[8]:

content_model = VGG16(include_top=False, weights='imagenet', input_tensor=content_arr, input_shape=(224, 224, 3))
style_model = VGG16(include_top=False, weights='imagenet', input_tensor=style_arr, input_shape=(224, 224, 3))
train_model = VGG16(include_top=False, weights='imagenet', input_tensor=o_input_var, input_shape=(224, 224, 3))

# In[10]:

content_model.summary()

# In[11]:

def get_feature_rep(layer_type, layer_names, model):

    outputs = []
    for name in layer_names:
        out = model.get_layer(name=name).output

        N = tf.shape(out)[3]  # number of feature maps (channels)
        M = tf.multiply(tf.shape(out)[1], tf.shape(out)[2])  # pixels per feature map (height x width)

        out = tf.transpose(tf.reshape(out, (M, N)))  # flatten each channel into a row: shape (N, M)
        if layer_type == 'style':
            out = get_gram_matrix(out)
        print(out)
        outputs.append(out)
    return outputs

# In[12]:

def get_gram_matrix(F):
    G = tf.matmul(F, tf.transpose(F))
    return G
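
# For context: the Gram matrix here is G = F.F^T, i.e.
# G[i, j] = sum_k F[i, k] * F[j, k] -- the inner product between the
# flattened feature maps of channels i and j, as in the paper.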


# In[13]:


def style_loss(Gs, As):

    total = tf.constant(0.0, tf.float64)  # running total as a plain tensor; a Variable is unnecessary here
    style_reps = list(zip(Gs, As))

    for layer in style_reps:
        loss = tf.reduce_sum(tf.cast(tf.squared_difference(layer[0], layer[1]), tf.float64), [0, 1])
        N_layer = tf.shape(layer[0])[0]
        M_layer = tf.shape(layer[0])[1]
        den = tf.square(tf.cast(tf.multiply(N_layer, M_layer), tf.float64))
        loss = loss/den
        loss = loss*0.2/4.0 #weighting loss
        total = total + loss

    return total


# In[14]:

def content_loss(P, F):
#     loss = tf.Variable(tf.constant(0.0, tf.float64), name="content_loss", trainable=False)
    loss = tf.reduce_sum(tf.cast(tf.squared_difference(P, F), tf.float64), [0, 1])
    loss = loss/2.0
    return loss
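
# For reference, the intent here is the content loss from the paper:
# L_content = 1/2 * sum_ij (F_ij - P_ij)^2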

# In[15]:

content_layer_names = ['block4_conv2']
style_layer_names = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']

# In[32]:

P = tf.squeeze(get_feature_rep('content', content_layer_names, content_model))

# In[34]:

F = tf.squeeze(get_feature_rep('content', content_layer_names, train_model))

# In[18]:

# Each member of As is the Gram matrix of the style features at one layer (dim. channels x channels)
As = get_feature_rep('style', style_layer_names, style_model)

# In[19]:

Gs = get_feature_rep('style', style_layer_names, train_model)

# In[20]:

styleloss = style_loss(Gs, As)

# In[21]:

contentloss = content_loss(P, F)

# In[22]:

total_loss = tf.add(styleloss, tf.multiply(tf.constant(0.01, tf.float64), contentloss))


# In[23]:

optimizer = tf.train.AdamOptimizer(5).minimize(total_loss, var_list=[o_input_var])

# In[26]:

def reprocess(x):
    VGG_MEAN = [103.939, 116.779, 123.68]  # per-channel (BGR) means subtracted by preprocess_input
    means = tf.reshape(tf.constant(VGG_MEAN, tf.float64), [1, 1, 3])
    # undo the ImageNet mean subtraction (x is still in BGR order at this point)
    x = tf.add(x, means)
    x = tf.clip_by_value(x, 0, 255)
    #bgr to rgb
    x = x[..., ::-1]
    return x

# In[27]:

saver = tf.train.Saver(tf.global_variables())

# In[28]:

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

#     saver.restore(sess, './model/nst_model.ckpt')

    for epoch in range(100):
        _, styleloss_curr, contentloss_curr, loss_curr, new_arr = sess.run([optimizer, styleloss, contentloss, total_loss, o_input_var])

        print('Epoch: %i    Content Loss: %.2f    Style Loss: %.2f    Total Loss: %.2f' % (epoch, contentloss_curr, styleloss_curr, loss_curr))

        if epoch % 15 == 0:
            saver.save(sess, './model/nst_model.ckpt')

# In[30]:

with tf.Session() as sess:
    new_arr = reprocess(new_arr)
    new_im = sess.run(tf.cast(tf.round(tf.squeeze(new_arr)), tf.uint8))
#     new_im = new_im[...,::-1]
#     print(sess.run(new_arr[0]/255))
    print(sess.run(tf.shape(new_im)))
    plt.imshow(new_im)

0 Answers:

No answers