I am writing an implementation of style transfer by loading a VGG model from Keras and feeding it into a TensorFlow model.
I am using the Adam optimizer. The loss function is decreasing, but it decreases very slowly and plateaus at around 10^8. Also, the style loss is huge (on the order of 10^8) while the content loss is much smaller (on the order of 10^5). This is strange, because the style transfer paper says to scale the content loss down by a factor of 100 or 1000 when computing the total loss.
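For reference, the total loss in Gatys et al. is L_total = alpha * L_content + beta * L_style, with the ratio alpha/beta reported around 1e-3 or 1e-4. A minimal sketch of that weighting, assuming scalar loss values (the variable names here are mine, not from my code below):

alpha = 1e-3  # content weight; the paper reports alpha/beta of about 1e-3 or 1e-4
beta = 1.0    # style weight
total = alpha * content_loss_value + beta * style_loss_value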
I have tried raising the learning rate, but that only makes the gradient overshoot.
I suspect there must be a bug in my implementation, but despite endless searching I have not been able to find the problem.
The code is below:
# coding: utf-8
# In[1]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
import tensorflow as tf
import tensorflow.contrib.eager as tfe
import numpy as np
import matplotlib.pyplot as plt
# In[2]:
content_image_path = './skyline.jpg'
style_image_path = './starry_night.jpg'
output_image_path = './output.jpg'
# In[4]:
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
# In[5]:
content_image = image.load_img(content_image_path, target_size=(224, 224))
#plt.imshow(content_image)
content_arr = image.img_to_array(content_image)
content_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(content_arr, axis=0)), tf.float64)
print(content_arr.shape)  # static shape check; no tf.Session exists yet, so sess.run() would fail here
# In[6]:
style_image = image.load_img(style_image_path, target_size=(224, 224))
#plt.imshow(style_image)
style_arr = image.img_to_array(style_image)
style_arr = tf.convert_to_tensor(preprocess_input(np.expand_dims(style_arr, axis=0)), tf.float64)
print(style_arr.shape)  # static shape check, as above
# In[7]:
# generate a random image with pixel values between 0 and 255
o_input = np.random.randint(low=0, high=256, size=(224, 224, 3)).astype('float64')
plt.imshow(o_input.astype('uint8'))  # imshow expects uint8 (or floats in [0, 1])
o_input_old = np.copy(o_input)  # keep an unprocessed copy for reference
o_input = preprocess_input(np.expand_dims(o_input, axis=0))
o_input_var = tf.Variable(o_input, name="gen_img_vector", trainable=True)
# In[8]:
content_model = VGG16(include_top=False, weights='imagenet', input_tensor=content_arr, input_shape=(224, 224, 3))
style_model = VGG16(include_top=False, weights='imagenet', input_tensor=style_arr, input_shape=(224, 224, 3))
train_model = VGG16(include_top=False, weights='imagenet', input_tensor=o_input_var, input_shape=(224, 224, 3))
# In[10]:
content_model.summary()
# In[11]:
def get_feature_rep(layer_type, layer_names, model):
    outputs = []
    for name in layer_names:
        out = model.get_layer(name=name).output
        N = tf.shape(out)[3]  # number of channels
        M = tf.multiply(tf.shape(out)[1], tf.shape(out)[2])  # pixels per channel
        # Flatten each channel into a 1-D tensor: rows become channels, columns pixels
        out = tf.transpose(tf.reshape(out, (M, N)))
        if layer_type == 'style':
            out = get_gram_matrix(out)
        print(out)
        outputs.append(out)
    return outputs
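To double-check that the reshape/transpose above really puts one channel per row, here is a toy-shape verification (the array name and sizes are mine):

x = np.arange(2 * 2 * 3).reshape(1, 2, 2, 3)  # 1 x H x W x C with H = W = 2, C = 3
flat = x.reshape(4, 3).T                      # same layout as tf.transpose(tf.reshape(out, (M, N)))
print(flat.shape)                             # (3, 4): 3 channels, 4 pixels each
print(flat[0])                                # [0 3 6 9]: exactly the channel-0 values of x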
# In[12]:
def get_gram_matrix(F):
    G = tf.matmul(F, tf.transpose(F))
    return G
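As a quick shape sanity check (numpy stand-in, names mine): the Gram matrix of an N x M feature matrix should come out N x N, one inner product per channel pair:

F_test = np.random.rand(64, 224 * 224)  # hypothetical layer: 64 channels, 224*224 pixels each
G_test = F_test @ F_test.T              # same product as get_gram_matrix computes
assert G_test.shape == (64, 64)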
# In[13]:
def style_loss(Gs, As):
    total = tf.constant(0.0, tf.float64)  # a plain tensor accumulator; this does not need to be a Variable
    style_reps = list(zip(Gs, As))
    for layer in style_reps:
        loss = tf.reduce_sum(tf.cast(tf.squared_difference(layer[0], layer[1]), tf.float64), [0, 1])
        N_layer = tf.shape(layer[0])[0]  # layer[0] is the N x N Gram matrix, so this is N (channels)...
        M_layer = tf.shape(layer[0])[1]  # ...and so is this; M (pixels per channel) is not recoverable from the Gram matrix's shape
        den = tf.square(tf.cast(tf.multiply(N_layer, M_layer), tf.float64))
        loss = loss / den
        loss = loss * 0.2 / 4.0  # layer weight w_l = 0.2 and the paper's 1/4 factor
        total = total + loss
    return total
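For scale, a back-of-the-envelope check of the paper's per-layer normalizer 1/(4 * N^2 * M^2) (the N and M values below are my assumption for block1_conv1 at 224x224 input):

N, M = 64, 224 * 224  # channels and pixels per channel for block1_conv1 (assumed)
print(1.0 / (4.0 * N ** 2 * M ** 2))  # ~2.4e-14: the Gram differences must be enormous for a 1e8 loss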
# In[14]:
def content_loss(P, F):
    loss = tf.reduce_sum(tf.cast(tf.squared_difference(P, F), tf.float64), [0, 1])
    loss = loss / 2.0
    return loss
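This matches the paper's content term L_content = (1/2) * sum((F - P)^2); a minimal numpy check (test arrays are mine):

P_test = np.ones((64, 100))
F_test = np.zeros((64, 100))
print(((F_test - P_test) ** 2).sum() / 2.0)  # 3200.0 = 64 * 100 / 2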
# In[15]:
content_layer_names = ['block4_conv2']
style_layer_names = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1']
# In[32]:
P = tf.squeeze(get_feature_rep('content', content_layer_names, content_model))
# In[34]:
F = tf.squeeze(get_feature_rep('content', content_layer_names, train_model))
# In[18]:
# Each member of As is the Gram matrix (channels x channels) computed from a particular style layer's feature maps
As = get_feature_rep('style', style_layer_names, style_model)
# In[19]:
Gs = get_feature_rep('style', style_layer_names, train_model)
# In[20]:
styleloss = style_loss(Gs, As)
# In[21]:
contentloss = content_loss(P, F)
# In[22]:
total_loss = tf.add(styleloss, tf.multiply(tf.constant(0.01, tf.float64), contentloss))
# In[23]:
optimizer = tf.train.AdamOptimizer(5).minimize(total_loss, var_list=[o_input_var])
# In[26]:
def reprocess(x):
    VGG_MEAN = [103.939, 116.779, 123.68]  # means in BGR order, matching what preprocess_input subtracted
    means = tf.reshape(tf.constant(VGG_MEAN, tf.float64), [1, 1, 3])
    # Undo the ImageNet mean subtraction
    x = tf.add(x, means)
    x = tf.clip_by_value(x, 0, 255)  # clip_by_value returns a new tensor; the result has to be assigned
    # bgr to rgb
    x = x[..., ::-1]
    return x
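A round-trip check of reprocess against preprocess_input (the random test image is mine; with the BGR-ordered means above, the difference should be ~0 up to float error):

rgb = np.random.randint(0, 256, size=(1, 8, 8, 3)).astype('float64')
bgr = preprocess_input(rgb.copy())  # RGB -> BGR plus mean subtraction
with tf.Session() as s:
    restored = s.run(reprocess(tf.constant(bgr)))
print(np.abs(restored - rgb).max())  # ~0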
# In[27]:
saver = tf.train.Saver(tf.global_variables())
# In[28]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # saver.restore(sess, './model/nst_model.ckpt')
    for epoch in range(100):
        _, styleloss_curr, contentloss_curr, loss_curr, new_arr = sess.run(
            [optimizer, styleloss, contentloss, total_loss, o_input_var])
        print('Epoch: %i Content Loss: %.2f Style Loss: %.2f Total Loss: %.2f'
              % (epoch, contentloss_curr, styleloss_curr, loss_curr))
        if epoch % 15 == 0:
            saver.save(sess, './model/nst_model.ckpt')
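One TF1/Keras pitfall I also want to double-check here (an assumption about the setup, not a confirmed diagnosis): tf.global_variables_initializer() re-initializes every variable in the graph, including the VGG weights that Keras already loaded, so running it in a fresh session can silently replace the ImageNet weights with random ones. A hedged sketch that initializes only the new variables instead:

from keras import backend as K
sess = K.get_session()  # reuse the session in which Keras loaded the ImageNet weights
new_vars = [o_input_var] + [v for v in tf.global_variables()
                            if 'Adam' in v.name or 'beta1_power' in v.name or 'beta2_power' in v.name]
sess.run(tf.variables_initializer(new_vars))  # leaves the VGG weights untouched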
# In[30]:
with tf.Session() as sess:
    new_arr = reprocess(new_arr)
    new_im = sess.run(tf.cast(tf.round(tf.squeeze(new_arr)), tf.uint8))
    print(sess.run(tf.shape(new_im)))
    plt.imshow(new_im)