我尝试使用以下脚本训练DNN:
import numpy as np
import os, sys
import argparse
from PIL import Image
from freeze_graph import freeze_graph
import tensorflow as tf
import time
from net import *
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "./"))
from custom_vgg16 import *
# gram matrix per layer
def gram_matrix(x):
assert isinstance(x, tf.Tensor)
b, h, w, ch = x.get_shape().as_list()
features = tf.reshape(x, [b, h*w, ch])
# gram = tf.batch_matmul(features, features, adj_x=True)/tf.constant(ch*w*h, tf.float32)
gram = tf.matmul(features, features, adjoint_a=True)/tf.constant(ch*w*h, tf.float32)
return gram
# total variation denoising
def total_variation_regularization(x, beta=1):
assert isinstance(x, tf.Tensor)
wh = tf.constant([[[[ 1], [ 1], [ 1]]], [[[-1], [-1], [-1]]]], tf.float32)
ww = tf.constant([[[[ 1], [ 1], [ 1]], [[-1], [-1], [-1]]]], tf.float32)
tvh = lambda x: conv2d(x, wh, p='SAME')
tvw = lambda x: conv2d(x, ww, p='SAME')
dh = tvh(x)
dw = tvw(x)
tv = (tf.add(tf.reduce_sum(dh**2, [1, 2, 3]), tf.reduce_sum(dw**2, [1, 2, 3]))) ** (beta / 2.)
return tv
parser = argparse.ArgumentParser(description='Real-time style transfer')
parser.add_argument('--gpu', '-g', default=-1, type=int,
help='GPU ID (negative value indicates CPU)')
parser.add_argument('--dataset', '-d', default='dataset', type=str,
help='dataset directory path (according to the paper, use MSCOCO 80k images)')
parser.add_argument('--style_image', '-s', type=str, required=True,
help='style image path')
parser.add_argument('--batchsize', '-b', type=int, default=1,
help='batch size (default value is 1)')
parser.add_argument('--ckpt', '-c', default=None, type=int,
help='the global step of checkpoint file desired to restore.')
parser.add_argument('--lambda_tv', '-l_tv', default=10e-4, type=float,
help='weight of total variation regularization according to the paper to be set between 10e-4 and 10e-6.')
parser.add_argument('--lambda_feat', '-l_feat', default=1e0, type=float)
parser.add_argument('--lambda_style', '-l_style', default=1e1, type=float)
parser.add_argument('--epoch', '-e', default=2, type=int)
parser.add_argument('--lr', '-l', default=1e-3, type=float)
parser.add_argument('--pb', '-pb', default=True, type=bool, help='save a pb format as well.')
args = parser.parse_args()
data_dict = loadWeightsData('./vgg16.npy')
batchsize = args.batchsize
gpu = args.gpu
dataset = args.dataset
epochs = args.epoch
learning_rate = args.lr
ckpt = args.ckpt
lambda_tv = args.lambda_tv
lambda_f = args.lambda_feat
lambda_s = args.lambda_style
style_image = args.style_image
save_pb = args.pb
gpu = args.gpu
style_name, _ = os.path.splitext(style_image.split(os.sep)[-1])
fpath = os.listdir(args.dataset)
imagepaths = []
for fn in fpath:
base, ext = os.path.splitext(fn)
if ext == '.jpg' or ext == '.png':
imagepath = os.path.join(dataset, fn)
imagepaths.append(imagepath)
data_len = len(imagepaths)
iterations = int(data_len / batchsize)
print ('Number of traning images: {}'.format(data_len))
print ('{} epochs, {} iterations per epoch'.format(epochs, iterations))
style_np = np.asarray(Image.open(style_image).convert('RGB').resize((224, 224)), dtype=np.float32)
styles_np = [style_np for x in range(batchsize)]
if gpu > -1:
device = '/gpu:{}'.format(gpu)
else:
device = '/cpu:0'
with tf.device(device):
inputs = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3], name='input')
net = FastStyleNet()
saver = tf.train.Saver(restore_sequentially=True)
saver_def = saver.as_saver_def()
target = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3])
outputs = net(inputs)
# style target feature
# compute gram maxtrix of style target
vgg_s = custom_Vgg16(target, data_dict=data_dict)
feature_ = [vgg_s.conv1_2, vgg_s.conv2_2, vgg_s.conv3_3, vgg_s.conv4_3, vgg_s.conv5_3]
gram_ = [gram_matrix(l) for l in feature_]
# content target feature
vgg_c = custom_Vgg16(inputs, data_dict=data_dict)
feature_ = [vgg_c.conv1_2, vgg_c.conv2_2, vgg_c.conv3_3, vgg_c.conv4_3, vgg_c.conv5_3]
# feature after transformation
vgg = custom_Vgg16(outputs, data_dict=data_dict)
feature = [vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3, vgg.conv5_3]
# compute feature loss
loss_f = tf.zeros(batchsize, tf.float32)
for f, f_ in zip(feature, feature_):
loss_f += lambda_f * tf.reduce_mean(tf.subtract(f, f_) ** 2, [1, 2, 3])
# compute style loss
gram = [gram_matrix(l) for l in feature]
loss_s = tf.zeros(batchsize, tf.float32)
for g, g_ in zip(gram, gram_):
loss_s += lambda_s * tf.reduce_mean(tf.subtract(g, g_) ** 2, [1, 2])
# total variation denoising
loss_tv = lambda_tv * total_variation_regularization(outputs)
# total loss
loss = loss_s + loss_f + loss_tv
# optimizer
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
ckpt_directory = './ckpts/{}/'.format(style_name)
if not os.path.exists(ckpt_directory):
os.makedirs(ckpt_directory)
# training
tf.global_variables_initializer().run()
if ckpt:
if ckpt < 0:
checkpoint = tf.train.get_checkpoint_state(ckpt_directory)
input_checkpoint = checkpoint.model_checkpoint_path
else:
input_checkpoint = ckpt_directory + style_name + '-{}'.format(ckpt)
saver.restore(sess, input_checkpoint)
print ('Checkpoint {} restored.'.format(ckpt))
for epoch in range(1, epochs + 1):
imgs = np.zeros((batchsize, 224, 224, 3), dtype=np.float32)
for i in range(iterations):
for j in range(batchsize):
p = imagepaths[i * batchsize + j]
imgs[j] = np.asarray(Image.open(p).convert('RGB').resize((224, 224)), np.float32)
feed_dict = {inputs: imgs, target: styles_np}
loss_, _= sess.run([loss, train_step,], feed_dict=feed_dict)
print('[epoch {}/{}] batch {}/{}... loss: {}'.format(epoch, epochs, i + 1, iterations, loss_[0]))
saver.save(sess, ckpt_directory + style_name, global_step=epoch)
if save_pb:
if not os.path.exists('./pbs'):
os.makedirs('./pbs')
freeze_graph(ckpt_directory, './pbs/{}.pb'.format(style_name), 'output')
当我跑步的时候,它会在图像上进行训练(我目前只使用一个图像来完成整个过程)并在命令行打印出来:
D:\myName\tensorflow-fast-neuralstyle>python train.py -s picasso.jpg -d trainTest -g 0
C:\ProgramData\Anaconda3\lib\site-packages\h5py\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
from ._conv import register_converters as _register_converters
Number of traning images: 1
2 epochs, 1 iterations per epoch
2018-05-16 18:47:33.268196: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2018-05-16 18:47:33.582973: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1356] Found device 0 with properties:
name: GeForce GTX 1070 major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:01:00.0
totalMemory: 8.00GiB freeMemory: 6.63GiB
2018-05-16 18:47:33.590004: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:34.243696: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:34.247206: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929] 0
2018-05-16 18:47:34.249841: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0: N
2018-05-16 18:47:34.252015: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
[epoch 1/2] batch 1/1... loss: 32216618.0
[epoch 2/2] batch 1/1... loss: 27523674.0
2018-05-16 18:47:55.451428: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:55.456462: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:55.462478: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929] 0
2018-05-16 18:47:55.465806: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0: N
2018-05-16 18:47:55.468555: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
哪个都没关系,直到我收到此错误:
InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'save/SaveV2': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
device='CPU'
似乎脚本可以找到我的GPU运行,但有些事情阻止它完成我不理解。关于此错误的所有其他帖子都说设置了&#39; allow_soft_placement&#39;参数为True,但在这个脚本中它已经是。
任何帮助都会受到大力赞赏。 谢谢!
p.s这个generate.py文件将使用经过训练的模型
import numpy as np
import argparse
import tensorflow as tf
import os
from PIL import Image
parser = argparse.ArgumentParser(description='Real-time style transfer image generator')
parser.add_argument('--input', '-i', type=str, help='content image')
parser.add_argument('--gpu', '-g', default=-1, type=int,
help='GPU ID (negative value indicates CPU)')
parser.add_argument('--style', '-s', default=None, type=str, help='style model name')
parser.add_argument('--ckpt', '-c', default=-1, type=int, help='checkpoint to be loaded')
parser.add_argument('--out', '-o', default='stylized_image.jpg', type=str, help='stylized image\'s name')
parser.add_argument('--pb', '-pb', default=False, type=bool, help='load with pb')
args = parser.parse_args()
if not os.path.exists('./images/output/'):
os.makedirs('./images/output/')
outfile_path = './images/output/' + args.out
content_image_path = args.input
style_name = args.style
ckpt = args.ckpt
load_with_pb = args.pb
gpu = args.gpu
original_image = Image.open(content_image_path).convert('RGB')
img = np.asarray(original_image.resize((224, 224)), dtype=np.float32)
shaped_input = img.reshape((1,) + img.shape)
if gpu > -1:
device = '/gpu:{}'.format(gpu)
else:
device = '/cpu:0'
with tf.device(device):
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
if load_with_pb:
from tensorflow.core.framework import graph_pb2
graph_def = graph_pb2.GraphDef()
with open('./pbs/{}.pb'.format(style_name), "rb") as f:
graph_def.ParseFromString(f.read())
input_image, output = tf.import_graph_def(graph_def, return_elements=['input:0', 'output:0'])
else:
if ckpt < 0:
checkpoint = tf.train.get_checkpoint_state('./ckpts/{}/'.format(style_name))
input_checkpoint = checkpoint.model_checkpoint_path
else:
input_checkpoint = './ckpts/{}/{}-{}'.format(style_name, style_name, ckpt)
saver = tf.train.import_meta_graph(input_checkpoint + '.meta')
saver.restore(sess, input_checkpoint)
graph = tf.get_default_graph()
input_image = graph.get_tensor_by_name('input:0')
output = graph.get_tensor_by_name('output:0')
out = sess.run(output, feed_dict={input_image: shaped_input})
out = out.reshape((out.shape[1:]))
im = Image.fromarray(np.uint8(out))
im = im.resize(original_image.size, resample=Image.LANCZOS)
im.save(outfile_path)
答案 0 :(得分:0)
tf.train.Saver
等I / O节点无法放置在GPU上。 创建网络后,在tf.device
上下文之外创建它们。