Question

我尝试使用以下脚本训练DNN：

import numpy as np
import os, sys
import argparse
from PIL import Image
from freeze_graph import freeze_graph
import tensorflow as tf
import time

from net import *
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "./"))
from custom_vgg16 import *


# gram matrix per layer
def gram_matrix(x):
    assert isinstance(x, tf.Tensor)
    b, h, w, ch = x.get_shape().as_list()
    features = tf.reshape(x, [b, h*w, ch])
    # gram = tf.batch_matmul(features, features, adj_x=True)/tf.constant(ch*w*h, tf.float32)
    gram = tf.matmul(features, features, adjoint_a=True)/tf.constant(ch*w*h, tf.float32)
    return gram

# total variation denoising
def total_variation_regularization(x, beta=1):
    assert isinstance(x, tf.Tensor)
    wh = tf.constant([[[[ 1], [ 1], [ 1]]], [[[-1], [-1], [-1]]]], tf.float32)
    ww = tf.constant([[[[ 1], [ 1], [ 1]], [[-1], [-1], [-1]]]], tf.float32)
    tvh = lambda x: conv2d(x, wh, p='SAME')
    tvw = lambda x: conv2d(x, ww, p='SAME')
    dh = tvh(x)
    dw = tvw(x)
    tv = (tf.add(tf.reduce_sum(dh**2, [1, 2, 3]), tf.reduce_sum(dw**2, [1, 2, 3]))) ** (beta / 2.)
    return tv

parser = argparse.ArgumentParser(description='Real-time style transfer')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--dataset', '-d', default='dataset', type=str,
                    help='dataset directory path (according to the paper, use MSCOCO 80k images)')
parser.add_argument('--style_image', '-s', type=str, required=True,
                    help='style image path')
parser.add_argument('--batchsize', '-b', type=int, default=1,
                    help='batch size (default value is 1)')
parser.add_argument('--ckpt', '-c', default=None, type=int,
                    help='the global step of checkpoint file desired to restore.')
parser.add_argument('--lambda_tv', '-l_tv', default=10e-4, type=float,
                    help='weight of total variation regularization according to the paper to be set between 10e-4 and 10e-6.')
parser.add_argument('--lambda_feat', '-l_feat', default=1e0, type=float)
parser.add_argument('--lambda_style', '-l_style', default=1e1, type=float)
parser.add_argument('--epoch', '-e', default=2, type=int)
parser.add_argument('--lr', '-l', default=1e-3, type=float)
parser.add_argument('--pb', '-pb', default=True, type=bool, help='save a pb format as well.')
args = parser.parse_args()

data_dict = loadWeightsData('./vgg16.npy')

batchsize = args.batchsize
gpu = args.gpu
dataset = args.dataset
epochs = args.epoch
learning_rate = args.lr
ckpt = args.ckpt
lambda_tv = args.lambda_tv
lambda_f = args.lambda_feat
lambda_s = args.lambda_style
style_image = args.style_image
save_pb = args.pb
gpu = args.gpu

style_name, _ = os.path.splitext(style_image.split(os.sep)[-1])

fpath = os.listdir(args.dataset)
imagepaths = []
for fn in fpath:
    base, ext = os.path.splitext(fn)
    if ext == '.jpg' or ext == '.png':
        imagepath = os.path.join(dataset, fn)
        imagepaths.append(imagepath)
data_len = len(imagepaths)
iterations = int(data_len / batchsize)
print ('Number of traning images: {}'.format(data_len))
print ('{} epochs, {} iterations per epoch'.format(epochs, iterations))

style_np = np.asarray(Image.open(style_image).convert('RGB').resize((224, 224)), dtype=np.float32)
styles_np = [style_np for x in range(batchsize)]


if gpu > -1:
    device = '/gpu:{}'.format(gpu)
else:
    device = '/cpu:0'

with tf.device(device):

    inputs = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3], name='input')
    net = FastStyleNet()
    saver = tf.train.Saver(restore_sequentially=True)
    saver_def = saver.as_saver_def()


    target = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3])
    outputs = net(inputs)

    # style target feature
    # compute gram maxtrix of style target
    vgg_s = custom_Vgg16(target, data_dict=data_dict)
    feature_ = [vgg_s.conv1_2, vgg_s.conv2_2, vgg_s.conv3_3, vgg_s.conv4_3, vgg_s.conv5_3]
    gram_ = [gram_matrix(l) for l in feature_]

    # content target feature 
    vgg_c = custom_Vgg16(inputs, data_dict=data_dict)
    feature_ = [vgg_c.conv1_2, vgg_c.conv2_2, vgg_c.conv3_3, vgg_c.conv4_3, vgg_c.conv5_3]

    # feature after transformation 
    vgg = custom_Vgg16(outputs, data_dict=data_dict)
    feature = [vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3, vgg.conv5_3]

    # compute feature loss
    loss_f = tf.zeros(batchsize, tf.float32)
    for f, f_ in zip(feature, feature_):
        loss_f += lambda_f * tf.reduce_mean(tf.subtract(f, f_) ** 2, [1, 2, 3])

    # compute style loss
    gram = [gram_matrix(l) for l in feature]
    loss_s = tf.zeros(batchsize, tf.float32)
    for g, g_ in zip(gram, gram_):
        loss_s += lambda_s * tf.reduce_mean(tf.subtract(g, g_) ** 2, [1, 2])

    # total variation denoising
    loss_tv = lambda_tv * total_variation_regularization(outputs)

    # total loss
    loss = loss_s + loss_f + loss_tv

    # optimizer
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

    ckpt_directory = './ckpts/{}/'.format(style_name)
    if not os.path.exists(ckpt_directory):
        os.makedirs(ckpt_directory)

    # training
    tf.global_variables_initializer().run()

    if ckpt:
        if ckpt < 0:
            checkpoint = tf.train.get_checkpoint_state(ckpt_directory)
            input_checkpoint = checkpoint.model_checkpoint_path
        else:
            input_checkpoint =  ckpt_directory + style_name + '-{}'.format(ckpt)
        saver.restore(sess, input_checkpoint)
        print ('Checkpoint {} restored.'.format(ckpt))

    for epoch in range(1, epochs + 1):
        imgs = np.zeros((batchsize, 224, 224, 3), dtype=np.float32)
        for i in range(iterations):
            for j in range(batchsize):
                p = imagepaths[i * batchsize + j]
                imgs[j] = np.asarray(Image.open(p).convert('RGB').resize((224, 224)), np.float32)
            feed_dict = {inputs: imgs, target: styles_np}
            loss_, _= sess.run([loss, train_step,], feed_dict=feed_dict)
            print('[epoch {}/{}] batch {}/{}... loss: {}'.format(epoch, epochs, i + 1, iterations, loss_[0]))    
        saver.save(sess, ckpt_directory + style_name, global_step=epoch)

if save_pb:
    if not os.path.exists('./pbs'):
        os.makedirs('./pbs')
    freeze_graph(ckpt_directory, './pbs/{}.pb'.format(style_name), 'output')

当我跑步的时候，它会在图像上进行训练（我目前只使用一个图像来完成整个过程）并在命令行打印出来：

D:\myName\tensorflow-fast-neuralstyle>python train.py -s picasso.jpg -d trainTest -g 0
C:\ProgramData\Anaconda3\lib\site-packages\h5py\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Number of traning images: 1
2 epochs, 1 iterations per epoch
2018-05-16 18:47:33.268196: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2018-05-16 18:47:33.582973: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1356] Found device 0 with properties:
name: GeForce GTX 1070 major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:01:00.0
totalMemory: 8.00GiB freeMemory: 6.63GiB
2018-05-16 18:47:33.590004: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:34.243696: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:34.247206: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929]      0
2018-05-16 18:47:34.249841: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0:   N
2018-05-16 18:47:34.252015: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
[epoch 1/2] batch 1/1... loss: 32216618.0
[epoch 2/2] batch 1/1... loss: 27523674.0
2018-05-16 18:47:55.451428: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:55.456462: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:55.462478: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929]      0
2018-05-16 18:47:55.465806: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0:   N
2018-05-16 18:47:55.468555: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)

哪个都没关系，直到我收到此错误：

InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'save/SaveV2': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='CPU'

似乎脚本可以找到我的GPU运行，但有些事情阻止它完成我不理解。关于此错误的所有其他帖子都说设置了＆＃39; allow_soft_placement＆＃39;参数为True，但在这个脚本中它已经是。

任何帮助都会受到大力赞赏。谢谢！

p.s这个generate.py文件将使用经过训练的模型

import numpy as np
import argparse
import tensorflow as tf
import os
from PIL import Image

parser = argparse.ArgumentParser(description='Real-time style transfer image generator')
parser.add_argument('--input', '-i', type=str, help='content image')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--style', '-s', default=None, type=str, help='style model name')
parser.add_argument('--ckpt', '-c', default=-1, type=int, help='checkpoint to be loaded')
parser.add_argument('--out', '-o', default='stylized_image.jpg', type=str, help='stylized image\'s name')
parser.add_argument('--pb', '-pb', default=False, type=bool, help='load with pb')

args = parser.parse_args()

if not os.path.exists('./images/output/'):
        os.makedirs('./images/output/')

outfile_path = './images/output/' + args.out
content_image_path = args.input
style_name = args.style
ckpt = args.ckpt
load_with_pb = args.pb
gpu = args.gpu

original_image = Image.open(content_image_path).convert('RGB')

img = np.asarray(original_image.resize((224, 224)), dtype=np.float32)
shaped_input = img.reshape((1,) + img.shape)

if gpu > -1:
    device = '/gpu:{}'.format(gpu)
else:
    device = '/cpu:0'


with tf.device(device):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if load_with_pb:
            from tensorflow.core.framework import graph_pb2
            graph_def = graph_pb2.GraphDef()
            with open('./pbs/{}.pb'.format(style_name), "rb") as f:
                graph_def.ParseFromString(f.read())
            input_image, output = tf.import_graph_def(graph_def, return_elements=['input:0', 'output:0'])

        else:
            if ckpt < 0:
                checkpoint = tf.train.get_checkpoint_state('./ckpts/{}/'.format(style_name))
                input_checkpoint = checkpoint.model_checkpoint_path
            else:
                input_checkpoint = './ckpts/{}/{}-{}'.format(style_name, style_name, ckpt)
            saver = tf.train.import_meta_graph(input_checkpoint + '.meta')
            saver.restore(sess, input_checkpoint)
            graph = tf.get_default_graph()

            input_image = graph.get_tensor_by_name('input:0')
            output = graph.get_tensor_by_name('output:0')

        out = sess.run(output, feed_dict={input_image: shaped_input})

out = out.reshape((out.shape[1:]))
im = Image.fromarray(np.uint8(out))

im = im.resize(original_image.size, resample=Image.LANCZOS)
im.save(outfile_path)

Answer 1

tf.train.Saver等I / O节点无法放置在GPU上。创建网络后，在tf.device上下文之外创建它们。

如何让我的tensorflow脚本在我的GPU上运行？

1 个答案: