Abnormal performance problem with a convolutional network

Date: 2018-02-13 19:53:14

Tags: python performance tensorflow

For a few days now I have had a seemingly unsolvable problem. I am trying to solve a classification task with a convolutional network:

import tensorflow as tf
import time

def cnn_net(input_layer, labels, dropout):
    """Model function for CNN."""
    i = 0

    # Ten blocks of: k x k conv (VALID, stride 1) followed by 2x2 max pool (VALID, stride 1)
    i += 1
    shape = [2, 2, 1, 8]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv1 = tf.nn.conv2d(input_layer, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv1 = tf.nn.relu(tf.add(conv1, b))
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [4, 4, 8, 8]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv2 = tf.nn.conv2d(pool1, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv2 = tf.nn.relu(tf.add(conv2, b))
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [8, 8, 8, 8]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv3 = tf.nn.conv2d(pool2, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv3 = tf.nn.relu(tf.add(conv3, b))
    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [8, 8, 8, 8]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv4 = tf.nn.conv2d(pool3, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv4 = tf.nn.relu(tf.add(conv4, b))
    pool4 = tf.nn.max_pool(conv4, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [8, 8, 8, 16]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv5 = tf.nn.conv2d(pool4, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv5 = tf.nn.relu(tf.add(conv5, b))
    pool5 = tf.nn.max_pool(conv5, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [16, 16, 16, 16]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv6 = tf.nn.conv2d(pool5, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv6 = tf.nn.relu(tf.add(conv6, b))
    pool6 = tf.nn.max_pool(conv6, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [16, 16, 16, 16]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv7 = tf.nn.conv2d(pool6, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv7 = tf.nn.relu(tf.add(conv7, b))
    pool7 = tf.nn.max_pool(conv7, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [32, 32, 16, 32]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv8 = tf.nn.conv2d(pool7, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv8 = tf.nn.relu(tf.add(conv8, b))
    pool8 = tf.nn.max_pool(conv8, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [220, 220, 32, 16]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv9 = tf.nn.conv2d(pool8, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv9 = tf.nn.relu(tf.add(conv9, b))
    pool9 = tf.nn.max_pool(conv9, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    i += 1
    shape = [70, 70, 16, 1]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    conv10 = tf.nn.conv2d(pool9, W, strides=[1, 1, 1, 1], padding='VALID', name='conv' + str(i))
    conv10 = tf.nn.relu(tf.add(conv10, b))
    pool10 = tf.nn.max_pool(conv10, ksize=[1, 2, 2, 1], strides=[1, 1, 1, 1], padding='VALID', name='conv_pool_' + str(i))

    last_layer = pool10

    print last_layer.get_shape().as_list()
    # dim = reduce(operator.mul, [int(x) for x in last_layer.shape[1:]], 1)
    dim = 256  # flattened size of pool10 for a 400x400 input (16 * 16 * 1)
    reduced = 512

    # Dense Layer
    output_flat = tf.reshape(last_layer, [-1, dim])

    dense_w = tf.Variable(tf.truncated_normal([dim, reduced], stddev=0.1))
    dense_b = tf.Variable(tf.constant(0.1, shape=[reduced]))
    dense1 = tf.nn.sigmoid(tf.matmul(output_flat, dense_w) + dense_b)
    # Note: `rate` here is the fraction of units to drop, and tf.layers.dropout
    # is only active when training=True is passed (it defaults to False).
    dropout1 = tf.layers.dropout(inputs=dense1, rate=dropout)

    dense_w = tf.Variable(tf.truncated_normal([reduced, 3], stddev=0.1))
    dense_b = tf.Variable(tf.constant(0.1, shape=[3]))
    logits = tf.matmul(dropout1, dense_w) + dense_b
    losses = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)
    loss = tf.reduce_mean(losses)

    train_operation = tf.train.AdamOptimizer().minimize(loss)

    return logits, train_operation, loss
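
For reference, the hard-coded dim = 256 agrees with what the commented-out reduce(...) line would compute: every VALID conv or pool with stride 1 shrinks the feature map by kernel_size - 1. A minimal check of my own (not part of the original post), tracing the spatial size through the stack above for a 400x400 input:

def valid_out(size, kernel):
    # output size of a VALID conv/pool with stride 1
    return size - kernel + 1

size = 400
for k in [2, 4, 8, 8, 8, 16, 16, 32, 220, 70]:
    size = valid_out(size, k)  # k x k convolution
    size = valid_out(size, 2)  # 2x2 max pool, stride 1
print size  # 16, so pool10 is 16x16x1 and the flattened dim is 256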

The training itself looks like this:

image_size = (400, 400)
inputs = tf.placeholder(tf.float32, [None, image_size[0], image_size[1], 1], name='input_data')
targets = tf.placeholder(tf.float32, [None, 3], name='targets')  # softmax_cross_entropy_with_logits_v2 expects float labels
keep_prob = tf.placeholder(tf.float32, name='keep_prob')
logits, train_op, train_loss = cnn_net(inputs, targets, keep_prob)
session = tf.Session()
saver = tf.train.Saver(max_to_keep=1)
ckpt = tf.train.get_checkpoint_state(LOG)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(session, ckpt.model_checkpoint_path)
    print "Model restored..."
else:
    session.run(tf.global_variables_initializer())
    saver.save(session, LOG + "model.ckpt", 0)
    print "New model created"
tf.get_default_graph().finalize()

if TRAIN_FLAG == 'train':
    print 'train...'
    train_count = 0
    valid_set = tuple(chart_generator(batch_size, window_size, observation_size, image_size, valid=True))
    train_set = tuple(chart_generator(batch_size, window_size, observation_size, image_size))
    for i in range(epochs):
        traind_scores = []
        ii = 0
        epoch_loss = []
        for X_batch, y_batch in train_set:
            train_count += 1
            # for k in range(16):
            #     print y_batch[k]
            #     cv2.imshow('chart', X_batch[k])
            #     cv2.waitKey(1000)
            t = time.time()
            print 'starting...'
            print X_batch.shape, y_batch.shape
            # run the optimizer step and fetch the loss value
            c, _ = session.run((train_loss, train_op), feed_dict={inputs: X_batch, targets: y_batch, keep_prob: 0.5})

The input data has shape (32, 400, 400, 1) and the target data has shape (32, 3). My PC runs Linux Mint with an Nvidia GTX 1070 GPU. I tested CUDA 8 (TF 1.3) with cuDNN 6 and switched to CUDA 9 (TF 1.5) with cuDNN 7 to find out what is wrong. When I start training, TensorFlow tells me it found my GPU and everything looks fine. But the first training iteration (one batch of 32 images) takes 5.2 hours, and every training step after that takes 45 seconds.
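
For measuring this, a minimal timing loop over one fixed batch (reusing the placeholder names from the snippets above) separates the one-off first-run costs, such as memory allocation and cuDNN algorithm selection, from the steady-state step time:

t = time.time()
for step in range(5):
    session.run(train_op, feed_dict={inputs: X_batch, targets: y_batch, keep_prob: 0.5})
    print 'step %d: %.2f s' % (step, time.time() - t)
    t = time.time()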

A few weeks ago I trained a different, fully convolutional network on this same machine at 0.2 seconds per iteration:

def inference_conv(self, image, keep_prob=1.0):
        """
        Semantic segmentation network definition
        :param image: input image. Should have values in range 0-255
        :param keep_prob:
        :return:
        """
        conv1 = self.complete_conv2d(image, [9, 9, self.NUM_CHANNELS, 16], tf.nn.relu)
        conv2 = self.complete_conv2d(conv1, [9, 9, 16, 16], tf.nn.relu)
        pool1 = self.max_pool_2x2(conv2)
        conv3 = self.complete_conv2d(pool1, [9, 9, 16, 32], tf.nn.relu)
        conv4 = self.complete_conv2d(conv3, [3, 3, 32, 32], tf.nn.relu)
        pool2 = self.max_pool_2x2(conv4)
        conv5 = self.complete_conv2d(pool2, [3, 3, 32, 32], tf.nn.relu)
        # conv6 = complete_conv2d(conv5, [3, 3, 32, 32], True)
        # conv7 = complete_conv2d(conv6, [3, 3, 32, 32], True)
        conv8 = self.complete_conv2d(conv5, [3, 3, 32, 32], tf.nn.relu)
        pool3 = self.max_pool_2x2(conv8)
        conv9 = self.complete_conv2d(pool3, [3, 3, 32, 64], tf.nn.relu)
        # conv10 = complete_conv2d(conv9, [3, 3, 64, 64], True)
        # conv11 = complete_conv2d(conv10, [3, 3, 64, 64], True)
        conv12 = self.complete_conv2d(conv9, [5, 5, 64, 64], tf.nn.relu)
        pool4 = self.max_pool_2x2(conv12)
        conv13 = self.complete_conv2d(pool4, [5, 5, 64, 128], tf.nn.relu)
        # conv14 = complete_conv2d(conv13, [3, 3, 128, 128], True)
        # conv15 = complete_conv2d(conv14, [3, 3, 128, 128], True)
        conv16 = self.complete_conv2d(conv13, [5, 5, 128, 128], tf.nn.relu)

        pool5 = self.max_pool_2x2(conv16)
        conv17 = self.complete_conv2d(pool5, [7, 7, 128, 256], tf.nn.relu)
        dropout1 = tf.nn.dropout(conv17, keep_prob=keep_prob)
        conv18 = self.complete_conv2d(dropout1, [1, 1, 256, 256], tf.nn.relu)
        dropout2 = tf.nn.dropout(conv18, keep_prob=keep_prob)
        conv19 = self.complete_conv2d(dropout2, [1, 1, 256, self.NUM_OF_CLASSESS])
        dropout3 = tf.nn.dropout(conv19, keep_prob=keep_prob)

        deconv1 = self.deconv(dropout3, [4, 4, 64, self.NUM_OF_CLASSESS], tf.shape(pool4), activation=tf.nn.relu)
        fuse1 = tf.add(deconv1, pool4)

        deconv2 = self.deconv(fuse1, [4, 4, 32, 64], tf.shape(pool3), activation=tf.nn.relu)
        fuse2 = tf.add(deconv2, pool3)

        shape = tf.shape(image)
        deconv_shape2 = tf.stack([shape[0], shape[1], shape[2], self.NUM_OF_CLASSESS])
        deconv3 = self.deconv(fuse2, [16, 16, self.NUM_OF_CLASSESS, 32], deconv_shape2, stride=8, activation=tf.nn.relu)

        return deconv3
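
The complete_conv2d, max_pool_2x2 and deconv helpers are not shown in the post. For context only, a plausible minimal version of complete_conv2d (an assumption, not the original implementation; it uses SAME padding so that the later tf.add skip connections line up) might be:

def complete_conv2d(self, x, shape, activation=None):
    # shape = [k_h, k_w, in_channels, out_channels]
    W = tf.Variable(tf.truncated_normal(shape, stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[shape[-1]]))
    y = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') + b
    return activation(y) if activation is not None else y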

If I start that training now, it still runs at 0.2 seconds per iteration, while the other one takes hours... Does anyone know what is wrong with my code? The new network has fewer parameters and the same error function. The only difference is the fully connected layers (the larger network is a segmentation network without any fully connected layers).
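
One caveat in this comparison: parameter count is not the same as compute cost. For a VALID, stride-1 convolution, the multiply-accumulate count grows with the output area times the kernel volume. A rough back-of-the-envelope sketch of my own (layer sizes taken from the first network above, not part of the original post):

def conv_macs(in_size, shape):
    # multiply-accumulates of a VALID, stride-1 convolution
    # on an in_size x in_size input
    kh, kw, cin, cout = shape
    out = in_size - kh + 1
    return out * out * kh * kw * cin * cout

# conv9 sees a 306x306 input (see the size trace above)
print conv_macs(306, [220, 220, 32, 16])  # ~1.9e11 MACs for this single layer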

0 Answers:

No answers yet.