Error when applying the SELU activation function with TensorFlow

Time: 2017-07-10 13:48:51

Tags: tensorflow nan activation-function

I am trying to implement the new SELU activation function from https://arxiv.org/pdf/1706.02515. Please see my code below for the details:

import tensorflow as tf
import numpy as np
from PIL import Image
import os
from keras.activations import elu

batch_size = 32

def weight_variable(kernel_shape):
    weights = tf.get_variable(name='weights', shape=kernel_shape, dtype=tf.float32, trainable=True,
                              initializer=tf.truncated_normal_initializer(stddev=0.02))
    return weights

def bias_variable(shape):
    initial = tf.constant(0.0, shape=shape)
    return tf.Variable(initial)

def selu(x):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    #return scale * tf.where(x >= 0.0, x, alpha * tf.exp(x) - alpha)
    return scale * elu(x, alpha)

def conv_layer(x, w_shape, b_shape, padding='SAME'):
    W = weight_variable(w_shape)
    tf.summary.histogram("weights", W)

    b = bias_variable(b_shape)
    tf.summary.histogram("biases", b)

    # Note that I used a stride of 2 on purpose, in order not to use a max-pooling layer.
    activations = selu(tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding=padding) + b)
    tf.summary.histogram(activations.name, activations)

    W1 = tf.shape(x)[1]
    W2 = tf.shape(activations)[1]
    F = w_shape[0]
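    # SAME padding implied by this layer: P = ((W_out - 1) * stride - W_in + F) / 2, with stride = 2.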
    P = tf.divide(tf.add(tf.subtract(tf.multiply(tf.subtract(W2, 1), 2), W1), F), 2)
    return activations, P

def deconv_layer(x, w_shape, b_shape, padding="SAME"):
    W = weight_variable(w_shape)
    tf.summary.histogram("weights", W)

    b = bias_variable(b_shape)
    tf.summary.histogram('biases', b)

    x_shape = tf.shape(x)

    out_shape = tf.stack([x_shape[0], x_shape[1] * 2, x_shape[2] * 2, w_shape[2]])
    # Note that I have used a stride of 2 since I used a stride of 2 in conv layer.
    transposed_activations = tf.nn.conv2d_transpose(x, W, out_shape, [1, 2, 2, 1], padding=padding) + b
    tf.summary.histogram(transposed_activations.name, transposed_activations)
    return transposed_activations

tfrecords_filename_seq = ["P16_db.tfrecords"]
filename_queue = tf.train.string_input_producer(tfrecords_filename_seq, num_epochs=None, shuffle=False, name='queue')
reader = tf.TFRecordReader()

_, serialized_example = reader.read(filename_queue)
features = tf.parse_single_example(
    serialized_example,
    # Defaults are not specified since both keys are required.
    features={
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'annotation_raw': tf.FixedLenFeature([], tf.string)
    })

# This is how we create one example, that is, extract one example from the database.
image = tf.decode_raw(features['image_raw'], tf.uint8)
# The height and the width are used to reshape the image back to its original shape.
height = tf.cast(features['height'], tf.int32)
width = tf.cast(features['width'], tf.int32)

# The image is reshaped since, when stored in binary format, it is flattened. Therefore, we need the
# height and the width to restore the original image.
image = tf.reshape(image, [height, width, 3])
image = tf.cast([image], tf.float32)

with tf.variable_scope('conv1'):
    conv1, P1 = conv_layer(image, [3, 3, 3, 32], [32])      # image size: [56, 56]
with tf.variable_scope('conv2'):
    conv2, P2 = conv_layer(conv1, [3, 3, 32, 64], [64])     # image size: [28, 28]
with tf.variable_scope('conv3'):
    conv3, P3 = conv_layer(conv2, [3, 3, 64, 128], [128])   # image size: [14, 14]
with tf.variable_scope('conv4'):
    conv4, P4 = conv_layer(conv3, [3, 3, 128, 256], [256])  # image size: [7, 7]
    conv4_reshaped = tf.reshape(conv4, [-1, 7 * 7 * 256], name='conv4_reshaped')

w_c = tf.Variable(tf.truncated_normal([7 * 7 * 256, 100], stddev=0.1), name='weight_fc')
b_c = tf.Variable(tf.constant(0.1, shape=[100]), name='biases_fc')
tf.summary.histogram('weights_c', w_c)
tf.summary.histogram('biases_c', b_c)

with tf.variable_scope('z'):
    z = selu(tf.nn.bias_add(tf.matmul(conv4_reshaped, w_c), b_c))
    tf.summary.histogram('features_z', z)

w_dc = tf.Variable(tf.truncated_normal([100, 7 * 7 * 256], stddev=0.1), name='weights_dc')
b_dc = tf.Variable(tf.constant(0.1, shape=[7 * 7 * 256]), name='biases_dc')
tf.summary.histogram('weights_dc', w_dc)
tf.summary.histogram('biases_dc', b_dc)

with tf.variable_scope('deconv4'):
    deconv4 = selu(tf.nn.bias_add(tf.matmul(z, w_dc), b_dc))
    deconv4_reshaped = tf.reshape(deconv4, [-1, 7, 7, 256], name='deconv4_reshaped')

with tf.variable_scope('deconv3'):
    deconv3 = deconv_layer(deconv4_reshaped, [3, 3, 128, 256], [128])
with tf.variable_scope('deconv2'):
    deconv2 = deconv_layer(deconv3, [3, 3, 64, 128], [64])
with tf.variable_scope('deconv1'):
    deconv1 = deconv_layer(deconv2, [3, 3, 32, 64], [32])
with tf.variable_scope('deconv_image'):
    deconv_image = deconv_layer(deconv1, [3, 3, 3, 32], [3])

with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.abs(deconv_image - image))
    tf.summary.scalar('loss', loss)
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)

init_op = tf.group(tf.local_variables_initializer(),
                       tf.global_variables_initializer())

saver = tf.train.Saver()
model_path = 'C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'

# Here is the session...
with tf.Session() as sess:

    train_writer = tf.summary.FileWriter('C:/Users/iayou005/Documents/tensorboard_logs/New_Runs/DeconvNet', sess.graph)
    merged = tf.summary.merge_all()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    step = 0

    sess.run(init_op)

    # Note that the last name "Graph_model" is the name of the saved checkpoints file => the ckpt is saved
    # under tensorboard_logs.
    ckpt = tf.train.get_checkpoint_state(
        os.path.dirname('C:/Users/iayou005/Documents/tensorboard_logs/Graph_model/ckpt'))
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print('Checkpoints restored!')
    else:
        print('No stored checkpoints')

    while step < 100000:

        if step % 1000 == 0:
            img = sess.run([deconv_image])
            img2 = Image.fromarray(np.uint8(img[0][0]))
            img2.save('Reconstructed' + str(step) + '.png', 'png')

        __, loss_s, summary = sess.run([optimizer, loss, merged])
        if step % 100 == 0:
            train_writer.add_summary(summary, step)
            print(loss_s)
        step += 1

    save_path = saver.save(sess, model_path)
    coord.request_stop()
    coord.join(threads)
    train_writer.close()
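
For reference, here is the plain-TensorFlow version of selu that I also tried (essentially the line commented out in selu above), as a minimal, self-contained sketch:

import tensorflow as tf

def selu_tf(x):
    # Constants from the SELU paper (https://arxiv.org/pdf/1706.02515)
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    # scale * x for x >= 0, scale * alpha * (exp(x) - 1) for x < 0
    return scale * tf.where(x >= 0.0, x, alpha * tf.exp(x) - alpha)

# Quick sanity check on a few values:
x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
with tf.Session() as sess:
    print(sess.run(selu_tf(x)))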

So, without using Keras, I keep getting a NaN:

InvalidArgumentError (see above for traceback): Nan in summary histogram for: conv1/weights_1
     [[Node: conv1/weights_1 = HistogramSummary[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](conv1/weights_1/tag, conv1/weights/read/_61)]]

I would like to know why I am getting the NaN.
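
In case it helps narrow things down, here is a minimal sketch of how I have been trying to localize the first NaN with tf.check_numerics; it assumes the tensors (conv1, loss) and the ops (optimizer, merged) defined in the code above:

# Wrap intermediate tensors so the session fails at the first op that
# produces a NaN/Inf, instead of failing later inside the histogram summary.
conv1_checked = tf.check_numerics(conv1, message='NaN/Inf in conv1 activations')
loss_checked = tf.check_numerics(loss, message='NaN/Inf in the loss')

# Alternatively, add a check op for every floating-point tensor in the graph
# and run it together with the training step:
check_op = tf.add_check_numerics_ops()
# __, loss_s, summary, _ = sess.run([optimizer, loss, merged, check_op])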

Any help is greatly appreciated!!

0 Answers:

No answers yet.