CODE:
# Image dimensions used when reshaping the datasets and sizing the input placeholder.
image_row = 640
image_col = 480
# Length of each label vector; also the width of the network's output layer.
num_labels = 17
num_channels = 3 # NOTE(review): three channels; the earlier "grayscale" remark would imply 1 channel - confirm against the data
import numpy as np
# Load data
# load_file is a project helper not shown here; presumably it returns the
# training images and their label vectors - verify against that module.
train_dataset, train_labels = load_file.load_data()
# The test set is a single PNG image plus a single comma-separated label file.
# NOTE(review): scipy.misc.imread was removed in SciPy 1.2 - this line needs an
# older SciPy or a different image reader; confirm the pinned version.
test_dataset = scipy.misc.imread("1501005004.548261985.png")
test_labels = np.loadtxt("1501005004.493062654.txt", comments="#", delimiter=",", unpack=False)
# Module-level alias of the full label array; the training loop later rebinds
# this name to each mini-batch slice.
batch_labels = train_labels
# Report the raw shapes before any reshaping.
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def reformat(dataset, labels):
    """Reshape raw arrays into the float32 layouts the network consumes.

    ``dataset`` is reshaped to ``(N, image_row, image_col, num_channels)`` and
    ``labels`` to ``(N, num_labels)``, where ``N`` is inferred from the amount
    of data. Labels stay real-valued vectors; no one-hot encoding is applied.

    Returns:
        The ``(dataset, labels)`` pair, both cast to ``np.float32``.
    """
    image_shape = (-1, image_row, image_col, num_channels)
    label_shape = (-1, num_labels)
    images = dataset.reshape(image_shape).astype(np.float32)
    targets = labels.reshape(label_shape).astype(np.float32)
    return images, targets
# Convert both splits to the float32 layouts produced by reformat(), then
# print the resulting shapes so they can be compared with the raw ones.
train_dataset, train_labels = reformat(train_dataset, train_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def accuracy(labels, predictions):
    """Return 100 times the summed squared difference of the two inputs.

    Despite the name this is an error measure (lower is better), not a
    percentage of correct predictions. The computation is built from
    TensorFlow ops, so the result is whatever ``tf.reduce_sum`` yields and
    must be evaluated in a session to obtain a number. The expression is
    symmetric in its two arguments.
    """
    residual = predictions - labels
    squared_error = tf.pow(residual, 2)
    return 100.0 * tf.reduce_sum(squared_error)
# Number of examples per training step; also fixes the leading dimension of
# the training input and label placeholders.
batch_size = 1
# Side length of the square convolution filters. kernel_size is bound to the
# same value but is not referenced anywhere else in the visible code.
kernel_size = patch_size =5
# Number of feature maps produced by each convolution.
depth = 16
# Widths of the two fully connected hidden layers.
num_hidden1 = 64
num_hidden2 = 32
graph = tf.Graph()
with graph.as_default():
    # Input data: training batches are fed at run time, the test data is
    # embedded in the graph as a constant.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=[batch_size, image_row, image_col, num_channels])
    tf_train_labels = tf.placeholder(tf.float32, shape=[batch_size, num_labels])
    tf_test_dataset = tf.constant(test_dataset)

    # Variables.
    layer1_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    # Dropout keep probability. The dropout op itself is disabled in model(),
    # but the placeholder stays because the training loop feeds it.
    keep_prob = tf.placeholder("float")
    layer2_weights = tf.Variable(tf.truncated_normal(
        [patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    # Two stride-2 poolings shrink each spatial dimension by a factor of 4.
    # Parenthesised so the size does not depend on left-to-right evaluation of
    # // and *; the value is unchanged for the 640x480 input.
    # NOTE(review): this fan-in is very large for stddev=0.1 and the inputs are
    # not rescaled anywhere in the visible code, which may explain the huge
    # activations reported for the first dense layer - confirm.
    flat_size = (image_row // 4) * (image_col // 4) * depth
    layer3_weights = tf.Variable(tf.truncated_normal(
        [flat_size, num_hidden1], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden1]))
    layer4_weights = tf.Variable(tf.truncated_normal(
        [num_hidden1, num_hidden2], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden2]))
    layer5_weights = tf.Variable(tf.truncated_normal(
        [num_hidden2, num_labels], stddev=0.1))
    layer5_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    def model(data):
        """Build the forward pass for ``data`` and return the output tensor.

        Layout: conv -> ReLU -> max-pool -> LRN, twice; a third conv -> ReLU;
        flatten; dense + ReLU; dense (linear); dense (linear) producing
        ``num_labels`` values per example. ``data`` must have a statically
        known batch dimension because it is used for the flatten reshape.
        """
        # Block 1: convolution, ReLU, 2x2 max-pool, local response norm.
        conv = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        pool1 = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool1')
        norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm1')

        # Block 2: same structure; ops named pool2/norm2 so they are not
        # confused with block 1 in the graph.
        conv = tf.nn.conv2d(norm1, layer2_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        pool2 = tf.nn.max_pool(hidden, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                               padding='SAME', name='pool2')
        norm2 = tf.nn.lrn(pool2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                          name='norm2')

        # Third convolution.
        # NOTE(review): this reuses layer2_weights/layer2_biases, so the second
        # and third convolutions share parameters - confirm this is intended.
        conv = tf.nn.conv2d(norm2, layer2_weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)

        # Flatten the feature maps to (batch, features) for the dense layers.
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])

        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        # NOTE(review): no activation between this layer and the output layer,
        # so the two matmuls compose into a single linear map - confirm.
        hidden = tf.matmul(hidden, layer4_weights) + layer4_biases
        # Dropout is currently disabled:
        # hidden = tf.nn.dropout(hidden, keep_prob)
        result = tf.matmul(hidden, layer5_weights) + layer5_biases
        return result

    logits = model(tf_train_dataset)
    print ('AFTER LOGITS')
    # Debugging breakpoint left in by the author (presumably IPython's embed).
    embed()

    # Half the summed squared error per example (regression loss).
    loss = tf.reduce_sum(tf.pow(logits - tf_train_labels, 2)) / (2 * batch_size)

    global_step = tf.Variable(0, trainable=False)
    start_learning_rate = 0.001
    learning_rate = tf.train.exponential_decay(
        start_learning_rate, global_step, 100000, 0.96, staircase=True)
    # global_step must be handed to minimize() so it is incremented on every
    # update; otherwise it stays at 0 and the learning rate never decays.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Predictions: the raw network outputs for the training batch and for the
    # constant test data (previously the test input itself was returned).
    train_prediction = logits
    test_prediction = model(tf_test_dataset)
# Total number of single-batch optimisation steps to run.
num_steps = 10000001

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('----------------INITIALIZED-----------------')
    for step in range(num_steps):
        print(step)
        # Slide through the training set one mini-batch at a time, wrapping
        # around at the end. max(..., 1) avoids a modulo-by-zero when the
        # training set holds exactly one batch (the offset is then always 0).
        offset = (step * batch_size) % max(train_labels.shape[0] - batch_size, 1)
        print('after offset')
        # Debugging breakpoint left in by the author (presumably IPython's embed).
        embed()
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # keep_prob is fed although dropout is disabled in the model.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels,
                     keep_prob: 1.0}
        _, l, prediction = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        print('after _,l,prediction')
        embed()
        if step % 50 == 0:
            print("Minibatch loss %d: %f" % (step, l))
            # accuracy() returns an unevaluated TensorFlow tensor, so it is run
            # in the session first; the format string now has a placeholder
            # (the original raised TypeError for lack of one).
            # NOTE(review): each call adds new ops to the graph.
            print('Minibatch accuracy: %f'
                  % session.run(accuracy(batch_labels, prediction)))
在上面的代码中，我在最后一个隐藏层中得到了大量 Inf 值，其输出粘贴如下：
In [93]: session.run(hidden)
Out[93]:
array([[ 9.99999972e-10, 9.99999972e-10, 9.99999972e-10,
inf, 9.99999972e-10, 5.50044295e+28,
9.99999972e-10, 9.99999972e-10, 3.21215463e+28,
9.99999972e-10, 1.24344986e+28, 9.99999972e-10,
9.99999972e-10, 2.52180816e+28, 9.99999972e-10,
9.99999972e-10, 9.99999972e-10, 9.99999972e-10,
1.41978562e+28, inf, 9.99999972e-10,
如何避免这些 Inf 值？我是深度学习（Deep Learning）和 TensorFlow 的初学者，所以不知道该如何处理。
我尝试在 ReLU 层中加上一个常量：hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases +1e-9)
，但这没有起作用。
我该怎么办呢?
答案 0 :(得分:0)
通常，这个问题是由梯度爆炸（exploding gradients）引起的，您需要对梯度进行裁剪（gradient clipping）。
# Replace these lines with the following
>optimizer=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
grads_vars = optimizer.compute_gradients(loss, tf.trainable_variables())
grads_vars = clip_grad_norms(grads_vars, max_norm=10)
train_op = optimizer.apply_gradients(grads_vars)
# finally
> _,l,prediction = session.run([optimizer, loss,train_prediction], feed_dict= feed_dict)
#replace with
_,l,prediction = session.run([train_op, loss,train_prediction], feed_dict= feed_dict)
# clip_grad_norms function link
https://github.com/n3011/tefla/blob/master/tefla/core/base.py#L253
答案 1 :(得分:0)
从上面的代码中可以看出，您在第 4 层和第 5 层之间没有使用任何激活函数。
hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
hidden = tf.matmul(hidden, layer4_weights) + layer4_biases
result = tf.matmul(hidden, layer5_weights) + layer5_biases
return result
取决于您对权重和偏置的初始化方式，这可能就是权重值过大/过小的原因。