I want to test an implementation of a convolutional NN with 2 convolutional layers and two fully connected layers. A simple feed-forward model works fine for me, but I run into problems as soon as I add the convolutional layers. My original goal was to tune different hyperparameters to optimize the model's performance. To try to understand why training does not work (the validation accuracy stays around 0.1), I also added visualization through TensorBoard.
When I run the code below with just one set of hyperparameters, the model does not really train, since the accuracy never increases. However, in TensorBoard I can see that all my variables are initialized and that the biases are updated, but the weight matrices of the different layers are not.
This is what I see in TensorBoard:
I really don't understand why the model struggles to update the weights. I know this can sometimes come from the initialization, but I think I'm using the right options for that, right?
If you have any idea where this bug might be, I'd really be interested!
PS: the code is not the most elegant, but when I saw it wasn't working I wanted to keep it as simple as possible.

from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
LOGDIR = 'tensorboard_claire/tuning2'
patch_size = 5
kernel_size = 2
depth = 16
num_hidden = 64
def generate_hyperparameters():
    # Randomly choose values for the hyperparameters.
    return {"learning_rate": 10 ** np.random.uniform(-3, -1),
            "batch_size": np.random.randint(1, 100),
            "dropout": np.random.uniform(0, 1),
            "stddev": 10 ** np.random.uniform(-4, 2)}
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
image_size = 28
num_labels = 10
num_channels = 1 # grayscale
def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size, image_size,
                               num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
def conv_layer(data, weights, biases):
    conv = tf.nn.conv2d(data, weights, [1, 2, 2, 1], padding='SAME')
    hidden = tf.nn.relu(conv + biases)
    pool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    return pool
def reshape_drop(data):
    shape = data.get_shape().as_list()
    reshape = tf.reshape(data, [shape[0], shape[1] * shape[2] * shape[3]])
    return reshape
def train_cnn_and_compute_accuracy(hyperparameters, name='train'):
    # Construct a deep network, train it, and return the accuracy on the
    # validation data.
    batch_size = hyperparameters["batch_size"]
    std = hyperparameters["stddev"]
    graph = tf.Graph()

    with graph.as_default():
        # Input data.
        tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)

        # Variables
        weights = {
            'conv1': tf.Variable(tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=std), name='convw1'),
            'conv2': tf.Variable(tf.random_normal([patch_size, patch_size, depth, depth], stddev=std), name='convw2'),
            'fc1': tf.Variable(tf.random_normal([2 * 2 * depth, num_hidden], stddev=std), name='fcw1'),
            'fc2': tf.Variable(tf.random_normal([num_hidden, num_labels], stddev=std), name='fcw2')
        }

        biases = {
            'conv1': tf.Variable(tf.zeros([depth]), name='convb1'),
            'conv2': tf.Variable(tf.constant(1.0, shape=[depth]), name='convb2'),
            'fc1': tf.Variable(tf.constant(1.0, shape=[num_hidden]), name='fcb1'),
            'fc2': tf.Variable(tf.constant(1.0, shape=[num_labels]), name='fcb2')
        }

        # Neural network model with 2 convolutional layers and 2 fully connected layers
        # with max pooling and dropout
        with tf.name_scope("1st_conv_layer"):
            conv_1_train = conv_layer(tf_train_dataset, weights['conv1'], biases['conv1'])
            conv_1_valid = conv_layer(tf_valid_dataset, weights['conv1'], biases['conv1'])
            tf.summary.histogram("convw1", weights['conv1'])
            tf.summary.histogram("convb1", biases['conv1'])

        with tf.name_scope("2nd_conv_layer"):
            conv_2_train = conv_layer(conv_1_train, weights['conv2'], biases['conv2'])
            conv_2_valid = conv_layer(conv_1_valid, weights['conv2'], biases['conv2'])
            tf.summary.histogram("convw2", weights['conv2'])
            tf.summary.histogram("convb2", biases['conv2'])

        with tf.name_scope('dropout'):
            dropped_train = tf.nn.dropout(conv_2_train, hyperparameters["dropout"])
            dropped_valid = tf.nn.dropout(conv_2_valid, hyperparameters["dropout"])

        reshape_train = reshape_drop(dropped_train)
        reshape_valid = reshape_drop(dropped_valid)

        with tf.name_scope("1st_fc_layer"):
            fc1_train = tf.nn.relu(tf.matmul(reshape_train, weights['fc1']) + biases['fc1'])
            fc1_valid = tf.nn.relu(tf.matmul(reshape_valid, weights['fc1']) + biases['fc1'])
            tf.summary.histogram("fcw1", weights['fc1'])
            tf.summary.histogram("fcb1", biases['fc1'])

        with tf.name_scope("2nd_fc_layer"):
            fc2_train = tf.nn.relu(tf.matmul(fc1_train, weights['fc2']) + biases['fc2'])
            fc2_valid = tf.nn.relu(tf.matmul(fc1_valid, weights['fc2']) + biases['fc2'])
            tf.summary.histogram("fcw2", weights['fc2'])
            tf.summary.histogram("fcb2", biases['fc2'])

        # Predictions
        logits = fc2_train
        train_prediction = tf.nn.softmax(logits)
        valid_prediction = tf.nn.softmax(fc2_valid)

        # Loss with or without regularization
        with tf.name_scope('xentropy'):
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
            tf.summary.scalar("xent", loss)

        # Decaying learning rate and GradientDescent optimizer
        with tf.name_scope('train'):
            global_step = tf.Variable(0, trainable=False)
            learning_rate = tf.train.exponential_decay(hyperparameters["learning_rate"], global_step, 100, 0.96, staircase=True)
            tf.summary.scalar("learning_rate", learning_rate)
            optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

        with tf.name_scope("valid_accuracy"):
            correct_prediction = tf.equal(tf.argmax(valid_prediction, 1), tf.argmax(valid_labels, 1))
            # Casts a tensor to a new type.
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.summary.scalar("valid_accuracy", accuracy)

    num_steps = 1001
    val_acc = 0

    with tf.Session(graph=graph) as session:
        summ = tf.summary.merge_all()
        tf.global_variables_initializer().run()
        writer = tf.summary.FileWriter(LOGDIR + "/" + make_hparam_string(hyperparameters))
        writer.add_graph(session.graph)

        for step in range(num_steps):
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
            _, l, predictions, summary = session.run([optimizer, loss, train_prediction, summ], feed_dict=feed_dict)

            if step in np.arange(0, num_steps, 70):
                print("Current step: " + str(step))
                val_acc = accuracy.eval()
                print("Validation accuracy : " + str(val_acc))

            if step % 5 == 0:
                writer.add_summary(summary, step)

        return val_acc
        session.close()
        writer.close()
def make_hparam_string(h):
    learning_rate = h["learning_rate"]
    batch_size = h["batch_size"]
    dropout = h["dropout"]
    stddev = h["stddev"]
    return ("lr_" + str(learning_rate) + ",dp_" + str(dropout) + ",batch_size_" + str(batch_size) + ",stddev_" + str(stddev))
# Generate a bunch of hyperparameter configurations.
hyperparameter_configurations = [generate_hyperparameters() for _ in range(5)]
# Launch some experiments.
results = []
for hyperparameters in hyperparameter_configurations:
print("Hyperparameters : ", hyperparameters.values())
acc = train_cnn_and_compute_accuracy(hyperparameters)
results.append(acc)
Answer (score: 3):
The code is a bit messy, but in any case: a stddev of 100 is huge, it should be around 0.1. Next, you should not apply relu (or any other activation function) to the last layer before the softmax. The dropout range is also very wide; if you want to keep dropout, at least try removing it first and make sure the network can train without it (if you stay at a random-guess 0.1, your weights are hardly being updated), then add it back afterwards.
Try fixing these points first; if that doesn't help, we can take a closer look.
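A minimal sketch of how those suggestions could look with the same TF 1.x API as the question's code. The names (fc1_out, keep_prob, w_fc2, b_fc2) and the concrete values are illustrative assumptions, not part of the original code; fc1_out stands in for the output of the first fully connected layer.

import tensorflow as tf

num_hidden, num_labels, batch_size = 64, 10, 32

# Stand-in for the relu output of the first fully connected layer.
fc1_out = tf.placeholder(tf.float32, shape=(batch_size, num_hidden))
labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
keep_prob = tf.placeholder(tf.float32)  # feed 1.0 to disable dropout while debugging

# 1) Keep the weight-initialization scale small (around 0.1) instead of
#    sampling stddev from 10 ** uniform(-4, 2).
w_fc2 = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
b_fc2 = tf.Variable(tf.zeros([num_labels]))

# 2) Apply dropout only on the training branch and control it through the
#    feed_dict, so the network can first be checked without dropout at all.
dropped = tf.nn.dropout(fc1_out, keep_prob)

# 3) The last layer stays linear: no relu on the logits that feed the
#    softmax cross-entropy.
logits = tf.matmul(dropped, w_fc2) + b_fc2
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))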