我正在通过Udacity深度学习课程的notMNIST作业4(Assignment 4),用卷积网络(convnet)探索TensorFlow的各个模块。
我的初始模型使用tf.nn.conv2d和tf.nn.max_pool,验证准确率约为90%。
初始模型(tf.nn.*):
batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed = 4242

tf.reset_default_graph()
graph = tf.Graph()

with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
        """Convolution (stride 1, SAME) + bias + ReLU, then 2x2 max pooling with stride 2.

        Variables are created with tf.get_variable under the names "weights"
        and "biases", so the caller must wrap each call in its own
        tf.variable_scope.

        Args:
            inputs: input tensor passed to tf.nn.conv2d.
            kernel_shape: shape of the convolution kernel variable.
            bias_shape: shape of the bias variable.
            bias_init_v: constant used to initialise the biases.

        Returns:
            The max-pooled activation tensor.
        """
        weights = tf.get_variable(
            "weights", shape=kernel_shape,
            initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed))
        biases = tf.get_variable(
            "biases", shape=bias_shape,
            initializer=tf.constant_initializer(bias_init_v))
        conv = tf.nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
        hidden = tf.nn.relu(conv + biases)
        maxpool = tf.nn.max_pool(hidden, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
        return maxpool

    def dense(inputs, units, activation=None):
        """Fully connected layer named "Dense" with fixed, seeded initialisers.

        Args:
            inputs: input tensor passed to tf.layers.dense.
            units: number of output units.
            activation: optional activation function; None yields a linear layer.

        Returns:
            The output tensor of tf.layers.dense.
        """
        return tf.layers.dense(
            inputs,
            units=units,
            activation=activation,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.05, seed=seed),
            bias_initializer=tf.constant_initializer(0.1),
            name="Dense"
        )

    # Model.
    def model(data, keep_prob=1):
        """Build the network: two conv/pool stages followed by three dense layers.

        Args:
            data: input batch tensor.
            keep_prob: keep probability for both dropout layers; the default of
                1 is what the validation/test graphs below use.

        Returns:
            The output of the final, activation-free dense layer (logits).
        """
        with tf.variable_scope("conv1"):
            layer1 = conv_relu_maxpool(
                data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
        with tf.variable_scope("conv2"):
            layer2 = conv_relu_maxpool(
                layer1, [patch_size, patch_size, depth, depth], [depth], 1.0)
        # Flatten everything except the batch dimension for the dense layers.
        shape = layer2.get_shape().as_list()
        reshape = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])
        with tf.variable_scope("dense1"):
            dense1 = dense(reshape, num_hidden, activation=tf.nn.relu)
            dropout1 = tf.nn.dropout(dense1, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense2"):
            # Feed the dropped-out activations forward. Previously dense1 was
            # passed here, which left dropout1 unused and silently disabled
            # the first dropout layer.
            dense2 = dense(dropout1, 32, activation=tf.nn.relu)
            dropout2 = tf.nn.dropout(dense2, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense3"):
            dense3 = dense(dropout2, num_labels)
        return dense3

    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, keep_prob=0.7)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

    # Optimizer.
    # global_step only counts the steps taken; it is not a model parameter,
    # so keep it out of the trainable-variables collection.
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    # reuse=True makes the evaluation graphs share the variables created above.
    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
我将其改写为使用tf.layers.conv2d和tf.layers.max_pooling2d,后面接tf.layers.dense和tf.layers.dropout。
现在模型无法训练,运行得异常快,验证准确率只有10%(相当于随机猜测)。我想不出是什么原因。
计算图:
batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed = 4242

graph = tf.Graph()
tf.reset_default_graph()

with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)

    # Model.
    def model(data, keep_prob=0.5, training=False):
        """Build the tf.layers network: conv -> pool -> conv -> pool -> dense -> dropout -> dense.

        Args:
            data: input batch tensor.
            keep_prob: keep probability; converted to a drop rate of
                (1.0 - keep_prob) for tf.layers.dropout.
            training: forwarded to tf.layers.dropout.

        Returns:
            The output of the final, activation-free dense layer (logits).
        """
        def trunc_normal(init_seed):
            return tf.truncated_normal_initializer(stddev=0.1, seed=init_seed)

        def constant(value):
            return tf.constant_initializer(value)

        # NOTE(review): kernel_size=1 is a 1 x 1 convolution and the pooling
        # window is patch_size x patch_size; the tf.nn.* version of this model
        # uses a patch_size x patch_size kernel and a 2 x 2 pooling window.
        net = tf.layers.conv2d(
            data, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=trunc_normal(seed),
            bias_initializer=constant(0.0),
            name="Conv_1")
        net = tf.layers.max_pooling2d(
            net, pool_size=patch_size, strides=2, padding='SAME')

        net = tf.layers.conv2d(
            net, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=trunc_normal(2 * seed),
            bias_initializer=constant(1.0),
            name="Conv_2")
        net = tf.layers.max_pooling2d(
            net, pool_size=patch_size, strides=2, padding='SAME')

        # Flatten everything except the batch dimension for the dense layers.
        batch, height, width, channels = net.get_shape().as_list()
        flat = tf.reshape(net, [batch, height * width * channels])

        hidden = tf.layers.dense(
            flat, units=num_hidden, activation=tf.nn.relu,
            kernel_initializer=trunc_normal(seed),
            bias_initializer=constant(1.0))
        hidden = tf.layers.dropout(
            hidden, rate=(1.0 - keep_prob), seed=seed, training=training)

        return tf.layers.dense(
            hidden, units=num_labels, activation=None,
            kernel_initializer=trunc_normal(seed),
            bias_initializer=constant(1.0))

    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, training=True)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

    # Optimizer.
    # count the number of steps taken.
    global_step = tf.Variable(0, name="globalStep", trainable=False)
    learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    # reuse=True makes the evaluation graphs share the variables created above.
    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))
训练和评估:
num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')

    for step in range(num_steps):
        # Pick the next minibatch, wrapping around the training set.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_end = offset + batch_size
        batch_data = train_dataset[offset:batch_end, :, :, :]
        batch_labels = train_labels[offset:batch_end, :]

        _, batch_loss, predictions = session.run(
            [optimizer, loss, train_prediction],
            feed_dict={tf_train_dataset: batch_data, tf_train_labels: batch_labels})

        # Report progress only every 50th step.
        if step % 50 != 0:
            continue
        print('Step {:4d} rate {:.4f} '.format(step, learning_rate.eval()), end='')
        accPred = accuracy(predictions, batch_labels)
        accValid = accuracy(valid_prediction.eval(), valid_labels)
        print('Loss {:2.3f} Batch acc.: {:02.2f}% Validation acc.: {:2.2f}%'.format(
            batch_loss, accPred, accValid))

    # Evaluate on the test set once training has finished, still in the session.
    print('\nTest accuracy: {:.2f}%'.format(accuracy(test_prediction.eval(), test_labels)))
损失停滞在2.303左右(即ln 10,相当于在10个类别上随机猜测),验证准确率为10%:
Initialized
Step 0 rate 0.0500 Loss 4.903 Batch acc.: 18.75% Validation acc.: 9.98%
Step 50 rate 0.0494 Loss 2.301 Batch acc.: 15.62% Validation acc.: 10.00%
Step 100 rate 0.0489 Loss 2.302 Batch acc.: 12.50% Validation acc.: 10.00%
Step 150 rate 0.0483 Loss 2.303 Batch acc.: 6.25% Validation acc.: 10.00%
Step 200 rate 0.0478 Loss 2.306 Batch acc.: 0.00% Validation acc.: 10.00%
Step 250 rate 0.0473 Loss 2.306 Batch acc.: 3.12% Validation acc.: 10.00%
请问可能是哪里出了问题?
完整的notebook(出问题的版本)在GitHub上,出问题的单元格位于底部。
答案 0 :(得分:2)
我看到两件奇怪的事情。
首先,模型的输出似乎直接取自一个带激活函数的全连接层(dense2,其激活函数为relu)。所以我猜你漏掉了网络的最后一层:
# Suggested final layer: project dense_2 to num_labels units with no
# activation, so the model returns raw logits for the softmax /
# softmax_cross_entropy_with_logits ops used in the question.
dense_3 = tf.layers.dense(
dense_2,
units=num_labels,
activation=None, # <<< no activation on the output layer
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(1.0)
)
return dense_3
或者,去掉dense_2的激活函数。
其次,在notebook中,我看到你是这样定义卷积层+池化层的:
# First convolution + pooling stage, quoted from the tf.layers version.
# kernel_size=1 makes this a 1 x 1 convolution.
conv_1 = tf.layers.conv2d(
data, filters=depth, kernel_size=1, padding='SAME',
activation=tf.nn.relu,
kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
bias_initializer=tf.constant_initializer(0.0),
name="Conv_1"
)
# pool_size=patch_size makes the pooling window patch_size x patch_size (stride 2).
pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')
如果把它与你之前定义这些层的方式相比较:
def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
    """Convolution (stride 1, SAME) + bias + ReLU, then 2x2 max pooling with stride 2.

    Creates the variables "weights" and "biases" in the current variable scope.

    Args:
        inputs: input tensor passed to tf.nn.conv2d.
        kernel_shape: shape of the convolution kernel variable.
        bias_shape: shape of the bias variable.
        bias_init_v: constant used to initialise the biases.

    Returns:
        The max-pooled activation tensor.
    """
    kernel = tf.get_variable(
        "weights", shape=kernel_shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed))
    bias = tf.get_variable(
        "biases", shape=bias_shape,
        initializer=tf.constant_initializer(bias_init_v))
    activated = tf.nn.relu(
        tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME') + bias)
    return tf.nn.max_pool(activated, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')


# Example call from the original model: patch_size x patch_size kernel.
conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
我发现你有两个错误:
1. 卷积使用了1 x 1的内核,而不是patch_size x patch_size的内核(conv2d调用中的kernel_size=1)。
2. 最大池化使用了5 x 5的窗口,而不是2 x 2的窗口(max_pooling2d调用中的pool_size=patch_size)。
请注意,两种架构之间可能还存在更多差异,这些只是我目前注意到的。