In the program below, how can I set the learning rate of the first layer to 0.00001 and the learning rate of the last layer to 0.001?
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}

    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Two conv + max-pool blocks (the bias is added before the ReLU).
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)

    # Fully connected layer with dropout, then the output (logits) layer.
    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)

    output = tf.matmul(fc, weights['out']) + biases['out']
    return output
Answer (score: 0)
There are several ways to approach this. The simplest is probably to create a separate optimizer for each learning rate (although you will need to restructure things so that the variables are passed to the optimizers and the training op is returned):
def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}

    x = tf.reshape(x, shape=[-1, 28, 28, 1])

    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool2d(conv2)

    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)

    output = tf.matmul(fc, weights['out']) + biases['out']

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y))

    # Assuming that the convolutional layers are considered the "first layer".
    opt1 = tf.train.AdamOptimizer(0.00001)
    train1_op = opt1.minimize(cost, var_list=[
        weights['W_conv1'], weights['W_conv2'], biases['b_conv1'], biases['b_conv2']])

    # Assuming that the fully connected and softmax layers are considered the
    # "last layer".
    opt2 = tf.train.AdamOptimizer(0.001)
    train2_op = opt2.minimize(cost, var_list=[
        weights['W_fc'], weights['out'], biases['b_fc'], biases['out']])

    # Group the two training ops so a single run() call applies both.
    train_op = tf.group(train1_op, train2_op)

    return output, train_op
prediction, train_op = convolutional_neural_network(x)
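Once the graph is built this way, the grouped train_op is run like any single training op. Here is a minimal usage sketch; it assumes x and y are the usual placeholders and that mnist is a dataset object in the style of tensorflow.examples.tutorials.mnist (the batch size and step count are illustrative, not part of the original):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        batch_x, batch_y = mnist.train.next_batch(128)
        # One call runs both optimizers, each updating its own variables
        # with its own learning rate.
        sess.run(train_op, feed_dict={x: batch_x, y: batch_y})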
Another approach is to use tf.train.Optimizer.compute_gradients() to compute the gradients for all of the variables, multiply each gradient by the appropriate factor, and then pass the result to tf.train.Optimizer.apply_gradients().
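A rough sketch of that second approach is below, meant to replace the two-optimizer block inside convolutional_neural_network() after cost is defined. The grouping of the convolutional variables and the 0.01 scaling factor are illustrative assumptions; GradientDescentOptimizer is used here because, for plain gradient descent, scaling a gradient by 0.01 is equivalent to giving that variable a 100x smaller learning rate (0.001 * 0.01 = 0.00001). With an adaptive optimizer such as Adam the scaling is largely cancelled by the optimizer's normalization, so the separate-optimizer approach above is preferable in that case.

    # Variables that should train with the smaller learning rate.
    first_layer_vars = [weights['W_conv1'], biases['b_conv1'],
                        weights['W_conv2'], biases['b_conv2']]

    opt = tf.train.GradientDescentOptimizer(0.001)  # rate for the last layer
    grads_and_vars = opt.compute_gradients(cost)

    scaled_grads_and_vars = []
    for grad, var in grads_and_vars:
        if grad is not None and any(var is v for v in first_layer_vars):
            grad = 0.01 * grad  # effective learning rate: 0.001 * 0.01 = 0.00001
        scaled_grads_and_vars.append((grad, var))

    train_op = opt.apply_gradients(scaled_grads_and_vars)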