How can I set the learning rate of the first layer to 0.00001 and the last layer to 0.001?

Time: 2017-03-17 21:12:14

Tags: python-3.x tensorflow

In the program below, how can I set the learning rate of the first layer to 0.00001 and the last layer to 0.001?

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


def convolutional_neural_network(x):

    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])) + biases['b_conv1']
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']))+ biases['b_conv2']
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)
    output = tf.matmul(fc, weights['out']) + biases['out']

    return output

1 answer:

Answer 0 (score: 0)

There are a few ways to approach this. The simplest is probably to create a separate optimizer for each different learning rate (although you will need to restructure things so that the variables are passed to the optimizers and the training op is returned):

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 1, 20])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, 20, 40])),
               'W_fc': tf.Variable(tf.random_normal([7 * 7 * 40, 1024])),
               'out': tf.Variable(tf.random_normal([1024, n_classes]))}
    biases = {'b_conv1': tf.Variable(tf.random_normal([20])),
              'b_conv2': tf.Variable(tf.random_normal([40])),
              'b_fc': tf.Variable(tf.random_normal([1024])),
              'out': tf.Variable(tf.random_normal([n_classes]))}
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])) + biases['b_conv1']
    conv1 = maxpool2d(conv1)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']))+ biases['b_conv2']
    conv2 = maxpool2d(conv2)
    fc = tf.reshape(conv2, [-1, 7 * 7 * 40])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])
    fc = tf.nn.dropout(fc, keep_rate)
    output = tf.matmul(fc, weights['out']) + biases['out']

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y))

    # Assuming that the convolutional layers are considered the "first layer".
    opt1 = tf.train.AdamOptimizer(0.00001)
    train1_op = opt1.minimize(cost, var_list=[
        weights['W_conv1'], weights['W_conv2'], biases['b_conv1'], biases['b_conv2']])

    # Assuming that the fully connected and softmax layers are considered the
    # "last layer".
    opt2 = tf.train.AdamOptimizer(0.001)
    train2_op = opt2.minimize(cost, var_list=[
        weights['W_fc'], weights['out'], biases['b_fc'], biases['out']])

    train_op = tf.group(train1_op, train2_op)
    return output, train_op

prediction, optimizer = convolutional_neural_network(x)
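
For reference, a minimal training-loop sketch for the returned train op, assuming x and y are the input/label placeholders used when building the graph and that a mnist-style dataset object (hypothetical here) supplies batches:

# Minimal sketch only: `mnist` is a hypothetical dataset object, and x/y are
# assumed to be the placeholders fed into convolutional_neural_network above.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(1000):
        batch_x, batch_y = mnist.train.next_batch(64)
        # `optimizer` is the grouped train op; one run() applies both the
        # 0.00001 update to the conv layers and the 0.001 update to the rest.
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})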

Another approach is to use tf.train.Optimizer.compute_gradients() to compute the gradients for all of the variables, multiply each gradient by the appropriate factor, and then pass them to tf.train.Optimizer.apply_gradients().
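
A minimal sketch of that gradient-scaling approach, reusing the cost and the weights/biases dictionaries built above. A plain gradient-descent optimizer is used here because scaling a gradient by 0.01 (= 0.00001 / 0.001) is then exactly equivalent to lowering that variable's learning rate; with adaptive optimizers such as Adam the correspondence is only approximate.

# Sketch under the assumption that `cost`, `weights`, and `biases` are the
# objects built inside convolutional_neural_network() above.
base_lr = 0.001
opt = tf.train.GradientDescentOptimizer(base_lr)
grads_and_vars = opt.compute_gradients(cost)

# Variables of the "first layer" whose gradients are scaled down so their
# effective learning rate becomes 0.00001 instead of 0.001.
first_layer_vars = [weights['W_conv1'], weights['W_conv2'],
                    biases['b_conv1'], biases['b_conv2']]

scaled_grads_and_vars = [
    (grad * 0.01 if var in first_layer_vars else grad, var)
    for grad, var in grads_and_vars if grad is not None]

train_op = opt.apply_gradients(scaled_grads_and_vars)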