Question

I am trying to implement a simple gender classifier using deep convolutional neural networks using tensorflow. I have found this model and implemented it.

def create_model_v2(data):

    cl1_desc = {'weights':weight_variable([7,7,3,96]), 'biases':bias_variable([96])}
    cl2_desc = {'weights':weight_variable([5,5,96,256]), 'biases':bias_variable([256])}
    cl3_desc = {'weights':weight_variable([3,3,256,384]), 'biases':bias_variable([384])}

    fc1_desc = {'weights':weight_variable([240000, 128]), 'biases':bias_variable([128])}
    fc2_desc = {'weights':weight_variable([128,128]), 'biases':bias_variable([128])}
    fc3_desc = {'weights':weight_variable([128,2]), 'biases':bias_variable([2])}

    cl1 = conv2d(data,cl1_desc['weights'] + cl1_desc['biases'])
    cl1 = tf.nn.relu(cl1)
    pl1 = max_pool_nxn(cl1,3,[1,2,2,1])
    lrm1 = tf.nn.local_response_normalization(pl1)

    cl2 = conv2d(lrm1, cl2_desc['weights'] + cl2_desc['biases'])
    cl2 = tf.nn.relu(cl2)
    pl2 = max_pool_nxn(cl2,3,[1,2,2,1])
    lrm2 = tf.nn.local_response_normalization(pl2)

    cl3 = conv2d(lrm2, cl3_desc['weights'] + cl3_desc['biases'])
    cl3 = tf.nn.relu(cl3)
    pl3 = max_pool_nxn(cl3,3,[1,2,2,1])

    fl = tf.contrib.layers.flatten(cl3)

    fc1 = tf.add(tf.matmul(fl, fc1_desc['weights']), fc1_desc['biases'])
    drp1 = tf.nn.dropout(fc1,0.5)
    fc2 = tf.add(tf.matmul(drp1, fc2_desc['weights']), fc2_desc['biases'])
    drp2 = tf.nn.dropout(fc2,0.5)
    fc3 = tf.add(tf.matmul(drp2, fc3_desc['weights']), fc3_desc['biases'])

    return fc3

What I need to note at this point is that I have also done all the pre-processing steps described in the paper, however my images are resized to 100x100x3 instead of the 277x277x3.

I have defined the the logits to be [0,1] for females and [1,0] for males

x = tf.placeholder('float',[None,100,100,3])
y = tf.placeholder('float',[None,2])

And have defined the training procedure as follows:

def train(x, hm_epochs, LR):
    #prediction = create_model_v2(x)
    prediction = create_model_v2(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits  = prediction, labels = y) )
    optimizer = tf.train.AdamOptimizer(learning_rate=LR).minimize(cost)
    batch_size = 50
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
    print("hello")
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(hm_epochs):
            epoch_loss = 0
            i = 0
            while i < (len(x_train)):
                start = i
                end = i + batch_size
                batch_x = x_train[start:end]
                batch_y = y_train[start:end]
                whatever, vigen = sess.run([optimizer, cost], feed_dict = {x:batch_x, y:batch_y})
                epoch_loss += vigen
                i+=batch_size

            print('Epoch',  epoch ,'loss:',epoch_loss/len(x_train))
            if (epoch+1) % 2 == 0:
                j = 0
                acc = []
                while j < len(x_test):
                    acc += [accuracy.eval(feed_dict = {x:x_test[j:j + 10], y:y_test[j:j+10]})]
                    j+= 10
                print ('accuracy after', epoch + 1, 'epochs on test set: ', sum(acc)/len(acc))

                j = 0
                acc = []
                while j < len(x_train):
                    acc += [accuracy.eval(feed_dict = {x:x_train[j:j + 10], y:y_train[j:j+10]})]
                    j+= 10
                print ('accuracy after', epoch, ' epochs on train set:', sum(acc)/len(acc))

Half of the code above is just for outputting test and train accuracies every 2 epochs.

Anyhow the loss starts high at first epoch

('Epoch', 0, 'loss:', 148.87030902462453)

('Epoch', 1, 'loss:', 0.01549744715988636)

('accuracy after', 2, 'epochs on test set: ', 0.33052011888510396)

('accuracy after', 1, ' epochs on train set:', 0.49607501227222384)

('Epoch', 2, 'loss:', 0.015493246909976005)

What am I missing?

and continues like this keeping the accuracy at 0.5 for train set.

EDIT: the functions weights variable, conv2d and max_pool_nn are

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def avg_pool_nxn(x, n, strides):
    return tf.nn.avg_pool(x, ksize=[1,n,n,1], strides = strides,padding = 'SAME')

def max_pool_nxn(x, n, strides):
    return tf.nn.max_pool(x, ksize=[1,n,n,1], strides = strides, padding = 'SAME')

def conv2d(x, W,stride = [1,1,1,1]):
    return tf.nn.conv2d(x, W, strides = stride, padding = 'SAME')

EDIT 2 - Problem solved

The Problem was fascinatingly related to parameter initialization. Changing the weight initialization from Normal Distribution to Xavier initialization worked wonders and accuracy ended up at about 86%. If anyone is interested here is the original paper http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf, if anyone knows and cares to explain exactly why Xavier works well with convnets and images feel free to post an answer.

The loss function decreases, but accuracy on train set does not change in tensorflow

0 个答案: