使用 TensorFlow 进行多标签分类

时间:2017-03-15 03:01:51

标签: tensorflow gradient-descent multilabel-classification

我有以下代码用于多标签分类:

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

# Generate a synthetic multi-label dataset (10000 samples, 200 features,
# 10 classes) and hold out 10% of it as a test split.
X, Y = make_multilabel_classification(
    n_samples=10000,
    n_features=200,
    n_classes=10,
    n_labels=2,
    allow_unlabeled=False,
    random_state=1,
)
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.1, random_state=2
)

# ---- Training hyperparameters ----
learning_rate = 0.001
training_epochs = 5000
display_step = 50  # progress is printed every `display_step` epochs

# Network input/output widths come from the column counts of the data.
num_input, num_classes = x_train.shape[1], y_train.shape[1]

def init_weights(shape):
    """Return a tf.Variable of the given shape.

    The initial values are produced by tf.random_normal with stddev=0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values)

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    """Build a feed-forward network with three ReLU hidden layers.

    Args:
        X: input tensor; dropout is applied to it before the first layer.
        w_h: weight matrix of the first hidden layer.
        w_h2: weight matrix of the second hidden layer.
        w_o: weight matrix of the output layer.
        p_keep_input: value passed to tf.nn.dropout for the input.
        p_keep_hidden: value passed to tf.nn.dropout after each hidden layer.

    Returns:
        tf.nn.sigmoid applied to the output-layer matmul.

    NOTE: the third hidden layer's weights ``w_h3`` are not a parameter;
    they are read from the enclosing module scope, so ``w_h3`` must be
    defined before this function is called.
    """
    def _hidden_layer(inputs, weights):
        # matmul -> ReLU -> dropout, the pattern shared by all hidden layers.
        activated = tf.nn.relu(tf.matmul(inputs, weights))
        return tf.nn.dropout(activated, p_keep_hidden)

    net = tf.nn.dropout(X, p_keep_input)
    net = _hidden_layer(net, w_h)
    net = _hidden_layer(net, w_h2)
    net = _hidden_layer(net, w_h3)  # module-level variable, see NOTE above

    return tf.nn.sigmoid(tf.matmul(net, w_o))

# Graph inputs: one row of features / one row of labels per sample.
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

# Weight matrices for three 500-unit hidden layers and the output layer.
# model() reads w_h3 from module scope, so it has to exist before the call.
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

# Dropout keep values are fed at run time.
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

# NOTE(review): model() already applies tf.nn.sigmoid, so `pred` holds
# sigmoid outputs rather than raw logits; the disabled alternative below
# would need the pre-sigmoid tensor instead.
#cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))

# Binary cross-entropy, summed (not averaged) over every sample and class.
# The 1e-9 offset keeps tf.log away from log(0).
cost = -tf.reduce_sum( (  (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) )  , name='xentropy' )

optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
#optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

# A row counts as correct when the arg-max of the prediction equals the
# arg-max of the label row.
# NOTE(review): with several active labels per row this only compares a
# single position per sample - confirm this is the intended metric.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

#--------------------------------------------------------------------------------
init = tf.global_variables_initializer()

# Train on the full training set each epoch, then print a confusion matrix
# and an AUC value computed on the test split.
with tf.Session() as sess:
    sess.run(init)

    sess.run(tf.local_variables_initializer())

    for epoch in range(training_epochs):
        # One optimizer step per epoch, fed with the entire training set.
        # Both keep values are fed as 1.0 (presumably disabling dropout).
        sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
        # Second graph execution on the same feed, only to read the cost.
        avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})

        if epoch % display_step == 0:
            training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))

    print("Optimization Complete!")

    # Arg-max positions of predictions and labels, cast to float32.
    a = tf.cast(tf.argmax(pred, 1),tf.float32)
    b = tf.cast(tf.argmax(y,1),tf.float32)

    # NOTE(review): `a` and `b` are class indices, not probabilities or
    # binary labels - confirm against the tf.metrics.auc contract.
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)

    # Re-run after the metric ops exist; presumably initialises the local
    # variables created by tf.metrics.auc.
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))

输出如下:

Epoch: 000 Training Accuracy: 0.31500 cost= 62297.6406250000
Epoch: 050 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 100 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 150 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 200 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 250 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 300 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 350 Training Accuracy: 0.30722 cost= 433502.8125000000
...
Epoch: 5000 Training Accuracy: 0.30722 cost= 433502.8125000000

如上所示,训练准确率在整个训练过程中几乎保持不变。我改变了隐藏层的数量,并把学习率从 0.001、0.01 调整到 0.1,趋势仍然相同。

如果有人能指出我可能做错了什么,我将不胜感激。

1 个答案:

答案 0 :(得分:1)

您的代码的主要问题是没有使用小批量(mini-batch)梯度下降,而是在每次梯度下降更新时都使用整个训练集。另外,我认为 5000 个 epoch 太多了,50–100 个应该就足够了(您可以通过实验验证)。此外,下面两行中的第二行是多余的:这样写实际上会在每次迭代中把计算图运行两次:

# First graph execution: runs the optimizer op.
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
# Second graph execution on the same feed, only to fetch the cost.
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})

正确的写法:

# Fetch the optimizer op and the cost in a single sess.run call.
_, avg_cost= sess.run([optimizer,cost], feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})

以下是修改后的代码(我在新增的行上添加了 # ADDED # 注释):

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split

# Generate a synthetic multi-label dataset (10000 samples, 200 features,
# 10 classes) and hold out 10% of it as a test split.
X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
                                      allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)

batch_size = 100  # ADDED #
# Floor division keeps num_batches an integer on Python 3 as well ("/" would
# yield a float there, which range() rejects). A trailing partial batch is
# dropped.
num_batches = x_train.shape[0] // batch_size  # ADDED #

learning_rate = 0.001
training_epochs = 5000
display_step = 1  # progress is printed every `display_step` epochs

# Network input/output widths come from the column counts of the data.
num_input = x_train.shape[1]
num_classes = y_train.shape[1]

def init_weights(shape):
    """Return a tf.Variable of the given shape.

    The initial values are produced by tf.random_normal with stddev=0.01.
    """
    starting_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(starting_values)

def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
    """Build a feed-forward network with three ReLU hidden layers.

    Args:
        X: input tensor; dropout is applied to it before the first layer.
        w_h: weight matrix of the first hidden layer.
        w_h2: weight matrix of the second hidden layer.
        w_o: weight matrix of the output layer.
        p_keep_input: value passed to tf.nn.dropout for the input.
        p_keep_hidden: value passed to tf.nn.dropout after each hidden layer.

    Returns:
        tf.nn.sigmoid applied to the output-layer matmul.

    NOTE: the third hidden layer's weights ``w_h3`` are not a parameter;
    they are read from the enclosing module scope, so ``w_h3`` must be
    defined before this function is called.
    """
    dropped_input = tf.nn.dropout(X, p_keep_input)

    layer1 = tf.nn.dropout(tf.nn.relu(tf.matmul(dropped_input, w_h)), p_keep_hidden)
    layer2 = tf.nn.dropout(tf.nn.relu(tf.matmul(layer1, w_h2)), p_keep_hidden)
    # w_h3 is resolved from module scope (see NOTE in the docstring).
    layer3 = tf.nn.dropout(tf.nn.relu(tf.matmul(layer2, w_h3)), p_keep_hidden)

    output_pre_activation = tf.matmul(layer3, w_o)
    return tf.nn.sigmoid(output_pre_activation)

# Graph inputs: one row of features / one row of labels per sample.
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])

# Weight matrices for three 500-unit hidden layers and the output layer.
# model() reads w_h3 from module scope, so it has to exist before the call.
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])

# Dropout keep values are fed at run time.
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)

# Binary cross-entropy, summed (not averaged) over every sample and class.
# The 1e-9 offset keeps tf.log away from log(0).
log_likelihood = (y * tf.log(pred + 1e-9)) + ((1 - y) * tf.log(1 - pred + 1e-9))
cost = -tf.reduce_sum(log_likelihood, name='xentropy')

sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
optimizer = sgd.minimize(cost)

# A row counts as correct when the arg-max of the prediction equals the
# arg-max of the label row.
predicted_top = tf.argmax(pred, 1)
labelled_top = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_top, labelled_top)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

init = tf.global_variables_initializer()

# Train with mini-batch gradient descent, then print a confusion matrix and
# an AUC value computed on the test split.
with tf.Session() as sess:
    sess.run(init)

    sess.run(tf.local_variables_initializer())

    # int() tolerates a float num_batches (true division on Python 3).
    batches_per_epoch = int(num_batches)

    for epoch in range(training_epochs):

        epoch_cost = 0.0
        for i in range(batches_per_epoch):  # ADDED #
            # A slice picks the i-th consecutive mini-batch and works on both
            # Python 2 and 3 (xrange does not exist on Python 3).
            batch = slice(i * batch_size, (i + 1) * batch_size)  # ADDED #
            # One run performs the update and fetches the batch cost.
            _, batch_cost = sess.run([optimizer, cost], feed_dict={x: x_train[batch], y: y_train[batch], p_keep_input: 1.0, p_keep_hidden: 1.0})  # ADDED #
            epoch_cost += batch_cost

        # Mean mini-batch cost over the epoch (not just the last batch's cost).
        avg_cost = epoch_cost / max(batches_per_epoch, 1)

        if epoch % display_step == 0:
            training_acc = accuracy.eval({x: x_train, y: y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
            print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))

    print("Optimization Complete!")

    # Arg-max positions of predictions and labels, cast to float32.
    a = tf.cast(tf.argmax(pred, 1), tf.float32)
    b = tf.cast(tf.argmax(y, 1), tf.float32)

    # NOTE(review): `a` and `b` are class indices, not probabilities or
    # binary labels - confirm against the tf.metrics.auc contract.
    roc_score = tf.metrics.auc(b, a)
    cm = tf.confusion_matrix(b, a)

    # Re-run after the metric ops exist; presumably initialises the local
    # variables created by tf.metrics.auc.
    sess.run(tf.local_variables_initializer())
    print(sess.run(cm, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
    print(sess.run(roc_score, feed_dict={x: x_test, y: y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))