我有以下代码用于多标签分类:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)
#.........................................................................
learning_rate = 0.001
training_epochs = 5000
display_step = 50
num_input = x_train.shape[1]
num_classes = y_train.shape[1]
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h))
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2))
h2 = tf.nn.dropout(h2, p_keep_hidden)
h3 = tf.nn.relu(tf.matmul(h2, w_h3))
h3 = tf.nn.dropout(h3, p_keep_hidden)
return tf.nn.sigmoid(tf.matmul(h3, w_o))
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)
#cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y))
cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
#optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
#--------------------------------------------------------------------------------
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
sess.run(tf.local_variables_initializer())
for epoch in range(training_epochs):
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
if epoch % display_step == 0:
training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
print("Optimization Complete!")
a = tf.cast(tf.argmax(pred, 1),tf.float32)
b = tf.cast(tf.argmax(y,1),tf.float32)
roc_score = tf.metrics.auc(b, a)
cm = tf.confusion_matrix(b, a)
sess.run(tf.local_variables_initializer())
print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
输出如下:
Epoch: 000 Training Accuracy: 0.31500 cost= 62297.6406250000
Epoch: 050 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 100 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 150 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 200 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 250 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 300 Training Accuracy: 0.30722 cost= 433502.8125000000
Epoch: 350 Training Accuracy: 0.30722 cost= 433502.8125000000
...
Epoch: 5000 Training Accuracy: 0.30722 cost= 433502.8125000000
如上所述,培训准确性在整个培训过程中几乎保持不变。我将隐藏层数和学习率从0.001,0.01改为0.1,趋势仍然相同。
我很欣赏我可能做错的一些帮助。
答案 0 :(得分:1)
您的代码的主要问题是您没有使用小批量梯度下降,而是使用每个梯度下降更新的整个训练数据。另外我认为5000个时代太多了,我想50-100就足够了(你可以通过实验验证)。同样在以下几行中,第二行是冗余的,事实上,当您想要执行此操作时,您在每次迭代中运行图形两次:
sess.run(optimizer, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
avg_cost = sess.run(cost, feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
正确的表格:
_, avg_cost= sess.run([optimizer,cost], feed_dict = {x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
以下是修改过的代码(我添加# ADDED #
行的注释:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
X, Y = make_multilabel_classification(n_samples=10000, n_features=200, n_classes=10, n_labels=2,
allow_unlabeled=False, random_state=1)
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=2)
batch_size = 100 # ADDED #
num_batches = x_train.shape[0]/batch_size # ADDED #
learning_rate = 0.001
training_epochs = 5000
display_step = 1
num_input = x_train.shape[1]
num_classes = y_train.shape[1]
def init_weights(shape):
return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden):
X = tf.nn.dropout(X, p_keep_input)
h = tf.nn.relu(tf.matmul(X, w_h))
h = tf.nn.dropout(h, p_keep_hidden)
h2 = tf.nn.relu(tf.matmul(h, w_h2))
h2 = tf.nn.dropout(h2, p_keep_hidden)
h3 = tf.nn.relu(tf.matmul(h2, w_h3))
h3 = tf.nn.dropout(h3, p_keep_hidden)
return tf.nn.sigmoid(tf.matmul(h3, w_o))
x = tf.placeholder("float", [None, num_input])
y = tf.placeholder("float", [None, num_classes])
w_h = init_weights([num_input, 500])
w_h2 = init_weights([500, 500])
w_h3 = init_weights([500, 500])
w_o = init_weights([500, num_classes])
p_keep_input = tf.placeholder("float")
p_keep_hidden = tf.placeholder("float")
pred = model(x, w_h, w_h2, w_o, p_keep_input, p_keep_hidden)
cost = -tf.reduce_sum( ( (y*tf.log(pred + 1e-9)) + ((1-y) * tf.log(1 - pred + 1e-9)) ) , name='xentropy' )
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost)
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
sess.run(tf.local_variables_initializer())
for epoch in range(training_epochs):
for i in xrange(num_batches):# ADDED #
indices = xrange(i*batch_size, (i+1)*batch_size)# ADDED #
_, avg_cost= sess.run([optimizer,cost], feed_dict = {x : x_train[indices], y : y_train[indices], p_keep_input: 1.0, p_keep_hidden: 1.0})# ADDED #
if epoch % display_step == 0:
training_acc = accuracy.eval({x : x_train, y : y_train, p_keep_input: 1.0, p_keep_hidden: 1.0})
print("Epoch:", '%03d' % (epoch), "Training Accuracy:", '%.5f' % (training_acc), "cost=", "{:.10f}".format(avg_cost))
print("Optimization Complete!")
a = tf.cast(tf.argmax(pred, 1),tf.float32)
b = tf.cast(tf.argmax(y,1),tf.float32)
roc_score = tf.metrics.auc(b, a)
cm = tf.confusion_matrix(b, a)
sess.run(tf.local_variables_initializer())
print(sess.run(cm, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))
print(sess.run(roc_score, feed_dict={x : x_test, y : y_test, p_keep_input: 1.0, p_keep_hidden: 1.0}))