Question

我使用tensorflow和kaggle的时尚mnist数据集成功训练了一个cnn。然后，我通过将列车集拆分为新的列车集和验证集来创建验证集。我尝试使用新的训练集重新训练模型，但它给出了这个错误

收到的标签值为-2147483648，超出[0,10]的有效范围。

可能会导致此错误，我该如何解决？我已使用标签[标签＆lt;标签＆ 0]但无处可寻。注意：我需要新数据集在每个类中具有相等比例的实例，所以我使用了StratifiedShuffleSplit。

以下是一些示例代码：

import pandas as pd

fashion_mnist_df = 
 pd.read_csv('C:\\Users\\ladyk\\Documents\\projects\\data_analysis_lesson1\\datasets\\fashion-mnist_train.csv')


# split dataset into train and validation set 
from sklearn.model_selection import StratifiedShuffleSplit

split = StratifiedShuffleSplit(n_splits = 1, test_size = 0.2, random_state=42)
for train_index, val_index in split.split(fashion_mnist_df, fashion_mnist_df['label']):
    fmnist_train_df = fashion_mnist_df.loc[train_index]
    fmnist_validation_df = fashion_mnist_df.loc[val_index]

fmnist_train_images = fmnist_train_df.drop('label', axis=1)
fmnist_train_labels = fmnist_train_df.label
fmnist_validation_images = fmnist_validation_df.drop('label', axis=1)
fmnist_validation_labels = fmnist_validation_df.label

# build cnn model

c4_fmaps = 32
c4_ksz = 5
c4_strides = 1
c4_padding = "SAME"

s5_psz = 5
s5_strides = 2
s5_padding = "SAME"

n_f6 = 32
n_outputs = 10

c4_drop_rate = 1.0 - 0.75
f6_drop_rate = 1.0 - 0.5

he_init = tf.variance_scaling_initializer()

X = tf.get_default_graph().get_tensor_by_name('Placeholder:0')
y = tf.get_default_graph().get_tensor_by_name('Placeholder_1:0')
X_reshaped = tf.get_default_graph().get_tensor_by_name('Reshape:0')
is_training = tf.get_default_graph().get_tensor_by_name('PlaceholderWithDefault:0')
s3 = tf.get_default_graph().get_tensor_by_name('cnn/max_pooling2d/MaxPool:0')


with tf.name_scope('fmnist_cnn'):
    c4 = tf.layers.conv2d(s3, filters=c4_fmaps, kernel_size=c4_ksz, strides=c4_strides, activation=tf.nn.elu, padding=c4_padding)
    s5 = tf.layers.max_pooling2d(c4, pool_size=s5_psz, strides=s5_strides, padding=s5_padding)
    s5_flat = tf.reshape(s5, shape=(-1, 7 * 7 * c4_fmaps))
    f6 = tf.layers.dense(s5_flat, units=n_f6, kernel_initializer=he_init, activation=tf.nn.elu)
    logits = tf.layers.dense(f6, units = n_outputs, kernel_initializer=he_init, activation=tf.nn.elu)
    y_proba = tf.nn.softmax(logits)

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(xentropy)

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01, name="fmnist_optimizer")
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

with tf.name_scope('init_and_save'):
    init = tf.global_variables_initializer()
    new_saver = tf.train.Saver()

#train model

batch_size = 50
n_epochs = 41 
n_batches = len(fmnist_train_images) // batch_size


best_loss = np.infty
checks_without_progress = 0
max_checks = 20

with tf.Session() as sess:
    sess.run(init)
    start_epoch = 0

    for epoch in range(start_epoch, n_epochs):
        rnd_idx = np.random.permutation(len(fmnist_train_images))
        rnd_idx_batches = np.array_split(rnd_idx, n_batches)
        for rnd_idx_batch in rnd_idx_batches:
            X_batch, y_batch = fmnist_train_images.values[rnd_idx_batch], fmnist_train_labels[rnd_idx_batch]
            sess.run([training_op], feed_dict={X: X_batch, y: y_batch})
            acc_val, loss_val = sess.run([accuracy,loss], feed_dict={X: X_batch, y: y_batch, is_training: False})
        if epoch % 1 == 0:
            print("epoch: ", epoch, "\taccuracy: ", acc_val, "\tvalidation loss: ", loss_val, "\tbest loss: ",best_loss)

        # save best loss
        if loss_val < best_loss:
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks:
                print("Early Stopping")
                break
# Error log 

        InvalidArgumentError: Received a label value of -2147483648 which is outside the valid range of [0, 10).  Label values: -2147483648 -2147483648 3 9 6 3 -2147483648 -2147483648 1 7 5 8 4 7 3 -2147483648 3 7 7 -2147483648 1 6 7 -2147483648 2 5 0 9 3 -2147483648 1 0 -2147483648 1 7 0 3 3 7 9 0 4 9 5 2 -2147483648 9 -2147483648 1 8
             [[Node: loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits_1 = SparseSoftmaxCrossEntropyWithLogits[T=DT_FLOAT, Tlabels=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](fmnist_cnn/dense_2/Elu, _arg_Placeholder_1_0_1)]]

        During handling of the above exception, another exception occurred:

        InvalidArgumentError                      Traceback (most recent call last)
        <ipython-input-9-a38cbdec8d58> in <module>()
             20 #             X_batch, y_batch = X_train_tf[rnd_idx_batch], fmnist_train_labels[rnd_idx_batch]
             21             X_batch, y_batch = fmnist_train_images.values[rnd_idx_batch], fmnist_train_labels[rnd_idx_batch]
        ---> 22             sess.run([training_op], feed_dict={X: X_batch, y: y_batch})
             23 
             24         acc_val, loss_val = sess.run([accuracy,loss],                                       feed_dict={X: X_batch, y: y_batch, is_training: False})

Answer 1

尝试将tf.reset_default_graph（）放在注释# reset_graph()所在的位置。如果这没有帮助，那么可以使用tf.variable_scope设置变量，以确保它们可以在您想要的位置重置或重复使用。 https://www.tensorflow.org/api_docs/python/tf/variable_scope

使用tensoflow建立CNN

1 个答案: