Fashion MNIST:矩阵大小不兼容

时间:2018-09-26 18:53:22

标签: python tensorflow keras

我打算将我的Keras代码从示例脚本迁移到用于模型服务(serving)的Estimator形式,但一直遇到以下错误:

InvalidArgumentError (see above for traceback): Matrix size-incompatible: In[0]: [128,784], In[1]: [16,16]
     [[Node: dense/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](flatten/Reshape, dense/MatMul/ReadVariableOp)]]

完整日志

我的测试图像:

# Show the array dimensions of the test images and of their labels.
print(np.asarray(test_images).shape)
print(np.asarray(test_labels).shape)

(10000, 28, 28)
(10000,)

矩阵乘法要求两个矩阵的形状分别为 m×n 和 n×p 才有效。在这种情况下,输入图像为28x28,并在模型中通过Flatten层展平为长度784的向量。我不确定这里的问题出在哪里,也不知道该如何解决。

原始代码(有效):

import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np

# Report which TensorFlow release is running.
print(tf.__version__)

# Load Fashion-MNIST through the dataset loader that ships with Keras.
fashion_mnist = keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = (
    fashion_mnist.load_data())

# Divide by 255.0 (presumably mapping 8-bit pixel intensities into [0, 1]).
train_images, test_images = train_images / 255.0, test_images / 255.0

# Classifier: flatten each 28x28 image, one 128-unit ReLU layer, then a
# 10-unit softmax output.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(128, activation=tf.nn.relu))
model.add(keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    optimizer=tf.train.AdamOptimizer(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs, then score the model on the test split.
model.fit(train_images, train_labels, epochs=5)
test_loss, test_acc = model.evaluate(test_images, test_labels)

print('Test accuracy:', test_acc)

# Compare the highest-scoring class for the first test image with its label.
predictions = model.predict(test_images)
print(predictions[0].argmax())
print(test_labels[0])

新代码(无效):

def keras_estimator(model_dir, config, learning_rate):
    """Build the Flatten -> Dense(128) -> Dense(10) model and wrap it as an Estimator.

    Args:
        model_dir: Forwarded as ``model_dir`` to ``model_to_estimator``.
        config: Forwarded as ``config`` to ``model_to_estimator``.
        learning_rate: Learning rate handed to ``tf.train.AdamOptimizer``.

    Returns:
        The object returned by ``tf.keras.estimator.model_to_estimator``.
    """
    network = models.Sequential([
        Flatten(input_shape=(28, 28)),
        Dense(128, activation=tf.nn.relu),
        Dense(10, activation=tf.nn.softmax),
    ])

    # The model has to be compiled before it is converted to an Estimator.
    network.compile(
        optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return tf.keras.estimator.model_to_estimator(
        keras_model=network, model_dir=model_dir, config=config)


def input_fn(x, y, batch_size, mode):
    """Create a ``numpy_input_fn`` over in-memory features and labels.

    In ``tf.estimator.ModeKeys.EVAL`` mode the data is read once, in order;
    in every other mode it is shuffled and repeated without an epoch limit.

    Args:
        x: Features, passed to ``numpy_input_fn`` unchanged.
        y: Labels; converted to a float32 array with one column.
        batch_size: Number of examples per batch.
        mode: A ``tf.estimator.ModeKeys`` value.

    Returns:
        The input function built by ``tf.estimator.inputs.numpy_input_fn``.
    """
    evaluating = mode == tf.estimator.ModeKeys.EVAL

    # Labels are fed as a float32 column, one row per example.
    labels = np.asarray(y).astype('float32').reshape((-1, 1))

    return tf.estimator.inputs.numpy_input_fn(
        x,
        y=labels,
        batch_size=batch_size,
        num_epochs=1 if evaluating else None,
        shuffle=not evaluating,
        queue_capacity=50000)


def serving_input_fn():
    """Describe the tensors the exported model receives at prediction time.

    The request tensor stays a float32 batch of flat 784-value vectors, so
    the exported signature is unchanged for clients. It is reshaped to
    ``[batch, 28, 28]`` before reaching the model so that the features match
    the ``Flatten(input_shape=(28, 28))`` input declared in
    ``keras_estimator`` (and the 28x28 images used for training) instead of
    relying on ``Flatten`` happening to accept already-flat input.

    Returns:
        A ``tf.estimator.export.TensorServingInputReceiver`` whose receiver
        tensor is the ``[None, 784]`` placeholder and whose features are the
        same data viewed as ``[None, 28, 28]``.
    """
    feature_placeholder = tf.placeholder(tf.float32, [None, 784])
    # 784 == 28 * 28: restore the image layout the model was defined with.
    features = tf.reshape(feature_placeholder, [-1, 28, 28])
    return tf.estimator.export.TensorServingInputReceiver(features,
                                                          feature_placeholder)

def train_and_evaluate(output_dir, hparams):
    """Train the Keras-backed estimator, evaluating and exporting as it goes.

    Args:
        output_dir: Directory passed to ``keras_estimator`` as ``model_dir``.
        hparams: Mapping that must provide ``train_file``,
            ``train_labels_file``, ``test_file``, ``test_labels_file``,
            ``learning_rate``, ``num_epochs`` and ``batch_size``.
    """
    import math

    # Load data.
    (train_images, train_labels), (test_images, test_labels) = \
        utils.prepare_data(train_file=hparams['train_file'],
                           train_labels_file=hparams['train_labels_file'],
                           test_file=hparams['test_file'],
                           test_labels_file=hparams['test_labels_file'])

    # Scale values to a range of 0 to 1.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    # Create estimator.
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=500)
    estimator = keras_estimator(model_dir=output_dir,
                                config=run_config,
                                learning_rate=hparams['learning_rate'])

    # max_steps is documented as an int, but true division yields a float
    # under Python 3. Rounding up keeps the final partial batch, so training
    # still stops at the step the fractional value would have reached.
    train_steps = int(math.ceil(
        hparams['num_epochs'] * len(train_images) / hparams['batch_size']))

    # Create TrainSpec.
    train_spec = tf.estimator.TrainSpec(
        input_fn=input_fn(
            train_images,
            train_labels,
            hparams['batch_size'],
            mode=tf.estimator.ModeKeys.TRAIN),
        max_steps=train_steps)

    # Create EvalSpec; steps=None evaluates until the input is exhausted.
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=input_fn(
            test_images,
            test_labels,
            hparams['batch_size'],
            mode=tf.estimator.ModeKeys.EVAL),
        steps=None,
        exporters=exporter,
        start_delay_secs=10,
        throttle_secs=10)

    # Start training
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

我之前也尝试过先将图像展平,再修改第一个输入层,结果相同:

    # Collapse each image's two spatial axes into a single pixel axis
    # (784 values for 28x28 images) and store the result as float32.
    num_pixels = train_images.shape[1] * train_images.shape[2]

    train_images = train_images.reshape(
        (len(train_images), num_pixels)).astype('float32')
    test_images = test_images.reshape(
        (len(test_images), num_pixels)).astype('float32')

模型更改:

    # Variant for pre-flattened input: a 784-unit ReLU layer fed 784 features
    # per sample, followed by the 10-unit softmax output.
    model = models.Sequential([
        Dense(784, input_dim=784, activation=tf.nn.relu),
        Dense(10, activation=tf.nn.softmax),
    ])

0 个答案:

没有答案