Question

我是Tensorflow的新手，我试图使用tensorflow对图像（大小为12x22）进行文本识别。训练模型似乎还可以，但是当进行预测时，会出现错误：

  File "C:\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 316, in standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking input: expected flatten_input to have 3 dimensions, but got array with shape (22, 12)

我不明白为什么输入的只是二维（2D）图像，模型却要求输入具有3个维度。

train_model.py：这是训练脚本，用于训练并导出模型

import cv2
import pickle
import os.path
import numpy as np
from imutils import paths
import tensorflow as tf
# from sklearn.preprocessing import LabelBinarizer
# from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
# from keras.layers.convolutional import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Flatten, Dense

import image_fit

LETTER_IMAGES_FOLDER = "training_generate/divided_sample"
MODEL_FILENAME = "captcha_model.hdf5"
MODEL_LABELS_FILENAME = "model_labels.dat"  # not referenced anywhere in this script

# Training samples and their labels, filled in lockstep by the loop below.
data, labels = [], []

print('Read images')
for sample_path in paths.list_images(LETTER_IMAGES_FOLDER):
    # Read the sample, convert it to grayscale, then fit it to 12 x 22
    # (the model below declares input_shape=(22, 12)).
    gray = cv2.cvtColor(cv2.imread(sample_path), cv2.COLOR_BGR2GRAY)
    fitted = image_fit.resize_to_fit(gray, 12, 22)

    # The parent folder name is an integer code; chr() turns it into the letter.
    letter = chr(int(sample_path.split(os.sep)[-2]))

    data.append(fitted)
    labels.append(letter)
print('Done reading')

# Stack the samples into one array and normalize along axis 1.
data = tf.keras.utils.normalize(np.array(data), axis=1)
# NOTE(review): labels are one-character strings here, while the loss below is
# 'sparse_categorical_crossentropy' -- confirm Keras receives the label
# encoding it expects.
labels = np.array(labels)

# Fully connected classifier: flatten the image, two hidden layers of 128
# units, and a 62-way softmax output.
model = Sequential([
    Flatten(input_shape=(22, 12)),
    Dense(128, activation=tf.nn.relu),
    Dense(128, activation=tf.nn.relu),
    Dense(62, activation=tf.nn.softmax),
])
model.compile(
    optimizer='Adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(data, labels, epochs=3)
print('Finish training')

# Persist the trained model so solve.py can load it by the same filename.
model.save(MODEL_FILENAME)

solve.py：这是用于识别字母的 Python 脚本。

import cv2
import pickle
import os.path
import numpy as np
from imutils import paths
import tensorflow as tf

import image_fit


# Same constants as in train_model.py; only MODEL_FILENAME is referenced below.
LETTER_IMAGES_FOLDER = "training_generate/divided_sample"
MODEL_FILENAME = "captcha_model.hdf5"
MODEL_LABELS_FILENAME = "model_labels.dat"

# Load the single test image, convert it to grayscale and fit it with the same
# resize_to_fit(image, 12, 22) call that train_model.py uses.
image_file = 'test_0.bmp'
image = cv2.imread(image_file)
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = image_fit.resize_to_fit(image, 12, 22)

# Load the model saved by train_model.py.
new_model = tf.keras.models.load_model(MODEL_FILENAME)
# NOTE: `image` is passed as-is. The traceback quoted in the question reports
# its shape as (22, 12), i.e. there is no leading batch axis; this is the call
# that raises the ValueError.
# NOTE(review): train_model.py passes its data through tf.keras.utils.normalize;
# no normalization is applied here -- confirm whether that is intended.
predicitions = new_model.predict(image)
# Print the index of the largest value in the first prediction row.
print(np.argmax(predicitions[0]))

Answer 1

使用下面这一行代码进行预测时：

# Call quoted from solve.py; the ValueError reports the array passed here as shape (22, 12).
predicitions = new_model.predict(image)

模型期望输入的第一维是批次（batch）中的图像数量。由于您只有一张图像，第一维的大小应为1。您可以将图像重塑（reshape）为形状（1, 22, 12），这样就能通过输入检查：

# Reshape to (1, 22, 12) so that the first axis is a batch of size 1.
predicitions = new_model.predict(image.reshape(1,22,12))

Answer 2

根据克鲁格的回答，代码应如下所示：

    image_file = 'test_0.bmp'
    # Load the test image, convert it to grayscale and fit it with the same
    # resize_to_fit(image, 12, 22) call used for the training samples.
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = image_fit.resize_to_fit(image, 12, 22)

    # predict() expects a batch: wrapping the image in a list gives an array
    # of shape (1, 22, 12). Apply the same normalization as train_model.py
    # (tf.keras.utils.normalize, axis=1) so the model sees inputs on the same
    # scale it was trained on.
    data = tf.keras.utils.normalize(np.array([image]), axis=1)

    new_model = tf.keras.models.load_model(MODEL_FILENAME)
    predictions = new_model.predict(data)
    # Index of the highest-scoring output unit for the single image in the batch.
    print(np.argmax(predictions[0]))

ValueError：检查输入时出错：预期 flatten_input 具有3个维度，但得到的数组形状为（22，12）

2 个答案: