Question

下面是使用Kaggle数据集进行手势识别图像分类的简单模型：

# -*- coding: utf-8 -*-
"""kaggle_dataset_code.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1kfj2kPVrioXlWX_CDDOGEfxlwMUj5vs6
"""

!pip install kaggle

#You can download the kaggl.json file from your kaggle account. We are going to upload the kaggle.json file.
from google.colab import files
files.upload()

#making kaggle directory as kaggle website has guided.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

#Giving specical permissions to the kaggle.json file.
!chmod 600  ~/.kaggle/kaggle.json

downloading the kaggle dataset from the website by copying the API token 
!kaggle datasets download -d gti-upm/leapgestrecog

# Unzip the dataset into the current working directory.
# zip_data_path is not used by this step; the archive is opened through the
# relative file_name below.
zip_data_path = "/content/leapgestrecog.zip"
from zipfile import ZipFile
file_name = "leapgestrecog.zip"

# The archive is bound to a name other than `zip`: the name stays bound at
# module level after the `with` block, so calling it `zip` would hide the
# builtin zip() for every later cell.
with ZipFile(file_name, 'r') as archive:
    archive.extractall()
    print("done")

import os

import cv2

# Each entry of image_data is [resized grayscale image, class index].
image_data = []
# Class sub-folder names; a sample's class index is its position in this list.
CATEGORIES = ["01_palm", '02_l','03_fist','04_fist_moved','05_thumb','06_index','07_ok','08_palm_moved','09_c','10_down']
# Every image is resized to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 50
unzipped_data_path = "/content/leapgestrecog/leapGestRecog/"
print(os.listdir(unzipped_data_path))

# Walk <root>/<dr>/<category>/<image file> and collect every readable image.
for dr in os.listdir(unzipped_data_path):
    for class_index, category in enumerate(CATEGORIES):
        path = os.path.join(unzipped_data_path, dr, category)
        for image in os.listdir(path):
            image_path = os.path.join(path, image)
            image_array = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image_array is None:
                # cv2.imread signals an unreadable or non-image file by
                # returning None instead of raising; passing that to
                # cv2.resize would abort the whole load, so skip the file.
                print("skipping unreadable image:", image_path)
                continue
            image_data.append([cv2.resize(image_array, (IMG_SIZE, IMG_SIZE)), class_index])

# Notebook display: the [image, class index] pair stored at position 19000.
image_data[19000]

import random

# Shuffle the samples in place, then split the pairs into two parallel lists:
# input_data holds the images and label holds the matching class indices.
random.shuffle(image_data)
input_data = [pixels for pixels, _ in image_data]
label = [cls_idx for _, cls_idx in image_data]

import matplotlib.pyplot as plt  # for plotting

# Show the samples at positions 1..9 in a 3x3 grid, each titled with its class
# name minus the first three characters (the "NN_" prefix of CATEGORIES).
plt.figure(1, figsize=(10, 10))
for position in range(1, 10):
    pixels = image_data[position][0]
    class_name = CATEGORIES[label[position]]
    plt.subplot(3, 3, position)
    plt.imshow(pixels, cmap='hot')
    # Hide the axis ticks.
    plt.xticks([])
    plt.yticks([])
    plt.title(class_name[3:])
plt.show()

import numpy as np
import keras
from sklearn.model_selection import train_test_split

# Turn the Python lists into arrays and divide the pixel values by 255.0
# (presumably 8-bit grayscale values, which would scale them into [0, 1]).
input_data = np.array(input_data) / 255.0
label = np.array(label)

# One-hot encode the class indices over the 10 gesture classes.
label = keras.utils.to_categorical(label, num_classes=10, dtype='i1')
label[0]  # notebook display: one-hot vector of the first sample

# Add a trailing single-channel axis: (samples, IMG_SIZE, IMG_SIZE, 1).
input_data = input_data.reshape(-1, IMG_SIZE, IMG_SIZE, 1)

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    input_data, label, test_size=0.3, random_state=0
)

from keras.layers import Conv2D, Activation, MaxPool2D, Dense, Flatten, Dropout

# CNN over single-channel IMG_SIZE x IMG_SIZE inputs: three 3x3 convolutions
# (32, 32, 64 filters) with ReLU, max-pooling and dropout after the second and
# third, then a 256-unit dense layer and a 10-way softmax output.
model = keras.models.Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), input_shape=(IMG_SIZE, IMG_SIZE, 1)),
    Activation('relu'),

    Conv2D(filters=32, kernel_size=(3, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(filters=64, kernel_size=(3, 3)),
    Activation('relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),

    Flatten(),
    Dense(256, activation='relu'),
    Dense(10, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Train for 7 epochs in mini-batches of 32; the held-out split is passed as
# validation data.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=7,
    validation_data=(X_test, y_test),
)

# Evaluate on the held-out split and print the result of evaluate().
score = model.evaluate(X_test, y_test, batch_size=128)
print(score)

# Save the trained model to an HDF5 file.
model.save("kaggle_dataset_model.h5")

但是无论我尝试哪种模型，都会收到类似下面的错误：

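ValueError: Input 0 of layer sequential_2 is incompatible with the layer: expected axis -1 of input shape to have value 1 but received input with shape [None, 50, 50, 3]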

我希望模型进行预测的代码如下

"""Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1PWDO7aYA6Lhl9FgdgMHh8fj-vlLF_mTw
"""

# Prediction script: load the saved gesture model and print its class
# probabilities for each image uploaded through Colab.
from keras.models import load_model
from keras.preprocessing import image
import numpy as np

# dimensions of our images
# NOTE(review): these two names are not used below; load_img hard-codes (50, 50).
img_width = 50
img_height = 50
# load the model we saved
# NOTE(review): the training script saves "kaggle_dataset_model.h5"; presumably
# the file was renamed before being loaded here -- confirm.
model = load_model('KaggleModelLeapGesture.h5')
# NOTE(review): the training script compiled with 'categorical_crossentropy'
# for its 10-way softmax output; the loss given here differs from that.
model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

from google.colab import files
from keras.preprocessing import image

# Ask the user to upload one or more images; each key of the result is used
# below as the path of an uploaded file.
uploaded = files.upload()

for fn in uploaded.keys():
 
  # predicting images
  path = fn
  # NOTE(review): the reported ValueError shows this batch reaching the model
  # as [None, 50, 50, 3], while the network was built with
  # input_shape=(IMG_SIZE, IMG_SIZE, 1) and trained on grayscale images, so
  # the image has to be loaded as single-channel (grayscale) instead.
  # The training script also divided pixel values by 255.0; that scaling is
  # not applied here.
  img = image.load_img(path, target_size=(50, 50))
  x = image.img_to_array(img)
  # Add a leading batch axis so the array is a batch of one image.
  x = np.expand_dims(x, axis=0)

  # Stack the (single) batch and run the model on it.
  images = np.vstack([x])
  classes = model.predict(images, batch_size=10)
  # Print the file name followed by the model output for it.
  print(fn)
  print(classes)

Answer 1

按照Dr. Snoopy的建议，该模型是在灰度图像上训练的，但是您尝试在RGB图像上进行预测。请使用图像的灰度版本。

至于您接下来关于预测结果的问题：模型的最后一层是 model.add(Dense(10, activation='softmax'))，这意味着有10个待预测的类别；经过softmax函数后，模型输出的是图像属于这10个类别中每一类的概率，所有概率之和等于1。

深度学习模型不提供预测，因为输入层不兼容

1 个答案: