我正在尝试创建一个实时标志识别系统,这是我用于检测手部的代码,当我尝试将检测到的手部传递给经过训练的 CNN 时,它得到了该错误。有什么想法可以为经过训练的模型提供实时帧,并且当没有检测到手时,代码必须保持工作但不预测任何输出。
import mediapipe as mp
import numpy as np
import cv2 as cv
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
word_dict = {0:'One',1:'Two',2:'Three',3:'Four',4:'Five'}
model = keras.models.load_model("trained_model.h5")
mphands = mp.solutions.hands
hands = mphands.Hands()
mp_drawing = mp.solutions.drawing_utils
cap = cv2.VideoCapture(0)
_, frame = cap.read()
h, w, c = frame.shape
while True:
_, frame = cap.read()
framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
result = hands.process(framergb)
hand_landmarks = result.multi_hand_landmarks
if hand_landmarks:
for handLMs in hand_landmarks:
x_max = 0
y_max = 0
x_min = w
y_min = h
for lm in handLMs.landmark:
x, y = int(lm.x * w), int(lm.y * h)
if x > x_max:
x_max = x + 15
if x < x_min:
x_min = x -15
if y > y_max:
y_max = y +15
if y < y_min:
y_min = y -15
cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
mp_drawing.draw_landmarks(frame, handLMs, mphands.HAND_CONNECTIONS)
Newframe = frame[y_min:y_max ,x_min:x_max]
cv2.imshow("Newframe", Newframe)
dim = (64, 64)
img_cnn = cv2.resize(Newframe, dim, interpolation = cv2.INTER_AREA)
img_cnn = cv2.cvtColor(img_cnn, cv2.COLOR_RGB2GRAY)
img_array = np.array(img_cnn)
img_batch = np.expand_dims(img_array, axis=1)
pred = model.predict(img_batch)
print("Prediction is: ", pred)
cv2.imshow("Frame", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (32, 1, 64)
答案 0 :(得分:0)
由于错误消息显示预期输入维度为 4D,Tensorflow keras Conv2D 层需要输入形状为 (batch_shape, img_height, img_width, channels)
的形状 4D。
如果输入图像为 RGB 格式,则通道值为 3。灰度图像时为1。
工作示例代码
import tensorflow as tf
input_shape = (4, 28, 28, 3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv2D(
2, 3, activation='relu', input_shape=input_shape[1:])(x)
print(y.shape)
(4,26,26,2)