Question

我正在尝试创建一个实时标志识别系统，这是我用于检测手部的代码，当我尝试将检测到的手部传递给经过训练的 CNN 时，它得到了该错误。有什么想法可以为经过训练的模型提供实时帧，并且当没有检测到手时，代码必须保持工作但不预测任何输出。

import mediapipe as mp
import numpy as np
  
import cv2 as cv
import time
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

word_dict = {0:'One',1:'Two',2:'Three',3:'Four',4:'Five'}
model = keras.models.load_model("trained_model.h5")

mphands = mp.solutions.hands
hands = mphands.Hands()
mp_drawing = mp.solutions.drawing_utils
cap = cv2.VideoCapture(0)

_, frame = cap.read()

h, w, c = frame.shape

while True:
    _, frame = cap.read()
    framergb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = hands.process(framergb)
    hand_landmarks = result.multi_hand_landmarks
    if hand_landmarks:
        for handLMs in hand_landmarks:
            x_max = 0
            y_max = 0
            x_min = w
            y_min = h
            for lm in handLMs.landmark:
                x, y = int(lm.x * w), int(lm.y * h)
                if x > x_max:
                    x_max = x + 15
                if x < x_min:
                    x_min = x -15
                if y > y_max:
                    y_max = y +15
                if y < y_min:
                    y_min = y -15
            cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
            mp_drawing.draw_landmarks(frame, handLMs, mphands.HAND_CONNECTIONS)

            Newframe = frame[y_min:y_max ,x_min:x_max]
            cv2.imshow("Newframe", Newframe)
    
            dim = (64, 64)
            img_cnn = cv2.resize(Newframe, dim, interpolation = cv2.INTER_AREA)
            img_cnn = cv2.cvtColor(img_cnn, cv2.COLOR_RGB2GRAY)
            img_array = np.array(img_cnn)
            img_batch = np.expand_dims(img_array, axis=1)
            
            pred = model.predict(img_batch)

            print("Prediction is: ", pred)

    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


ValueError: Input 0 of layer sequential is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: (32, 1, 64)

Answer 1

由于错误消息显示预期输入维度为 4D，Tensorflow keras Conv2D 层需要输入形状为 (batch_shape, img_height, img_width, channels) 的形状 4D。如果输入图像为 RGB 格式，则通道值为 3。灰度图像时为1。

工作示例代码

import tensorflow as tf
input_shape = (4, 28, 28, 3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv2D(
2, 3, activation='relu', input_shape=input_shape[1:])(x)
print(y.shape)
(4,26,26,2)

ValueError: 层顺序的输入 0 与层不兼容：：预期 min_ndim=4，发现 ndim=3。收到完整形状：(32, 1, 64)

1 个答案: