Real-time face recognition with LBP, deep learning, and OpenCV

Posted: 2021-05-15 13:11:51

Tags: python opencv deep-learning computer-vision face-recognition

I am new to computer vision. I am trying to implement real-time face recognition with Local Binary Patterns, using the deep-learning-based dnn module for the face detection part. I am using the caltech_faces dataset and have added a folder with 20 photos of myself to it.

So, here is my code. I basically converted the face recognition code for sample images into real-time face recognition by making some changes and additions.

I get the following error when executing the code:

predName = le.inverse_transform([predictions[i]])[0]
                                                       ^
TabError: inconsistent use of tabs and spaces in indentation

I checked all the tabs and indentation but cannot find what to fix or where. I would appreciate a hint on what to do. Thank you very much!
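One way to locate mixed tabs and spaces is Python's built-in tabnanny module; the script name below is just a placeholder for wherever the code is saved:

python -m tabnanny -v realtime_face_recognition.py

tabnanny reports the line where tabs and spaces are mixed ambiguously, which usually narrows the search down to the offending block.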

# import the necessary packages

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils.video import VideoStream
from imutils import paths
import matplotlib.pyplot as plt
import numpy as np
import argparse
import imutils
import time
import cv2
import os


#Creating our face detector

def detect_faces(net, frame, minConfidence=0.5):
    # grab the dimensions of the image and then construct a blob
    # from it
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
        (104.0, 177.0, 123.0))

    # pass the blob through the network to obtain the face detections,
    # then initialize a list to store the predicted bounding boxes
    net.setInput(blob)
    detections = net.forward()
    boxes = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > minConfidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # update our bounding box results list
            boxes.append((startX, startY, endX, endY))

    # return the face detection bounding boxes
    return boxes


#Loading the CALTECH Faces dataset

def load_face_dataset(inputPath, net, minConfidence=0.5,
    minSamples=15):
    # grab the paths to all images in our input directory, extract
    # the name of the person (i.e., class label) from the directory
    # structure, and count the number of example images we have per
    # face
    imagePaths = list(paths.list_images(inputPath))
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    names = names.tolist()

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []

    # loop over the image paths
    for imagePath in imagePaths:
        # load the image from disk and extract the name of the person
        # from the subdirectory structure
        frame = cv2.imread(imagePath)
        name = imagePath.split(os.path.sep)[-2]

        # only process images that have a sufficient number of
        # examples belonging to the class
        if counts[names.index(name)] < minSamples:
            continue

        # perform face detection
        boxes = detect_faces(net, frame, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = frame[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists
            faces.append(faceROI)
            labels.append(name)

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)

#Implementing Local Binary Patterns for face recognition    

# # construct the argument parser and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--input", type=str, required=True,
#   help="path to input directory of images")
# ap.add_argument("-f", "--face", type=str,
#   default="face_detector",
#   help="path to face detector model directory")
# ap.add_argument("-c", "--confidence", type=float, default=0.5,
#   help="minimum probability to filter weak detections")
# args = vars(ap.parse_args())

# since we are using Jupyter Notebooks we can replace our argument
# parsing code with *hard coded* arguments and values
args = {
    "input": "caltech_faces",
    "face": "face_detector",
    "confidence": 0.5,
}

# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
    "res10_300x300_ssd_iter_140000.caffemodel"])
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset
print("[INFO] loading dataset...")
(faces, labels) = load_face_dataset(args["input"], net,
    minConfidence=0.5, minSamples=20)
print("[INFO] {} images in dataset".format(len(faces)))

# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# construct our training and testing split
(trainX, testX, trainY, testY) = train_test_split(faces,
    labels, test_size=0.25, stratify=labels, random_state=42)

# train our LBP face recognizer
print("[INFO] training face recognizer...")
recognizer = cv2.face.LBPHFaceRecognizer_create(
    radius=2, neighbors=16, grid_x=8, grid_y=8)
start = time.time()
recognizer.train(trainX, trainY)
end = time.time()
print("[INFO] training took {:.4f} seconds".format(end - start))


# initialize the list of predictions and confidence scores
print("[INFO] gathering predictions...")
predictions = []
confidence = []
start = time.time()

# loop over the test data
for i in range(0, len(testX)):
    # classify the face and update the list of predictions and
    # confidence scores
    (prediction, conf) = recognizer.predict(testX[i])
    predictions.append(prediction)
    confidence.append(conf)

# measure how long making predictions took
end = time.time()
print("[INFO] inference took {:.4f} seconds".format(end - start))

# show the classification report
print(classification_report(testY, predictions,
    target_names=le.classes_))


# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# loop over the frames from the video stream
while True:

    # grab the frame from the threaded video stream and resize it
    # to have a maximum width of 400 pixels
    face = vs.read()
    face = imutils.resize(face, width=400)

    # loop over the detections
    for i in range(0, detections.shape[2]):

        # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]


    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: {}, actual: {}, confidence: {:.2f}".format(predName, actualName, confidence[i]))

# show the output frame
cv2.imshow("Face", face)
key = cv2.waitKey(1) & 0xFF
 
# if the `q` key was pressed, break from the loop
if key == ord("q"):
    break

1 Answer:

Answer 0 (score: 0):

I used Google Colab for this. First, make sure you have OpenCV installed; you can install it with pip:


pip install opencv-python

Before detecting faces, we need to open the webcam from Google Colab. Run the following code as the second step.

from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode

def take_photo(filename='photo.jpg', quality=0.8):
    js = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const capture = document.createElement('button');
            capture.textContent = 'Capture';
            div.appendChild(capture);

            const video = document.createElement('video');
            video.style.display = 'block';
            const stream = await navigator.mediaDevices.getUserMedia({video: true});

            document.body.appendChild(div);
            div.appendChild(video);
            video.srcObject = stream;
            await video.play();

            // Resize the output to fit the video element.
            google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

            // Wait for Capture to be clicked.
            await new Promise((resolve) => capture.onclick = resolve);

            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            stream.getVideoTracks()[0].stop();
            div.remove();
            return canvas.toDataURL('image/jpeg', quality);
        }
    ''')
    display(js)
    data = eval_js('takePhoto({})'.format(quality))
    binary = b64decode(data.split(',')[1])
    with open(filename, 'wb') as f:
        f.write(binary)
    return filename

After running these two pieces of code, the webcam opens and you can take a photo. The photo is saved as photo.jpg.
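Once photo.jpg exists, it can be pushed through the same DNN face detector from the question. The snippet below is a minimal sketch that assumes the detect_faces helper and the loaded net from the question's code are already defined in the notebook:

import cv2

# read the captured photo from disk (take_photo saves it as photo.jpg)
frame = cv2.imread("photo.jpg")

# reuse the DNN-based detector defined in the question to get bounding boxes
# (detect_faces and net come from the question's code)
boxes = detect_faces(net, frame, minConfidence=0.5)

# draw each detected face and write the annotated image back to disk
for (startX, startY, endX, endY) in boxes:
    cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2)

cv2.imwrite("photo_detections.jpg", frame)
print("[INFO] detected {} face(s)".format(len(boxes)))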

Face detection using Haar cascades is a machine-learning-based approach in which a cascade function is trained on a set of input data. OpenCV already ships with many pre-trained classifiers for faces, eyes, smiles, and so on. Here we will use the face classifier; you can also experiment with the other classifiers.
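To make this concrete, here is a minimal sketch of the Haar-cascade approach (an alternative to the DNN detector used in the question), using the pre-trained frontal-face classifier bundled with OpenCV; the input file photo.jpg is assumed to be the photo captured with take_photo above:

import cv2

# load the pre-trained frontal-face Haar cascade bundled with OpenCV
cascadePath = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath)

# Haar cascades operate on grayscale images
image = cv2.imread("photo.jpg")
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# detect faces; scaleFactor and minNeighbors may need tuning for your images
faceRects = faceCascade.detectMultiScale(gray, scaleFactor=1.1,
    minNeighbors=5, minSize=(30, 30))

# draw a rectangle around every detected face and save the result
for (x, y, w, h) in faceRects:
    cv2.rectangle(image, (x, y), (x + w, y + h), (255, 0, 0), 2)

cv2.imwrite("haar_detections.jpg", image)
print("[INFO] Haar cascade found {} face(s)".format(len(faceRects)))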