用相机进行人脸检测

时间:2016-12-28 02:46:37

标签: ios swift avfoundation face-detection

如何像“相机”那样实时进行人脸检测?

enter image description here

我注意到 AVCaptureStillImageOutput 在 iOS 10.0 之后已被弃用,所以我改用了 AVCapturePhotoOutput 。但是,我发现为人脸检测保存的图像效果并不令人满意。有什么想法吗?

更新

我尝试了 @Shravya Boggarapu 提到的方法。目前,我使用 AVCaptureMetadataOutput 来检测人脸,而不使用 CIFaceDetector。它按预期工作。然而,当我试图画出人脸的边界框时,位置似乎错位了。有什么想法吗?

enter image description here

// Metadata output that is asked below to report detected faces.
let metaDataOutput = AVCaptureMetadataOutput()

// Configure the session for the wide-angle back camera with the photo preset.
captureSession.sessionPreset = AVCaptureSessionPresetPhoto
let backCamera = AVCaptureDevice.defaultDevice(withDeviceType: .builtInWideAngleCamera, mediaType: AVMediaTypeVideo, position: .back)
do {
    let input = try AVCaptureDeviceInput(device: backCamera)

    if captureSession.canAddInput(input) {
        captureSession.addInput(input)

        // Use a metadata output (instead of a photo output) for face detection.
        if captureSession.canAddOutput(metaDataOutput) {
            captureSession.addOutput(metaDataOutput)

            // Face metadata is delivered to `self` on the main queue.
            metaDataOutput.setMetadataObjectsDelegate(self, queue: DispatchQueue.main)
            metaDataOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

            // Build the preview layer sized to the camera view, install it,
            // and only then start the session.
            let layer = AVCaptureVideoPreviewLayer(session: captureSession)
            previewLayer = layer
            layer.frame = cameraView.bounds
            layer.videoGravity = AVLayerVideoGravityResizeAspectFill

            cameraView.layer.addSublayer(layer)
            captureSession.startRunning()
        }
    }
} catch {
    print(error.localizedDescription)
}

extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
    /// Receives metadata objects from the capture session and draws a box
    /// around every detected face.
    ///
    /// The face bounds reported by AVFoundation are not expressed in view
    /// coordinates, so scaling them by the view size by hand puts the box in
    /// the wrong place. The preview layer is asked to convert each object
    /// with `transformedMetadataObject(for:)` instead.
    ///
    /// - Parameters:
    ///   - captureOutput: The output that produced the metadata.
    ///   - metadataObjects: The detected objects; non-face entries are ignored.
    ///   - connection: The connection the metadata arrived on.
    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
        // One-shot gate: only the first callback after `findFaceControl` is
        // set gets processed.
        guard findFaceControl else { return }
        findFaceControl = false

        // Tolerate a nil array instead of crashing on the implicit unwrap.
        let detectedObjects: [Any] = metadataObjects ?? []

        // The conditional cast replaces the type check plus `as!` force cast.
        for case let face as AVMetadataFaceObject in detectedObjects {
            print("")
            print(face)

            // Without a preview layer there is no target coordinate space.
            guard let converted = previewLayer?.transformedMetadataObject(for: face) else {
                continue
            }

            // `faceRect` is in the preview layer's coordinate space.
            // NOTE(review): showBounds(at:) adds the box to `imageView`; this
            // lines up only if `imageView` and the preview layer cover the
            // same area — confirm against the view layout.
            let faceRect = converted.bounds
            print(faceRect)

            showBounds(at: faceRect)
        }
    }
}

原始

see in Github

// Capture session configured and started in viewWillAppear(_:).
var captureSession = AVCaptureSession()
// Still-photo output added to the session; captureImage() triggers captures on it.
var photoOutput = AVCapturePhotoOutput()
// Live camera preview layer; stays nil until viewWillAppear(_:) creates it.
var previewLayer: AVCaptureVideoPreviewLayer?    

/// Configures the capture session with the default video device and the
/// photo output, starts it, and installs the live preview layer in
/// `cameraView`.
///
/// - Parameter animated: Forwarded unchanged to `super`.
override func viewWillAppear(_ animated: Bool) {
    // Forward the caller's flag instead of a hard-coded `true`.
    super.viewWillAppear(animated)

    captureSession.sessionPreset = AVCaptureSessionPresetHigh

    // There may be no camera at all; bail out instead of handing a nil
    // device to AVCaptureDeviceInput.
    guard let backCamera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo) else {
        print("No video capture device available")
        return
    }

    do {
        let input = try AVCaptureDeviceInput(device: backCamera)

        // The input must be accepted before the output is even considered.
        guard captureSession.canAddInput(input) else { return }
        captureSession.addInput(input)

        guard captureSession.canAddOutput(photoOutput) else { return }
        captureSession.addOutput(photoOutput)
        captureSession.startRunning()

        previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill
        previewLayer?.frame = cameraView.bounds

        // Bind once instead of force-unwrapping the optional property.
        if let layer = previewLayer {
            cameraView.layer.addSublayer(layer)
        }
    } catch {
        print(error.localizedDescription)
    }
}

/// Asks `photoOutput` to capture a still photo, with `self` as the delegate.
///
/// A preview photo format is requested only when the settings object
/// advertises at least one preview pixel format.
func captureImage() {
    let settings = AVCapturePhotoSettings()

    // `first` is nil when no preview format is available; skip the preview
    // request in that case instead of crashing on a force unwrap.
    if let previewPixelType = settings.availablePreviewPhotoPixelFormatTypes.first {
        settings.previewPhotoFormat = [kCVPixelBufferPixelFormatTypeKey as String: previewPixelType]
    }

    photoOutput.capturePhoto(with: settings, delegate: self)
}



/// Photo-capture delegate callback: shows the captured JPEG in `imageView`,
/// hides the live preview and runs face detection on the image.
///
/// Nothing is changed on screen when the sample buffer is missing or cannot
/// be turned into a `UIImage`.
func capture(_ captureOutput: AVCapturePhotoOutput, didFinishProcessingPhotoSampleBuffer photoSampleBuffer: CMSampleBuffer?, previewPhotoSampleBuffer: CMSampleBuffer?, resolvedSettings: AVCaptureResolvedPhotoSettings, bracketSettings: AVCaptureBracketedStillImageSettings?, error: Error?) {
    if let error = error {
        print(error.localizedDescription)
    }

    // The preview sample buffer is deliberately not embedded in the JPEG data.
    // Decoding can fail, so the image is bound here rather than
    // force-unwrapped after the preview has already been hidden.
    guard let sampleBuffer = photoSampleBuffer,
        let dataImage = AVCapturePhotoOutput.jpegPhotoDataRepresentation(forJPEGSampleBuffer: sampleBuffer, previewPhotoSampleBuffer: nil),
        let capturedImage = UIImage(data: dataImage) else {
            return
    }

    imageView.image = capturedImage
    imageView.isHidden = false
    previewLayer?.isHidden = true
    findFace(img: capturedImage)
}

findFace 对普通图片有效。但是,对于我通过相机拍摄的图像,它不起作用,或者有时只能识别出一张脸。

普通图片

enter image description here

捕获图片

enter image description here

/// Runs Core Image face detection on `img` and draws a box over `imageView`
/// for every face found.
///
/// Detected bounds are flipped vertically and then scaled and offset with an
/// aspect-fit factor (the smaller of the two view/image ratios).
///
/// NOTE(review): no `CIDetectorImageOrientation` option is passed. If camera
/// captures carry a non-up `imageOrientation`, the detector may be looking at
/// a rotated image — confirm, and pass the orientation if so.
///
/// - Parameter img: The image to analyse.
func findFace(img: UIImage) {
    guard let faceImage = CIImage(image: img) else { return }
    let accuracy = [CIDetectorAccuracy: CIDetectorAccuracyHigh]
    guard let faceDetector = CIDetector(ofType: CIDetectorTypeFace, context: nil, options: accuracy) else { return }

    // Flip the y axis: Core Image results are converted to UIView coordinates.
    let detectedImageSize = faceImage.extent.size
    var transform = CGAffineTransform(scaleX: 1, y: -1)
    transform = transform.translatedBy(x: 0, y: -detectedImageSize.height)

    // Scale and offsets depend only on the image and view sizes, so they are
    // computed once rather than once per face.
    let viewSize = imageView.bounds.size
    let scale = min(viewSize.width / detectedImageSize.width,
                    viewSize.height / detectedImageSize.height)
    let offsetX = (viewSize.width - detectedImageSize.width * scale) / 2
    let offsetY = (viewSize.height - detectedImageSize.height * scale) / 2

    let features = faceDetector.features(in: faceImage, options: [CIDetectorSmile: true, CIDetectorEyeBlink: true])

    // Pattern-match instead of force-casting the whole array.
    for case let face as CIFaceFeature in features {
        // Image coordinates -> flipped -> scaled to the view.
        var faceViewBounds = face.bounds.applying(transform)
        faceViewBounds = faceViewBounds.applying(CGAffineTransform(scaleX: scale, y: scale))
        print("faceBounds = \(faceViewBounds)")
        faceViewBounds.origin.x += offsetX
        faceViewBounds.origin.y += offsetY

        showBounds(at: faceViewBounds)
    }

    if features.isEmpty {
        print("No faces ")
    } else {
        print("Number of faces: \(features.count)")
    }
}

/// Adds a red, unfilled rectangle to `imageView` at `bounds` and records it
/// in `faceBoxes`.
///
/// - Parameter bounds: Frame of the rectangle, in `imageView` coordinates.
func showBounds(at bounds: CGRect) {
    // The initializer already applies the frame.
    let box = UIView(frame: bounds)
    box.backgroundColor = .clear
    box.layer.borderColor = UIColor.red.cgColor
    box.layer.borderWidth = 3

    imageView.addSubview(box)
    faceBoxes.append(box)
}

6 个答案:

答案 0 :(得分:11)

检测面部有两种方法:一种是CIFaceDetector,另一种是AVCaptureMetadataOutput

根据您的要求,选择与您相关的内容。

CIFaceDetector有更多功能 - 例如:给你眼睛和嘴巴的位置,微笑探测器等

另一方面,AVCaptureMetadataOutput 是直接在视频帧上计算的,并且会自动跟踪检测到的人脸,我们不需要添加额外的代码。我发现,由于有跟踪,这种方式检测人脸更加可靠。它的缺点是只能检测到人脸本身,得不到眼睛/嘴巴的位置。此方法的另一个优点是方向问题较少:每当设备方向改变时,您可以设置 videoOrientation,人脸的方向就会相对于该方向给出。

就我而言,我的应用程序使用YUV420作为所需格式,因此实时使用CIDetector(与RGB一起使用)是不可行的。使用AVCaptureMetadataOutput节省了大量精力,并且由于持续跟踪而执行更可靠

一旦我有了面部的边界框,我编写了额外的功能,例如皮肤检测,并将其应用于静止图像。

注意:捕获静止图像时,人脸框信息会随元数据一起添加,因此不会出现同步问题。

您还可以使用两者的组合来获得更好的效果。

请根据您的应用场景,自行探索并评估利弊。

更新

人脸矩形是相对于图像原点的。因此,它在屏幕上的位置可能有所不同。请使用以下代码:

// Scales each face's bounds by the size of previewLayerRect to get a rectangle to draw.
// NOTE(review): assumes faceFeatures.bounds is normalized and previewLayerRect
// (declared outside this snippet) is the preview layer's rect — confirm.
for (AVMetadataFaceObject *faceFeatures in metadataObjects) {
    CGRect face = faceFeatures.bounds;
    // The axes are swapped on purpose: origin.y is scaled by the preview width
    // for x, and origin.x by the preview height for y.
    // NOTE(review): width comes from size.width * preview height and height from
    // size.height * preview width — verify this pairing matches the origin swap.
    CGRect facePreviewBounds = CGRectMake(face.origin.y * previewLayerRect.size.width,
                               face.origin.x * previewLayerRect.size.height,
                               face.size.width * previewLayerRect.size.height,
                               face.size.height * previewLayerRect.size.width);

    /* Draw rectangle facePreviewBounds on screen */
}

答案 1 :(得分:6)

要在iOS上执行面部检测,有CIDetector(Apple) 或Mobile Vision(Google)API。

IMO,Google Mobile Vision提供了更好的性能。

如果您有兴趣,here is the project you can play with. (iOS 10.2, Swift 3)

2017年WWDC之后,Apple在iOS 11中引入了CoreML Vision 框架使面部检测更加准确:)

我做了一个 Demo Project,其中包含 Vision 与 CIDetector 的对比。此外,它还包含实时的人脸特征点(landmark)检测。

答案 2 :(得分:1)

extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
  /// Draws a white box over every detected face, using the preview layer to
  /// convert each face's bounds into the layer's coordinate space.
  ///
  /// - Parameters:
  ///   - captureOutput: The output that produced the metadata.
  ///   - metadataObjects: The detected objects; non-face entries are ignored.
  ///   - connection: The connection the metadata arrived on.
  func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
    // One-shot gate: only the first callback after the flag is set is handled.
    guard findFaceControl else { return }
    findFaceControl = false

    // Read the `metadataObjects` parameter (the original referenced an
    // undefined `metadata`) and tolerate a nil array.
    let detectedObjects: [Any] = metadataObjects ?? []

    for case let face as AVMetadataFaceObject in detectedObjects {
      // Skip faces that cannot be converted (e.g. no preview layer yet).
      guard let localizedFace = previewLayer?.transformedMetadataObject(for: face) else {
        continue
      }

      let temp = UIView(frame: localizedFace.bounds)
      // CALayer.borderColor takes a CGColor, not a UIColor.
      temp.layer.borderColor = UIColor.white.cgColor
      temp.layer.borderWidth = 2.0
      // addSubview(_:) takes no argument label.
      view.addSubview(temp)
    }
  }
}

请务必删除didOutputMetadataObjects创建的视图。

跟踪活跃的面部ids是最好的方法^

此外,当您尝试查找预览图层的面部位置时,使用面部数据和变换会更加容易。另外我认为CIDetector是垃圾,metadataoutput将使用硬件进行人脸检测,使其非常快。

答案 3 :(得分:1)

有点晚了,但这里是坐标问题的解决方案。您可以在预览图层上调用一个方法,把元数据对象转换到预览图层的坐标系:transformedMetadataObject(for: metadataObject)。

// Meant to run inside a loop over the metadata objects (hence `continue`):
// the preview layer converts the object, and objects it cannot convert are skipped.
guard let transformedObject = previewLayer.transformedMetadataObject(for: metadataObject) else {
     continue
}
// Draw the box from the converted object's bounds.
let bounds = transformedObject.bounds
showBounds(at: bounds)

来源:https://developer.apple.com/documentation/avfoundation/avcapturevideopreviewlayer/1623501-transformedmetadataobjectformeta

顺便说一下,如果您正在使用 Swift 4(或正在把项目升级到 Swift 4),AVCaptureMetadataOutputObjectsDelegate 的委托方法已更改为:

func metadataOutput(_ output: AVCaptureMetadataOutput, didOutput metadataObjects: [AVMetadataObject], from connection: AVCaptureConnection)

亲切的问候

答案 4 :(得分:0)

通过查看您的代码,我发现了两件可能导致错误/不良人脸检测的事情。

  1. 其中一个是面部检测器功能选项,您可以通过 [CIDetectorSmile: true, CIDetectorEyeBlink: true] 过滤结果。尝试将其设置为nil: faceDetector?.features(in: faceImage, options: nil)
  2. 我的另一个猜测是结果图像的方向。我注意到您使用 AVCapturePhotoOutput.jpegPhotoDataRepresentation 方法生成用于检测的源图像,而系统默认生成的图像带有特定的方向,我认为是 Left / LandscapeLeft 类型。因此,您可以使用 CIDetectorImageOrientation 键告诉人脸检测器考虑这一方向。
  3.   

    CIDetectorImageOrientation :此键的值是1..8的整数NSNumber,例如kCGImagePropertyOrientation中的值。如果存在,将根据该方向进行检测,但返回的要素中的坐标仍将基于图像的坐标。

    尝试将其设置为 faceDetector?.features(in: faceImage, options: [CIDetectorImageOrientation: 8 /*Left, bottom*/])

答案 5 :(得分:0)

  1. 创建 CaptureSession
  2. 对于AVCaptureVideoDataOutput创建以下设置

    // Set the output's pixel-format key to 32-bit BGRA.
    output.videoSettings = [kCVPixelBufferPixelFormatTypeKey as AnyHashable:Int(kCMPixelFormat_32BGRA)]

  3. 当您收到 CMSampleBuffer 时,请用它创建图像

    // Convert the sample buffer and show the result on the main queue.
    DispatchQueue.main.async {
        self.imageView.image = self.imageFromSampleBuffer(sampleBuffer: sampleBuffer)
    }
    /// Builds a `UIImage` from the pixel data of a video sample buffer.
    ///
    /// The bitmap context is created as 8 bits per component, little-endian
    /// 32-bit with premultiplied-first alpha.
    /// NOTE(review): assumes the buffer holds 32-bit BGRA pixels — confirm
    /// the output's video settings.
    ///
    /// - Parameter sampleBuffer: The sample buffer delivered by the capture output.
    /// - Returns: The converted image, or an empty `UIImage()` when the buffer
    ///   carries no image data or no bitmap image could be created from it.
    func imageFromSampleBuffer(sampleBuffer : CMSampleBuffer) -> UIImage {
        // Get the sample buffer's Core Video image buffer; there may be none.
        guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return UIImage()
        }

        // Lock the base address while the pixels are read; `defer` guarantees
        // the matching unlock on every exit path.
        CVPixelBufferLockBaseAddress(imageBuffer, CVPixelBufferLockFlags.readOnly)
        defer { CVPixelBufferUnlockBaseAddress(imageBuffer, CVPixelBufferLockFlags.readOnly) }

        // Base address and geometry of the pixel data.
        let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer)
        let bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer)
        let width = CVPixelBufferGetWidth(imageBuffer)
        let height = CVPixelBufferGetHeight(imageBuffer)

        // Device-dependent RGB color space.
        let colorSpace = CGColorSpaceCreateDeviceRGB()

        // Little-endian 32-bit layout with premultiplied-first alpha.
        var bitmapInfo: UInt32 = CGBitmapInfo.byteOrder32Little.rawValue
        bitmapInfo |= CGImageAlphaInfo.premultipliedFirst.rawValue & CGBitmapInfo.alphaInfoMask.rawValue

        // Wrap the pixel data in a bitmap context and make a Quartz image of
        // it. Either step can fail, so both results are bound rather than
        // force-unwrapped.
        guard let context = CGContext(data: baseAddress, width: width, height: height, bitsPerComponent: 8, bytesPerRow: bytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo),
            let quartzImage = context.makeImage() else {
                return UIImage()
        }

        return UIImage(cgImage: quartzImage)
    }