从Dlib库到我的Swift Code抓取面部标志点

时间:2017-08-29 22:59:31

标签: objective-c swift

我从GitHub存储库下载了一个算法,用于基本的面部地标识别目的。

这个项目是Objective C代码和Swift代码的混合,我在调整Objective C方面遇到了问题。

代码:

这是检测相机拍摄的图像中所有地标点的主要函数:

/// Detects facial landmarks inside the given face rectangles and draws them
/// directly into the sample buffer's pixels (in place).
///
/// @param sampleBuffer A kCVPixelFormatType_32BGRA frame from the capture session.
/// @param rects        Face bounding boxes (CGRects wrapped in NSValue), already
///                     transformed into the frame's coordinate space.
- (void)doWorkOnSampleBuffer:(CMSampleBufferRef)sampleBuffer inRects:(NSArray<NSValue *> *)rects {

    if (!self.prepared) {
        [self prepare];
    }

    dlib::array2d<dlib::bgr_pixel> img;

    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);

    size_t width = CVPixelBufferGetWidth(imageBuffer);
    size_t height = CVPixelBufferGetHeight(imageBuffer);
    // Rows may be padded for alignment, so the stride is NOT necessarily
    // width * 4. Always use the buffer's reported bytes-per-row, or the
    // copied image comes out sheared on devices that pad rows.
    size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
    uint8_t *baseBuffer = (uint8_t *)CVPixelBufferGetBaseAddress(imageBuffer);

    // set_size expects rows, cols format
    img.set_size(height, width);

    // Copy the BGRA sample-buffer data into dlib's BGR image format,
    // honoring the buffer's row stride. The alpha byte is ignored.
    for (size_t row = 0; row < height; row++) {
        uint8_t *rowStart = baseBuffer + row * bytesPerRow;
        for (size_t col = 0; col < width; col++) {
            uint8_t *px = rowStart + col * 4;  // B, G, R, A
            img[row][col] = dlib::bgr_pixel(px[0], px[1], px[2]);
        }
    }

    // unlock buffer again until we need it again
    CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);

    // convert the face bounds list to dlib format
    std::vector<dlib::rectangle> convertedRectangles = [DlibWrapper convertCGRectValueArray:rects];

    // For every detected face, run the shape predictor and draw each landmark.
    for (unsigned long j = 0; j < convertedRectangles.size(); ++j) {
        dlib::rectangle oneFaceRect = convertedRectangles[j];

        // detect all landmarks
        dlib::full_object_detection shape = sp(img, oneFaceRect);

        // and draw them into the image (samplebuffer)
        for (unsigned long k = 0; k < shape.num_parts(); k++) {
            dlib::point p = shape.part(k);

            NSLog(@"point x,y: %ld,%ld", p.x(), p.y());

            draw_solid_circle(img, p, 3, dlib::rgb_pixel(0, 255, 255));
        }
    }

    // Write the annotated dlib image back into the sample buffer.
    // Lock read-write this time (flags 0) because we mutate the pixels.
    CVPixelBufferLockBaseAddress(imageBuffer, 0);
    baseBuffer = (uint8_t *)CVPixelBufferGetBaseAddress(imageBuffer);

    for (size_t row = 0; row < height; row++) {
        uint8_t *rowStart = baseBuffer + row * bytesPerRow;
        for (size_t col = 0; col < width; col++) {
            uint8_t *px = rowStart + col * 4;
            const dlib::bgr_pixel &pixel = img[row][col];
            px[0] = pixel.blue;
            px[1] = pixel.green;
            px[2] = pixel.red;
            // px[3] (alpha) is deliberately left untouched
        }
    }
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
}

这是处理相机图像捕捉的主要类(在Swift中):

import AVFoundation

/// Owns the AVFoundation capture pipeline: streams video frames, collects
/// face metadata, hands both to DlibWrapper for landmark drawing, and
/// displays the annotated frames via an AVSampleBufferDisplayLayer.
class SessionHandler : NSObject, AVCaptureVideoDataOutputSampleBufferDelegate, AVCaptureMetadataOutputObjectsDelegate {

    var session = AVCaptureSession()
    let layer = AVSampleBufferDisplayLayer()
    let sampleQueue = DispatchQueue(label: "com.zweigraf.DisplayLiveSamples.sampleQueue", attributes: [])
    let faceQueue = DispatchQueue(label: "com.zweigraf.DisplayLiveSamples.faceQueue", attributes: [])
    let wrapper = DlibWrapper()

    // Most recent face metadata objects, written from faceQueue and read
    // from sampleQueue.
    var currentMetadata: [AnyObject]

    override init() {
        currentMetadata = []
        super.init()
    }

    /// Configures and starts the capture session using the front camera.
    func openSession() {
        let frontCamera = AVCaptureDevice.devices(withMediaType: AVMediaTypeVideo)
            .map { $0 as! AVCaptureDevice }
            .filter { $0.position == .front}
            .first!

        let cameraInput = try! AVCaptureDeviceInput(device: frontCamera)

        let videoOutput = AVCaptureVideoDataOutput()
        videoOutput.setSampleBufferDelegate(self, queue: sampleQueue)

        let metadataOutput = AVCaptureMetadataOutput()
        metadataOutput.setMetadataObjectsDelegate(self, queue: faceQueue)

        session.beginConfiguration()

        if session.canAddInput(cameraInput) {
            session.addInput(cameraInput)
        }
        if session.canAddOutput(videoOutput) {
            session.addOutput(videoOutput)
        }
        if session.canAddOutput(metadataOutput) {
            session.addOutput(metadataOutput)
        }

        session.commitConfiguration()

        // BGRA matches the pixel layout DlibWrapper expects.
        let pixelFormat: [AnyHashable: Any] = [kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCVPixelFormatType_32BGRA)]
        videoOutput.videoSettings = pixelFormat

        // availableMetadataObjectTypes change when output is added to session.
        // before it is added, availableMetadataObjectTypes is empty
        metadataOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

        wrapper?.prepare()

        session.startRunning()
    }

    // MARK: AVCaptureVideoDataOutputSampleBufferDelegate

    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
        if !currentMetadata.isEmpty {
            // Convert each face's metadata bounds into the frame's coordinate
            // space before passing them to the landmark detector.
            var faceBounds = [NSValue]()
            for case let faceObject as AVMetadataFaceObject in currentMetadata {
                let transformed = captureOutput.transformedMetadataObject(for: faceObject, connection: connection)
                faceBounds.append(NSValue(cgRect: transformed!.bounds))
            }

            wrapper?.doWork(sampleBuffer, inRects: faceBounds)
        }

        layer.enqueue(sampleBuffer)
    }

    func captureOutput(_ captureOutput: AVCaptureOutput!, didDrop sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
        //print("DidDropSampleBuffer")
    }

    // MARK: AVCaptureMetadataOutputObjectsDelegate

    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
        currentMetadata = metadataObjects as [AnyObject]
        //print("currentMetadata: ", currentMetadata[0].point)
    }
}

所以,我试图弄清楚如何抓住函数 doWorkOnSampleBuffer 检测到的点的坐标(x,y),并在函数 captureOutput中读取它们

我的最终目标是创建一个与相机图像重叠的遮罩以及 doWorkOnSampleBuffer 函数绘制的所有点。

>> 以下是我认为必须做的事情,但我不知道该怎么实现:

我认为我必须替换函数 doWorkOnSampleBuffer 来返回点 p 的值,而不是返回 void ,并且还要更改在 SessionHandler 中调用的函数类似于:

let point = CGPoint()
point = wrapper?.doWork(sampleBuffer, inRects: boundsArray) 

注1:我没有在Objective C函数中创建这个掩码,因为我不太了解如何处理Objective C,但是我可以处理Swift代码。

注2:完整项目可从以下网址下载:https://github.com/zweigraf/face-landmarking-ios/

有什么想法吗?

0 个答案:

没有答案