I downloaded an algorithm from a GitHub repository for basic facial landmark detection.
The project is a mix of Objective-C and Swift code, and I am having trouble adapting the Objective-C part.
Code:
This is the main function that detects all the landmark points in an image captured by the camera:
- (void)doWorkOnSampleBuffer:(CMSampleBufferRef)sampleBuffer inRects:(NSArray<NSValue *> *)rects {
    if (!self.prepared) {
        [self prepare];
    }

    dlib::array2d<dlib::bgr_pixel> img;

    // MARK: magic
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);

    size_t width = CVPixelBufferGetWidth(imageBuffer);
    size_t height = CVPixelBufferGetHeight(imageBuffer);
    char *baseBuffer = (char *)CVPixelBufferGetBaseAddress(imageBuffer);

    // set_size expects rows, cols format
    img.set_size(height, width);

    // copy samplebuffer image data into dlib image format
    img.reset();
    long position = 0;
    while (img.move_next()) {
        dlib::bgr_pixel& pixel = img.element();

        // assuming bgra format here
        long bufferLocation = position * 4; //(row * width + column) * 4;
        char b = baseBuffer[bufferLocation];
        char g = baseBuffer[bufferLocation + 1];
        char r = baseBuffer[bufferLocation + 2];
        // we do not need the alpha channel
        // char a = baseBuffer[bufferLocation + 3];

        dlib::bgr_pixel newpixel(b, g, r);
        pixel = newpixel;

        position++;
    }

    // unlock buffer again until we need it again
    CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);

    // convert the face bounds list to dlib format
    std::vector<dlib::rectangle> convertedRectangles = [DlibWrapper convertCGRectValueArray:rects];

    // for every detected face
    for (unsigned long j = 0; j < convertedRectangles.size(); ++j)
    {
        dlib::rectangle oneFaceRect = convertedRectangles[j];

        // detect all landmarks
        dlib::full_object_detection shape = sp(img, oneFaceRect);

        // and draw them into the image (samplebuffer)
        for (unsigned long k = 0; k < shape.num_parts(); k++) {
            dlib::point p = shape.part(k);
            NSLog(@"point x,y: %ld,%ld", p.x(), p.y());
            draw_solid_circle(img, p, 3, dlib::rgb_pixel(0, 255, 255));
        }
    }

    // let's put everything back where it belongs
    CVPixelBufferLockBaseAddress(imageBuffer, 0);

    // copy dlib image data back into samplebuffer
    img.reset();
    position = 0;
    while (img.move_next()) {
        dlib::bgr_pixel& pixel = img.element();

        // assuming bgra format here
        long bufferLocation = position * 4; //(row * width + column) * 4;
        baseBuffer[bufferLocation] = pixel.blue;
        baseBuffer[bufferLocation + 1] = pixel.green;
        baseBuffer[bufferLocation + 2] = pixel.red;
        // the alpha channel stays untouched
        // char a = baseBuffer[bufferLocation + 3];

        position++;
    }
    CVPixelBufferUnlockBaseAddress(imageBuffer, 0);
}
This is the main class that handles the camera capture (in Swift):
import AVFoundation

class SessionHandler : NSObject, AVCaptureVideoDataOutputSampleBufferDelegate, AVCaptureMetadataOutputObjectsDelegate {
    var session = AVCaptureSession()
    let layer = AVSampleBufferDisplayLayer()
    let sampleQueue = DispatchQueue(label: "com.zweigraf.DisplayLiveSamples.sampleQueue", attributes: [])
    let faceQueue = DispatchQueue(label: "com.zweigraf.DisplayLiveSamples.faceQueue", attributes: [])
    let wrapper = DlibWrapper()

    var currentMetadata: [AnyObject]

    override init() {
        currentMetadata = []
        super.init()
    }

    func openSession() {
        let device = AVCaptureDevice.devices(withMediaType: AVMediaTypeVideo)
            .map { $0 as! AVCaptureDevice }
            .filter { $0.position == .front }
            .first!

        let input = try! AVCaptureDeviceInput(device: device)

        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: sampleQueue)

        let metaOutput = AVCaptureMetadataOutput()
        metaOutput.setMetadataObjectsDelegate(self, queue: faceQueue)

        session.beginConfiguration()

        if session.canAddInput(input) {
            session.addInput(input)
        }
        if session.canAddOutput(output) {
            session.addOutput(output)
        }
        if session.canAddOutput(metaOutput) {
            session.addOutput(metaOutput)
        }

        session.commitConfiguration()

        let settings: [AnyHashable: Any] = [kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCVPixelFormatType_32BGRA)]
        output.videoSettings = settings

        // availableMetadataObjectTypes change when output is added to session.
        // before it is added, availableMetadataObjectTypes is empty
        metaOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

        wrapper?.prepare()

        session.startRunning()
    }

    // MARK: AVCaptureVideoDataOutputSampleBufferDelegate
    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
        if !currentMetadata.isEmpty {
            var boundsArray = currentMetadata
                .flatMap { $0 as? AVMetadataFaceObject }
                .map { (faceObject) -> NSValue in
                    let convertedObject = captureOutput.transformedMetadataObject(for: faceObject, connection: connection)
                    return NSValue(cgRect: convertedObject!.bounds)
                }

            wrapper?.doWork(sampleBuffer, inRects: boundsArray)
        }

        layer.enqueue(sampleBuffer)
    }

    func captureOutput(_ captureOutput: AVCaptureOutput!, didDrop sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
        //print("DidDropSampleBuffer")
    }

    // MARK: AVCaptureMetadataOutputObjectsDelegate
    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
        currentMetadata = metadataObjects as [AnyObject]
        //print("currentMetadata: ", currentMetadata[0].point)
    }
}
So I am trying to figure out how to grab the (x, y) coordinates of the points detected by the doWorkOnSampleBuffer function and read them in captureOutput.
My end goal is to create a mask that overlays the camera image with all the points drawn by the doWorkOnSampleBuffer function.
What I think I have to do, but do not know how to do:
I think I have to change doWorkOnSampleBuffer so that it returns the values of the detected points p instead of returning void, and also change the call made in SessionHandler to something like:
let points = wrapper?.doWork(sampleBuffer, inRects: boundsArray)
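For illustration, here is a minimal Swift sketch of that idea. It assumes doWorkOnSampleBuffer were changed to collect every detected dlib::point into an NSArray of CGPoint-wrapped NSValues and return that array instead of void, so that it bridges to Swift as [NSValue]; the returning call and the updateMask(with:) helper are assumptions of mine, not the project's existing API:

// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBuffer!, from connection: AVCaptureConnection!) {
    if !currentMetadata.isEmpty {
        let boundsArray = currentMetadata
            .flatMap { $0 as? AVMetadataFaceObject }
            .map { (faceObject) -> NSValue in
                let convertedObject = captureOutput.transformedMetadataObject(for: faceObject, connection: connection)
                return NSValue(cgRect: convertedObject!.bounds)
            }

        // Assumption: doWork(_:inRects:) now returns [NSValue], one CGPoint per detected landmark.
        if let landmarkValues = wrapper?.doWork(sampleBuffer, inRects: boundsArray) {
            let points = landmarkValues.map { $0.cgPointValue }
            // The coordinates are now plain CGPoints on the Swift side; hand them to
            // whatever draws the mask, on the main queue.
            DispatchQueue.main.async {
                self.updateMask(with: points) // hypothetical helper, sketched further below
            }
        }
    }

    layer.enqueue(sampleBuffer)
}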
Note 1: I did not create this mask inside the Objective-C function because I do not really know how to work with Objective-C, but I can handle the Swift code.
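Since the mask could then live entirely on the Swift side, here is a minimal sketch of one way to draw those points over the camera image with a CAShapeLayer, instead of letting dlib paint circles into the sample buffer. overlayLayer and updateMask(with:) are hypothetical names, and the sketch assumes the points have already been converted from pixel-buffer coordinates into the overlay layer's coordinate space:

import UIKit

// Hypothetical overlay layer sitting on top of the AVSampleBufferDisplayLayer.
let overlayLayer: CAShapeLayer = {
    let shape = CAShapeLayer()
    shape.fillColor = UIColor.cyan.cgColor
    shape.strokeColor = nil
    return shape
}()

// Hypothetical helper: redraw all landmark points as small filled circles.
func updateMask(with points: [CGPoint]) {
    let path = UIBezierPath()
    for point in points {
        path.append(UIBezierPath(arcCenter: point, radius: 3,
                                 startAngle: 0, endAngle: .pi * 2, clockwise: true))
    }
    overlayLayer.path = path.cgPath
}

The overlay layer would be added above the AVSampleBufferDisplayLayer (layer) in whatever view hosts the preview, with the same frame as that layer.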
Note 2: the complete project can be downloaded here: https://github.com/zweigraf/face-landmarking-ios/
Any ideas?