I am building an app that masks faces and records video with the mask applied. We record the video with AVAssetWriter, drawing the mask onto each sample buffer before appending it.
We are using the Google Mobile Vision SDK for face detection.
The problem is that when the video is recorded in portrait, the recorded file always comes out in landscape, while the mask stays in portrait.
I have tried setting different video orientations, but it does not seem to help.
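To make it concrete what I mean by "setting different video orientations", this is roughly the kind of thing I have been trying on the connection that feeds the data output (a sketch of my attempts, not the exact code; _videoDataOutput stands for my AVCaptureVideoDataOutput property):

AVCaptureConnection *videoConnection = [_videoDataOutput connectionWithMediaType:AVMediaTypeVideo];
if (videoConnection.isVideoOrientationSupported) {
    // tried Portrait, LandscapeRight, etc. -- none of them fixed the recorded file
    videoConnection.videoOrientation = AVCaptureVideoOrientationPortrait;
}

Here is my code: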
-(void)startRecordingWithAssetWriter:(NSURL *)url {
    /* to prepare for output; I'll output 640x480 in H.264, via an asset writer */
    NSDictionary *outputSettings =
        [NSDictionary dictionaryWithObjectsAndKeys:
             [NSNumber numberWithInt:640], AVVideoWidthKey,
             [NSNumber numberWithInt:480], AVVideoHeightKey,
             AVVideoCodecH264, AVVideoCodecKey,
             nil];
    _assetWriterInput = [AVAssetWriterInput assetWriterInputWithMediaType:AVMediaTypeVideo
                                                           outputSettings:outputSettings];

    /* I'm going to push pixel buffers to it, so will need a
       AVAssetWriterInputPixelBufferAdaptor, to expect the same 32BGRA input as I've
       asked the AVCaptureVideoDataOutput to supply */
    _pixelBufferAdaptor =
        [[AVAssetWriterInputPixelBufferAdaptor alloc]
            initWithAssetWriterInput:_assetWriterInput
         sourcePixelBufferAttributes:
             [NSDictionary dictionaryWithObjectsAndKeys:
                  [NSNumber numberWithInt:kCVPixelFormatType_32BGRA],
                  kCVPixelBufferPixelFormatTypeKey,
                  nil]];

    /* that's going to go somewhere, I imagine you've got the URL for that sorted,
       so create a suitable asset writer; we'll put our H.264 within the normal
       MPEG4 container */
    _assetWriter = [[AVAssetWriter alloc] initWithURL:url
                                             fileType:AVFileTypeMPEG4
                                                error:nil];
    [_assetWriter addInput:_assetWriterInput];

    /* we need to warn the input to expect real time data incoming, so that it tries
       to avoid being unavailable at inopportune moments */
    _assetWriterInput.expectsMediaDataInRealTime = YES;

    [_assetWriter startWriting];
    [_assetWriter startSessionAtSourceTime:kCMTimeZero];
    [_session startRunning];
}

-(void)stopAssetWriterRecording {
    [_session stopRunning];
    [_assetWriter finishWriting];
}
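For completeness, the capture side that feeds the delegate below is set up roughly like this (this is only the general shape of my setup; the property and queue names are approximate):

_videoDataOutput = [[AVCaptureVideoDataOutput alloc] init];
_videoDataOutput.videoSettings = @{ (id)kCVPixelBufferPixelFormatTypeKey : @(kCVPixelFormatType_32BGRA) };
[_videoDataOutput setSampleBufferDelegate:self queue:dispatch_queue_create("videoQueue", DISPATCH_QUEUE_SERIAL)];
if ([_session canAddOutput:_videoDataOutput]) {
    [_session addOutput:_videoDataOutput];
}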
- (void)captureOutput:(AVCaptureOutput *)captureOutput
didOutputSampleBuffer:(CMSampleBufferRef)sampleBuffer
       fromConnection:(AVCaptureConnection *)connection {
    NSLog(@"Video Orientation %ld", (long)connection.videoOrientation);

    if (!self.faceRecognitionIsOn) {
        dispatch_sync(dispatch_get_main_queue(), ^{
            [[self.preview subviews] makeObjectsPerformSelector:@selector(removeFromSuperview)];
        });
        [self.session addOutput:_movieFileOutput];
        return;
    }

    CGFloat cpuLoadPercentage = cpu_usage();
    NSArray *devices = [AVCaptureDevice devices];
    NSError *error;
    if (cpuLoadPercentage > 100 && fpsCount > 8) {
        NSLog(@"FPS COUNT: %i, %f", fpsCount, cpuLoadPercentage);
        fpsCount--;
    }
    for (AVCaptureDevice *device in devices) {
        if ([device hasMediaType:AVMediaTypeVideo]) {
            if ([device lockForConfiguration:&error]) {
                [device setActiveVideoMaxFrameDuration:CMTimeMake(1, fpsCount)];
                [device setActiveVideoMinFrameDuration:CMTimeMake(1, fpsCount)];
                [device unlockForConfiguration];
            }
        }
    }
    UIImage *image = [GMVUtility sampleBufferTo32RGBA:sampleBuffer];
    AVCaptureDevicePosition devicePosition = self.position ? AVCaptureDevicePositionFront : AVCaptureDevicePositionBack;

    // Establish the image orientation.
    UIDeviceOrientation deviceOrientation = [[UIDevice currentDevice] orientation];
    if (deviceOrientation == UIDeviceOrientationUnknown) {
        deviceOrientation = UIDeviceOrientationPortrait;
    }
    GMVImageOrientation orientation = [GMVUtility imageOrientationFromOrientation:deviceOrientation
                                                        withCaptureDevicePosition:devicePosition
                                                         defaultDeviceOrientation:UIDeviceOrientationPortrait];
    NSDictionary *options = @{
        GMVDetectorImageOrientation : @(orientation)
    };

    // Detect features using GMVDetector.
    NSArray<GMVFaceFeature *> *faces = [self.faceDetector featuresInImage:image options:options];

    // The video frames captured by the camera are a different size than the video preview.
    // Calculates the scale factors and offset to properly display the features.
    CMFormatDescriptionRef fdesc = CMSampleBufferGetFormatDescription(sampleBuffer);
    CGRect clap = CMVideoFormatDescriptionGetCleanAperture(fdesc, false);
    CGSize parentFrameSize = self.captureVideoPreviewLayer.frame.size;

    // Assume AVLayerVideoGravityResizeAspect
    CGFloat cameraRatio = clap.size.height / clap.size.width;
    CGFloat viewRatio = parentFrameSize.width / parentFrameSize.height;
    CGFloat xScale = 1;
    CGFloat yScale = 1;
    CGRect videoBox = CGRectZero;
    videoBox.size.width = parentFrameSize.width;
    videoBox.size.height = clap.size.width * (parentFrameSize.width / clap.size.height);
    videoBox.origin.x = (videoBox.size.width - parentFrameSize.width) / 2;
    videoBox.origin.y = (parentFrameSize.height - videoBox.size.height) / 2;
    xScale = videoBox.size.width / clap.size.height;
    yScale = videoBox.size.height / clap.size.width;

    // let context = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer), width: CVPixelBufferGetWidth(pixelBuffer), height: CVPixelBufferGetHeight(pixelBuffer), bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer), space: self.sDeviceRgbColorSpace, bitmapInfo: self.bitmapInfo.rawValue)!
    CVImageBufferRef pixelbuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    CVPixelBufferLockBaseAddress(pixelbuffer, 0);
    CGContextRef context = CGBitmapContextCreate(CVPixelBufferGetBaseAddress(pixelbuffer),
                                                 CVPixelBufferGetWidth(pixelbuffer),
                                                 CVPixelBufferGetHeight(pixelbuffer),
                                                 8,
                                                 CVPixelBufferGetBytesPerRow(pixelbuffer), // bytesPerRow ??
                                                 CGColorSpaceCreateDeviceRGB(),
                                                 kCGImageAlphaPremultipliedLast);
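    // (Not sure about this part either: the data output delivers kCVPixelFormatType_32BGRA,
    // so I suspect the bitmapInfo should really be
    // (kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedFirst) to match the BGRA layout.)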
    dispatch_sync(dispatch_get_main_queue(), ^{
        // Remove previously added feature views.
        [[self.preview subviews] makeObjectsPerformSelector:@selector(removeFromSuperview)];

        // Display detected features in overlay.
        for (GMVFaceFeature *face in faces) {
            funnyFaceImageView = [UIImageView new];
            funnyFaceImageView.image = self.funnyFaceImage;
            CGRect faceRect = [self scaledRect:face.bounds
                                        xScale:xScale
                                        yScale:yScale
                                        offset:videoBox.origin];

            CGPoint mouthB, mouthL, mouthR, mouth;
            CGPoint nose, cheekL, cheekR;
            CGPoint earL, earR;
            CGPoint eyeL, eyeR;
            mouthB = mouthL = mouthR = mouth = nose = cheekL = cheekR = earL = earR = eyeL = eyeR = CGPointZero;

            // Mouth
            if (face.hasBottomMouthPosition) {
                CGPoint point = [self scaledPoint:face.bottomMouthPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                mouthB = point;
            }
            if (face.hasMouthPosition) {
                CGPoint point = [self scaledPoint:face.mouthPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                mouth = point;
            }
            if (face.hasRightMouthPosition) {
                CGPoint point = [self scaledPoint:face.rightMouthPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                mouthR = point;
            }
            if (face.hasLeftMouthPosition) {
                CGPoint point = [self scaledPoint:face.leftMouthPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                mouthL = point;
            }

            // Nose
            if (face.hasNoseBasePosition) {
                CGPoint point = [self scaledPoint:face.noseBasePosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                nose = point;
            }

            // Eyes
            if (face.hasLeftEyePosition) {
                CGPoint point = [self scaledPoint:face.leftEyePosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                eyeL = point;
            }
            if (face.hasRightEyePosition) {
                CGPoint point = [self scaledPoint:face.rightEyePosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                eyeR = point;
            }

            // Ears
            if (face.hasLeftEarPosition) {
                CGPoint point = [self scaledPoint:face.leftEarPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                earL = point;
            }
            if (face.hasRightEarPosition) {
                CGPoint point = [self scaledPoint:face.rightEarPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                earR = point;
            }

            // Cheeks
            if (face.hasLeftCheekPosition) {
                CGPoint point = [self scaledPoint:face.leftCheekPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                cheekL = point;
            }
            if (face.hasRightCheekPosition) {
                CGPoint point = [self scaledPoint:face.rightCheekPosition
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                cheekR = point;
            }
            CGFloat faceAngle = - (M_PI / 180) * face.headEulerAngleZ;
            funnyFaceImageView.frame = faceRect;

            CGFloat shift = 100.0f;
            // CGPoint maskEyeR = CGPointMake(972, 799);
            CGPoint maskEyeL = CGPointMake(789 - shift, 799);
            CGFloat realEyeDistance = eyeR.x - eyeL.x;
            CGFloat realEyeDistanceY = eyeR.y - eyeL.y;
            // CGFloat maskEyeDistance = maskEyeR.x - maskEyeL.x;
            CGFloat maskEyeDistance = 283.0f;
            CGFloat proximityRatio = 1.3f;
            CGFloat ratio = sqrtf(realEyeDistance * realEyeDistance + realEyeDistanceY * realEyeDistanceY) / maskEyeDistance * proximityRatio;
            CGPoint eyeLScaled = CGPointMake(maskEyeL.x * ratio, maskEyeL.y * ratio);
            // CGPoint eyeRScaled = CGPointMake(maskEyeL.x / ratio, maskEyeL.y / ratio);
            CGSize sizeScaled = CGSizeMake(funnyFaceImageView.image.size.height * ratio, funnyFaceImageView.image.size.height * ratio);
            funnyFaceImageView.frame = CGRectMake(eyeL.x - eyeLScaled.x,
                                                  eyeL.y - eyeLScaled.y,
                                                  sizeScaled.width,
                                                  sizeScaled.height);

            // CGRect imageframe;
            CGRect transformedBounds;
            if (face.hasHeadEulerAngleZ) {
                funnyFaceImageView.transform = CGAffineTransformRotate(CGAffineTransformIdentity, faceAngle);
                // imageframe = funnyFaceImageView.frame
                transformedBounds = CGRectApplyAffineTransform(CGRectMake(self.preview.frame.origin.x,
                                                                          self.preview.frame.origin.y,
                                                                          funnyFaceImageView.frame.size.width,
                                                                          funnyFaceImageView.frame.size.height),
                                                               CGAffineTransformRotate(CGAffineTransformIdentity, faceAngle));
            } else {
                funnyFaceImageView.transform = CGAffineTransformIdentity;
                transformedBounds = CGRectApplyAffineTransform(CGRectMake(self.preview.frame.origin.x,
                                                                          self.preview.frame.origin.y,
                                                                          funnyFaceImageView.frame.size.width,
                                                                          funnyFaceImageView.frame.size.height),
                                                               CGAffineTransformIdentity);
            }
            funnyFaceImageView.contentMode = UIViewContentModeScaleToFill;

            // context.draw(metadata.image.cgImage!, in: metadata.faceRect)
            CGContextSetBlendMode(context, kCGBlendModeCopy);
            CGContextDrawImage(context, transformedBounds, funnyFaceImageView.image.CGImage);
            CGContextRotateCTM(context, faceAngle);
            // CGImageRelease(cgImage);
            [self.preview addSubview:funnyFaceImageView];

            // Tracking Id.
            if (face.hasTrackingID) {
                CGPoint point = [self scaledPoint:face.bounds.origin
                                           xScale:xScale
                                           yScale:yScale
                                           offset:videoBox.origin];
                UILabel *label = [[UILabel alloc] initWithFrame:CGRectMake(point.x, point.y, 100, 20)];
                label.text = [NSString stringWithFormat:@"id: %lu", (unsigned long)face.trackingID];
            }
        }
        // CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
        CGContextRelease(context);

        // a very dense way to keep track of the time at which this frame
        // occurs relative to the output stream, but it's just an example!
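        // Note: this assumes a constant 25 fps even though fpsCount above is adjusted at
        // runtime, so the written timestamps can drift from real time. I suspect
        // CMSampleBufferGetPresentationTimeStamp(sampleBuffer) would be the safer timing
        // source here (with startSessionAtSourceTime adjusted to match).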
        static int64_t frameNumber = 0;
        if (self.assetWriterInput.readyForMoreMediaData) {
            [self.pixelBufferAdaptor appendPixelBuffer:pixelbuffer
                                  withPresentationTime:CMTimeMake(frameNumber, 25)];
        }
        frameNumber++;
        CVPixelBufferUnlockBaseAddress(pixelbuffer, 0);
    });
}
Actual recording:
Output: