我是Swift的新手,我正在尝试实施一些项目。我从Github获得了此代码,并且工作正常。
当您单击该应用程序时,它将在iPhone屏幕上启动视频,并使用'TesseractOCR'
检测字母和字符。
问题是视频预览占满了整个屏幕,我无法添加任何按钮。如果我添加一个按钮,该按钮会被视频遮住而看不见。
我尝试设置 session.sessionPreset = .photo 来裁掉视频的上下部分,但没有效果。
我也尝试过设置 preview.sessionPreset = .photo,同样不起作用。
注意:Main.storyboard
为空。
代码如下:
import AVFoundation
import UIKit
import Vision
import TesseractOCR
/// View controller that shows a live camera preview, asks Vision for the
/// rectangles of any text in each frame, and outlines those rectangles in red.
///
/// The root view is force-cast to `PreviewView` (declared elsewhere in the
/// project), so the storyboard/nib must provide a view of that class.
class ViewController: UIViewController, G8TesseractDelegate {

    // MARK: - State

    private var textDetectionRequest: VNDetectTextRectanglesRequest?
    private let session = AVCaptureSession()
    private var textObservations = [VNTextObservation]()
    private var tesseract = G8Tesseract(language: "eng", engineMode: .tesseractOnly)
    private var font = CTFontCreateWithName("Helvetica" as CFString, 18, nil)

    /// The root view, typed as the preview view that hosts the capture session.
    private var preview: PreviewView {
        return view as! PreviewView
    }
    // private var cameraView: CameraView {
    // return view as! CameraView
    // }

    // MARK: - Lifecycle

    override func viewDidLoad() {
        super.viewDidLoad()
        tesseract?.pageSegmentationMode = .sparseText
        // Recognize only these characters
        // tesseract?.charWhitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890()-+*!/?.,@#$%&"
        tesseract?.charWhitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"
        if isAuthorized() {
            configureTextDetection()
            configureCamera()
        }
    }

    // MARK: - Setup

    /// Creates the Vision request whose results are delivered to `handleDetection`.
    private func configureTextDetection() {
        let request = VNDetectTextRectanglesRequest(completionHandler: handleDetection)
        request.reportCharacterBoxes = true
        textDetectionRequest = request
    }

    /// Wires the back wide-angle camera into `session`, attaches a video data
    /// output that feeds frames to this controller, and starts the session.
    private func configureCamera() {
        preview.session = session

        let discovery = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .back)
        // There is no back camera on the simulator; bail out instead of crashing.
        guard let cameraDevice = discovery.devices.first(where: { $0.position == .back }) else {
            print("No back camera available")
            return
        }

        do {
            let captureDeviceInput = try AVCaptureDeviceInput(device: cameraDevice)
            if session.canAddInput(captureDeviceInput) {
                session.addInput(captureDeviceInput)
            }
        } catch {
            print("Error occured \(error)")
            return
        }

        session.sessionPreset = .photo // It was .high

        let videoDataOutput = AVCaptureVideoDataOutput()
        // The sample-buffer delegate queue must be serial so that frames are
        // delivered in order and `textObservations` is never touched by two
        // frames at once.
        let bufferQueue = DispatchQueue(label: "Buffer Queue", qos: .userInteractive)
        videoDataOutput.setSampleBufferDelegate(self, queue: bufferQueue)
        if session.canAddOutput(videoDataOutput) {
            session.addOutput(videoDataOutput)
        }

        preview.videoPreviewLayer.videoGravity = .resize

        // startRunning() blocks until the session is up, so keep it off the main thread.
        DispatchQueue.global(qos: .userInitiated).async { [session] in
            session.startRunning()
        }
    }

    // MARK: - Vision callback

    /// Stores the text observations of the latest frame and redraws the red
    /// outline layers on the main queue.
    ///
    /// - Parameters:
    ///   - request: The completed Vision request.
    ///   - error: The error reported by Vision, if any.
    private func handleDetection(request: VNRequest, error: Error?) {
        if let error = error {
            print("Error occured \(error)")
        }
        guard let detectionResults = request.results else {
            print("No detection results")
            return
        }
        // compactMap drops anything that is not a text observation instead of
        // force-casting an array that may contain nil.
        let observations = detectionResults.compactMap { $0 as? VNTextObservation }
        if observations.isEmpty {
            return
        }
        textObservations = observations

        DispatchQueue.main.async {
            guard let sublayers = self.view.layer.sublayers else {
                return
            }
            // Skip the first sublayer and remove every previous outline layer;
            // text layers are left in place. dropFirst() is safe on an empty
            // array, unlike sublayers[1...].
            for layer in sublayers.dropFirst() where !(layer is CATextLayer) {
                layer.removeFromSuperlayer()
            }

            let viewWidth = self.view.frame.size.width
            let viewHeight = self.view.frame.size.height
            for observation in observations {
                // boundingBox is scaled to the view size here, and the y axis is
                // flipped because its origin is taken from the opposite edge.
                var rect = observation.boundingBox
                rect.origin.x *= viewWidth
                rect.size.height *= viewHeight
                rect.origin.y = ((1 - rect.origin.y) * viewHeight) - rect.size.height
                rect.size.width *= viewWidth

                let outline = CALayer()
                outline.frame = rect
                outline.borderWidth = 2
                outline.borderColor = UIColor.red.cgColor
                self.view.layer.addSublayer(outline)
            }
        }
    }

    // MARK: - Authorization

    /// Reports whether camera access has already been granted.
    ///
    /// When the status is still undetermined the user is prompted, and the
    /// camera is configured from the completion handler once access is
    /// granted. In that case this method returns `false`, so that the caller
    /// does not configure the session a second time.
    ///
    /// - Returns: `true` only if access is already authorized.
    private func isAuthorized() -> Bool {
        let authorizationStatus = AVCaptureDevice.authorizationStatus(for: AVMediaType.video)
        switch authorizationStatus {
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: AVMediaType.video) { [weak self] granted in
                guard granted else {
                    return
                }
                DispatchQueue.main.async {
                    self?.configureTextDetection()
                    self?.configureCamera()
                }
            }
            return false
        case .authorized:
            return true
        case .denied, .restricted:
            return false
        }
    }
}
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    // MARK: - Camera Delegate and Setup

    /// Processes one camera frame: runs the text-rectangle request, crops each
    /// detected region out of the frame, feeds the crop to Tesseract, and shows
    /// the recognised strings as green text layers on top of the preview.
    ///
    /// - Parameters:
    ///   - output: The capture output that produced the frame.
    ///   - sampleBuffer: The frame to analyse.
    ///   - connection: The connection the frame arrived on.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Skip the frame instead of crashing if the Vision request has not
        // been configured yet.
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
            let detectionRequest = textDetectionRequest else {
            return
        }

        var imageRequestOptions = [VNImageOption: Any]()
        if let cameraData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) {
            imageRequestOptions[.cameraIntrinsics] = cameraData
        }

        // `.right` is the orientation whose raw value is 6.
        let orientation = CGImagePropertyOrientation.right
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: orientation, options: imageRequestOptions)
        do {
            try imageRequestHandler.perform([detectionRequest])
        } catch {
            print("Error occured \(error)")
        }

        // Rotate the frame the same way so the normalised character boxes
        // can be mapped onto its pixels.
        let rawImage = CIImage(cvPixelBuffer: pixelBuffer)
        let ciImage = rawImage.transformed(by: rawImage.orientationTransform(for: orientation))
        let size = ciImage.extent.size

        // One rendering context per frame rather than one per observation.
        let context = CIContext(options: nil)

        var recognizedTextPositionTuples = [(rect: CGRect, text: String)]()
        for textObservation in textObservations {
            // An empty box list would leave the min/max seeds untouched and
            // produce a rectangle with infinite coordinates.
            guard let rects = textObservation.characterBoxes, !rects.isEmpty else {
                continue
            }

            // Union of all character boxes, in normalised coordinates.
            var xMin = CGFloat.greatestFiniteMagnitude
            var xMax: CGFloat = 0
            var yMin = CGFloat.greatestFiniteMagnitude
            var yMax: CGFloat = 0
            for rect in rects {
                xMin = min(xMin, rect.bottomLeft.x)
                xMax = max(xMax, rect.bottomRight.x)
                yMin = min(yMin, rect.bottomRight.y)
                yMax = max(yMax, rect.topRight.y)
            }

            let imageRect = CGRect(x: xMin * size.width, y: yMin * size.height, width: (xMax - xMin) * size.width, height: (yMax - yMin) * size.height)
            guard let cgImage = context.createCGImage(ciImage, from: imageRect) else {
                continue
            }

            tesseract?.image = UIImage(cgImage: cgImage)
            tesseract?.recognize()
            guard let recognized = tesseract?.recognizedText else {
                continue
            }

            let text = recognized.trimmingCharacters(in: CharacterSet.newlines)
            if !text.isEmpty {
                // Flip y (1 - yMax) before the rectangle is scaled to the view.
                let normalizedRect = CGRect(x: xMin, y: 1 - yMax, width: xMax - xMin, height: yMax - yMin)
                recognizedTextPositionTuples.append((rect: normalizedRect, text: text))
            }
        }
        textObservations.removeAll()

        DispatchQueue.main.async {
            let viewWidth = self.view.frame.size.width
            let viewHeight = self.view.frame.size.height
            guard let sublayers = self.view.layer.sublayers else {
                return
            }
            // Remove the text layers of the previous frame; the first sublayer
            // is skipped. dropFirst() is safe on an empty array, unlike [1...].
            for layer in sublayers.dropFirst() where layer is CATextLayer {
                layer.removeFromSuperlayer()
            }

            for tuple in recognizedTextPositionTuples {
                var rect = tuple.rect
                rect.origin.x *= viewWidth
                rect.size.width *= viewWidth
                rect.origin.y *= viewHeight
                rect.size.height *= viewHeight
                // Increase the size of text layer to show text of large lengths
                rect.size.width += 100
                rect.size.height += 100

                let textLayer = CATextLayer()
                textLayer.backgroundColor = UIColor.clear.cgColor
                textLayer.font = self.font
                textLayer.frame = rect
                textLayer.string = tuple.text
                textLayer.foregroundColor = UIColor.green.cgColor
                self.view.layer.addSublayer(textLayer)
            }
        }
    }
}
答案 0 :(得分:1)
基本上,CameraView(在您贴出的代码中对应 PreviewView)被设置成了 ViewController 的根视图,这就是您无法更改其大小的原因。
您需要把 CameraView 改为 ViewController 根视图的一个子视图,这样才能调整它的大小。
类似的东西:
ViewController.swift
删除以下内容
private var cameraView: CameraView {
return view as! CameraView
}
将所有cameraView
替换为self.cameraView
添加以下行:
@IBOutlet var cameraView: CameraView!
将所有self.view
替换为self.cameraView
Main.storyboard
1. 在 Document Outline 中选择 Camera View
2. 转到 Identity Inspector(⌥⌘3)并清除 Class(其中应包含 CameraView)。清除后,它应显示 UIView
3. 打开 Library(⇧⌘L),并在原始 View 内添加一个新的 View(可以随意调整此新视图的大小)
4. 选中这个新视图,转到 Identity Inspector(⌥⌘3),然后将 Class 更改为 CameraView
5. 选择 View Controller,然后转到 Connections Inspector(⌥⌘6)
6. 将 cameraView 这个 outlet 连接到新添加的 CameraView
如果您不喜欢文本超出cameraView
的范围,则可以在viewDidLoad
中添加以下内容:
self.cameraView.clipsToBounds = true