如何使用Tesseract在Swift中以小视图检测文本?

时间:2018-12-08 12:58:08

标签: ios swift ocr tesseract

我正在使用 Tesseract 创建一个识别文本的应用。我创建了一个小视图,并在该小视图上启动了捕获会话,因此应当只检测出现在该视图中的文本。但如图所示,它还会识别出小视图之外的文本。(图片:enter image description here)

如何解决此问题?我需要对以下代码做哪些更改,才能在 iOS 应用中得到想要的输出?

/// Handles one captured video frame: runs the Vision text-detection request on it,
/// crops every detected text region out of the frame, feeds each crop to Tesseract,
/// and finally replaces the `CATextLayer` overlays on `cameraLayer.layer` with the
/// newly recognized strings.
///
/// - Parameters:
///   - output: The capture output that delivered the frame (not used here).
///   - sampleBuffer: The sample buffer holding the frame to analyze.
///   - connection: The connection the frame arrived on (not used here).
///
/// NOTE(review): `textObservations` is read and cleared here but never written in this
/// method; presumably the completion handler of `textDetectionRequest` fills it — confirm.
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // Without a pixel buffer or a configured request there is nothing to do; bail out
    // instead of force-unwrapping the request and crashing.
    guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer),
          let detectionRequest = textDetectionRequest else {
        return
    }

    // `.right` is the named case for raw value 6, so no force-unwrapped initializer is needed.
    let frameOrientation = CGImagePropertyOrientation.right

    var imageRequestOptions = [VNImageOption: Any]()
    if let cameraData = CMGetAttachment(sampleBuffer, kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, nil) {
        imageRequestOptions[.cameraIntrinsics] = cameraData
    }
    let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: frameOrientation, options: imageRequestOptions)
    do {
        try imageRequestHandler.perform([detectionRequest])
    }
    catch {
        print("Error occured \(error)")
    }

    // Rotate the frame the same way Vision was told to interpret it, so the normalized
    // observation coordinates can be scaled by this image's extent.
    var ciImage = CIImage(cvPixelBuffer: pixelBuffer)
    let transform = ciImage.orientationTransform(for: frameOrientation)
    ciImage = ciImage.transformed(by: transform)
    let size = ciImage.extent.size

    // One rendering context per frame, created lazily on the first crop, instead of a
    // fresh context for every single observation.
    var sharedContext: CIContext?

    var recognizedTextPositionTuples = [(rect: CGRect, text: String)]()
    for textObservation in textObservations {
        guard let rects = textObservation.characterBoxes else {
            continue
        }

        // Union of all character boxes of this observation, in normalized coordinates.
        var xMin = CGFloat.greatestFiniteMagnitude
        var xMax: CGFloat = 0
        var yMin = CGFloat.greatestFiniteMagnitude
        var yMax: CGFloat = 0
        for rect in rects {
            xMin = min(xMin, rect.bottomLeft.x)
            xMax = max(xMax, rect.bottomRight.x)
            yMin = min(yMin, rect.bottomRight.y)
            yMax = max(yMax, rect.topRight.y)
        }

        // Scale the normalized box to pixel coordinates of the rotated frame.
        let imageRect = CGRect(x: xMin * size.width, y: yMin * size.height, width: (xMax - xMin) * size.width, height: (yMax - yMin) * size.height)
        let context = sharedContext ?? CIContext(options: nil)
        sharedContext = context
        guard let cgImage = context.createCGImage(ciImage, from: imageRect) else {
            continue
        }

        let uiImage = UIImage(cgImage: cgImage)
        tesseract?.maximumRecognitionTime = 4.0
        tesseract?.image = uiImage
        tesseract?.recognize()
        DispatchQueue.main.async {
            self.imagePreview.image = uiImage
        }

        guard var text = tesseract?.recognizedText else {
            continue
        }
        text = text.trimmingCharacters(in: CharacterSet.newlines)
        if !text.isEmpty {
            // Flip the y axis (1 - yMax) so the rect's origin is measured from the top,
            // as used for the layer frames below.
            let x = xMin
            let y = 1 - yMax
            let width = xMax - xMin
            let height = yMax - yMin
            recognizedTextPositionTuples.append((rect: CGRect(x: x, y: y, width: width, height: height), text: text))
        }
    }
    textObservations.removeAll()

    DispatchQueue.main.async {
        let viewWidth = self.cameraLayer.bounds.size.width
        let viewHeight = self.cameraLayer.bounds.size.height
        guard let sublayers = self.cameraLayer.layer.sublayers else {
            return
        }

        // Remove the text overlays of the previous frame. The first sublayer is skipped
        // (presumably the camera preview layer — confirm); `dropFirst()` does this
        // without trapping when the array is empty, unlike `sublayers[1...]`.
        for layer in sublayers.dropFirst() where layer is CATextLayer {
            layer.removeFromSuperlayer()
        }

        for tuple in recognizedTextPositionTuples {
            let textLayer = CATextLayer()
            textLayer.backgroundColor = UIColor.clear.cgColor
            textLayer.font = self.font

            // Scale the normalized rect up to the view's coordinate space.
            var rect = tuple.rect
            rect.origin.x *= viewWidth
            rect.size.width *= viewWidth
            rect.origin.y *= viewHeight
            rect.size.height *= viewHeight

            // Increase the size of text layer to show text of large lengths
            rect.size.width += 100
            rect.size.height += 100

            textLayer.frame = rect
            textLayer.string = tuple.text
            textLayer.foregroundColor = UIColor.green.cgColor
            self.cameraLayer.layer.addSublayer(textLayer)
        }
    }
}

0 个答案:

没有答案