我正在使用 Tesseract 创建应用,以识别文本。我创建了一个小视图,并在该小视图上启动了捕获会话,因此只能检测到出现在该视图上的文本。但它会识别小视图中的文本,如“图片”所示。 enter image description here
如何解决此问题。我需要在以下代码中进行任何更改以获取我在 iOS应用中想要的输出吗?
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
return
}
var imageRequestOptions = [VNImageOption: Any]()
if let cameraData = CMGetAttachment(sampleBuffer, kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, nil) {
imageRequestOptions[.cameraIntrinsics] = cameraData
}
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: CGImagePropertyOrientation(rawValue: 6)!, options: imageRequestOptions)
do {
try imageRequestHandler.perform([textDetectionRequest!])
}
catch {
print("Error occured \(error)")
}
var ciImage = CIImage(cvPixelBuffer: pixelBuffer)
let transform = ciImage.orientationTransform(for: CGImagePropertyOrientation(rawValue: 6)!)
ciImage = ciImage.transformed(by: transform)
let size = ciImage.extent.size
var recognizedTextPositionTuples = [(rect: CGRect, text: String)]()
for textObservation in textObservations {
guard let rects = textObservation.characterBoxes else {
continue
}
var xMin = CGFloat.greatestFiniteMagnitude
var xMax: CGFloat = 0
var yMin = CGFloat.greatestFiniteMagnitude
var yMax: CGFloat = 0
for rect in rects {
xMin = min(xMin, rect.bottomLeft.x)
xMax = max(xMax, rect.bottomRight.x)
yMin = min(yMin, rect.bottomRight.y)
yMax = max(yMax, rect.topRight.y)
}
let imageRect = CGRect(x: xMin * size.width, y: yMin * size.height, width: (xMax - xMin) * size.width, height: (yMax - yMin) * size.height)
let context = CIContext(options: nil)
guard let cgImage = context.createCGImage(ciImage, from: imageRect) else {
continue
}
let uiImage = UIImage(cgImage: cgImage)
tesseract?.maximumRecognitionTime = 4.0
tesseract?.image = uiImage
tesseract?.recognize()
DispatchQueue.main.async {
self.imagePreview.image = uiImage
}
guard var text = tesseract?.recognizedText else {
continue
}
text = text.trimmingCharacters(in: CharacterSet.newlines)
if !text.isEmpty {
let x = xMin
let y = 1 - yMax
let width = xMax - xMin
let height = yMax - yMin
recognizedTextPositionTuples.append((rect: CGRect(x: x, y: y, width: width, height: height), text: text))
}
}
textObservations.removeAll()
DispatchQueue.main.async {
let viewWidth = self.cameraLayer.bounds.size.width
let viewHeight = self.cameraLayer.bounds.size.height
guard let sublayers = self.cameraLayer.layer.sublayers else {
return
}
for layer in sublayers[1...] {
if let _ = layer as? CATextLayer {
layer.removeFromSuperlayer()
}
}
for tuple in recognizedTextPositionTuples {
let textLayer = CATextLayer()
textLayer.backgroundColor = UIColor.clear.cgColor
textLayer.font = self.font
var rect = tuple.rect
rect.origin.x *= viewWidth
rect.size.width *= viewWidth
rect.origin.y *= viewHeight
rect.size.height *= viewHeight
// Increase the size of text layer to show text of large lengths
rect.size.width += 100
rect.size.height += 100
textLayer.frame = rect
textLayer.string = tuple.text
textLayer.foregroundColor = UIColor.green.cgColor
self.cameraLayer.layer.addSublayer(textLayer)
}
}
}