当我在iPhone上用相机拍摄图像并将其发送给tesseract时,识别准确度非常糟糕,得到的全是乱码文本;但是当我从照片库中选择同一张图像时,准确度却非常高。
如何提高tesseract对相机拍摄照片的识别准确度?以下是我在发送图像之前所做的处理:
/// UIImagePickerControllerDelegate callback: scales the picked photo to a
/// fixed width, runs Tesseract on it, logs the recognised text and dismisses
/// the picker.
///
/// @param picker The image picker that finished; it is dismissed on exit.
/// @param info   The picker's info dictionary; only
///               UIImagePickerControllerOriginalImage is read.
- (void)imagePickerController:(UIImagePickerController *)picker didFinishPickingMediaWithInfo:(NSDictionary *)info {
    UIImage *image = info[UIImagePickerControllerOriginalImage];
    CGSize sourceSize = image.size;
    if (image == nil || sourceSize.width <= 0 || sourceSize.height <= 0) {
        // Nothing usable was picked; there is no image to recognise.
        [picker dismissViewControllerAnimated:YES completion:NULL];
        return;
    }

    // Scale to a fixed width but derive the height from the source aspect
    // ratio. Forcing a square target stretches non-square photos and distorts
    // the glyphs before they reach the OCR engine.
    CGFloat newWidth = 1200;
    CGFloat newHeight = round(newWidth * sourceSize.height / sourceSize.width);
    CGSize newSize = CGSizeMake(newWidth, newHeight);
    image = [image resizedImage:newSize interpolationQuality:kCGInterpolationHigh];

    Tesseract* tesseract = [[Tesseract alloc]initWithLanguage:@"eng"];
    // NOTE(review): '*' appears in both the whitelist and the blacklist below;
    // confirm which list is meant to contain it.
    [tesseract setVariableValue:@"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@#&*()-_:." forKey:@"tessedit_char_whitelist"];
    [tesseract setVariableValue:@"`~!%^*[]{}<>?|" forKey:@"tessedit_char_blacklist"];
    [tesseract setImage:image];
    [tesseract recognize];
    NSLog(@"%@", [tesseract recognizedText]);
    [picker dismissViewControllerAnimated:YES completion:NULL];
}
如果有人发现了一些“神奇”的方式来让tesseract准确无误地扫描,我会奖励赏金!
答案 0 :(得分:1)
要考虑的主要事项:
需要针对所要识别的字体和语言对Tesseract进行训练。 看起来您在这里提供了适当的参数 - 它们与您提供预定义图像时使用的参数相同吗?
Tesseract不会对图像进行定向或“清理”。当您将图像传递给库时,它应该已经过清理并且是纵向的。照片库中的那张图像是用相机拍摄的,还是以某种方式保存在那里的示例图像?
要进行的一项检查是调整大小功能:
// The same value is used for both width and height, so the target size is
// always a 1200 x 1200 square regardless of the source image's proportions.
CGFloat newWidth = 1200;
CGSize newSize = CGSizeMake(newWidth, newWidth);
由于您要将非方形图像调整为方形图像,原始图像会因此而失真。这肯定会破坏文本,让Tesseract更难识别。至少,您应该保留所拍摄图像的宽高比。
答案 1 :(得分:-2)
我的代码工作正常,但它更复杂
/// Runs OCR over every image in `self.images`, logs the text read from each
/// image, then logs the text obtained by combining all per-image readings with
/// `calculateStatics:`.
///
/// Each image is split into glyph images by
/// `-[OCRImplementation processImageDetectText:threadhold:]`; every glyph image
/// is recognised separately and contributes at most one character.
///
/// @param sender The control that triggered the action (unused).
- (IBAction)captureTapped:(id)sender
{
    // Binarisation level handed to the glyph extractor.
    const int glyphThreshold = 198;

    NSMutableArray *results = [NSMutableArray array];
    OCRImplementation *ocr = [[OCRImplementation alloc] init];

    for (UIImage *imageToTesseract in self.images) {
        NSArray *glyphImages = nil;
        @try {
            glyphImages = [ocr processImageDetectText:imageToTesseract threadhold:glyphThreshold];
        }
        @catch (NSException *exception) {
            // Best effort: an image that cannot be segmented contributes an
            // empty reading instead of aborting the whole capture.
            glyphImages = nil;
        }

        // Start from an empty string for every image. Carrying the text over
        // from the previous image would make each reading contain all earlier
        // ones and break the per-position vote in calculateStatics:.
        NSMutableString *imageText = [NSMutableString string];
        for (UIImage *glyphImage in glyphImages) {
            NSString *glyphText = [self confidencesOCRTesseract:glyphImage];
            if (glyphText.length > 1) {
                // Keep only the first character; a substring (rather than a
                // "%c" format) does not truncate non-ASCII characters.
                glyphText = [glyphText substringToIndex:1];
            }
            if (glyphText != nil) {
                [imageText appendString:glyphText];
            }
        }

        NSString *fullWord = [imageText stringByReplacingOccurrencesOfString:@"\n" withString:@""];
        [results addObject:fullWord];
        NSLog(@"-- RESULT -- %@",fullWord);
    }

    NSString *resultWord = @"";
    if ([results count] > 0) {
        resultWord = [self calculateStatics:results];
    }
    // Final text after voting across all images.
    NSLog(@"%@",resultWord);
}
/// Combines several readings of the same text into one string by majority
/// vote on each of the first seven character positions.
///
/// For every position, each reading votes with its character at that position,
/// or with a single space when the reading is empty, too short, or holds a NUL
/// character there. The most frequent vote is appended to the result. When
/// several votes are tied, the one the counted set enumerates first wins, so
/// the choice among tied candidates is not specified.
///
/// @param results Array of NSString readings.
/// @return A string of seven voted characters (space-padded where readings are
///         short), or an empty string when `results` is empty.
- (NSString*)calculateStatics:(NSMutableArray*)results{
    // Number of leading character positions that take part in the vote.
    const NSUInteger votedLength = 7;

    NSMutableString *word = [NSMutableString string];
    for (NSUInteger position = 0; position < votedLength; position++) {
        // Collect one vote per reading for this position.
        NSCountedSet *votes = [NSCountedSet set];
        for (NSString *result in results) {
            NSString *vote = @" ";
            if (position < result.length && [result characterAtIndex:position] != 0) {
                // A substring keeps the character intact; formatting a unichar
                // with "%c" would truncate anything outside ASCII.
                vote = [result substringWithRange:NSMakeRange(position, 1)];
            }
            [votes addObject:vote];
        }

        // Pick the vote with the highest count (strictly greater, so the first
        // candidate enumerated wins a tie).
        NSString *winner = @"";
        NSUInteger winnerCount = 0;
        for (NSString *candidate in votes) {
            NSUInteger candidateCount = [votes countForObject:candidate];
            if (winnerCount < candidateCount) {
                winnerCount = candidateCount;
                winner = candidate;
            }
        }
        [word appendString:winner];
    }
    return [NSString stringWithString:word];
}
OCRImplementation类
OCRImplementation.h
#ifndef __TesseractSample__OCRImplementation__
#define __TesseractSample__OCRImplementation__

#import <UIKit/UIKit.h>

/// Splits a photo into per-glyph images and recognises single glyphs with
/// Tesseract.
@interface OCRImplementation : NSObject{
}

/// NOTE(review): no implementation of this method is visible in the
/// accompanying implementation file; confirm it exists before calling it.
- (UIImage*)processImage:(id)sender;

/// Extracts candidate glyph images from a photo.
/// @param sender     The UIImage to analyse.
/// @param threadhold Binarisation level applied to each extracted glyph.
/// @return An array of UIImage objects, one per extracted glyph.
- (NSMutableArray*)processImageDetectText:(id)sender threadhold:(int)threadhold;

/// Recognises the text in a glyph image.
/// @param picture The image to recognise.
/// @return The recognised text.
- (NSString*)confidencesOCRTesseract:(UIImage*)picture;

@end

// The guard has to close after the declarations; otherwise a second #include
// of this header redeclares the interface.
#endif /* defined(__TesseractSample__OCRImplementation__) */
OCRImplementation.mm
#include "OCRImplementation.h"
#import <OpenCV/opencv2/imgproc/imgproc.hpp>
#import <OpenCV/opencv2/highgui/highgui.hpp>
#import "UIImage+OpenCV.h"
#import "Tesseract.h"
#import "baseapi.h"
#import "environ.h"
#import "pix.h"
#include <sstream>
#include <iostream>
#include <vector>
#include "OpenCV/opencv2/core/core.hpp"
#include "OpenCV/opencv2/features2d/features2d.hpp"
#include "OpenCV/opencv2/calib3d/calib3d.hpp"
@implementation OCRImplementation
/// Extracts candidate glyph images from a photo and returns them ordered from
/// left to right.
///
/// The photo is converted to grey, blurred and binarised at a fixed level, and
/// its contours are extracted. A contour is kept as a glyph when its bounding
/// box is taller than 50 px, narrower than it is tall, and its area exceeds
/// half the area of the previously kept contour. Each kept glyph is cropped
/// with a 5 px margin, brightened, binarised at `threadhold`, and converted to
/// a UIImage.
///
/// Glyphs are keyed directly by the x coordinate of their bounding box. Two
/// glyphs with the same contour area therefore no longer overwrite each other,
/// and a glyph whose padded box falls outside the photo is skipped instead of
/// later causing a nil insertion.
///
/// @param sender     The UIImage to analyse.
/// @param threadhold Binarisation level applied to each cropped glyph.
/// @return A mutable array of UIImage objects sorted by ascending x position;
///         empty when no glyph qualifies.
- (NSMutableArray*)processImageDetectText:(id)sender threadhold:(int)threadhold{
    const int contourThreshold = 100;  // binarisation level for contour detection
    const int minGlyphHeight = 50;     // bounding boxes must be taller than this
    const int glyphPadding = 5;        // margin added on every side before cropping

    UIImage *inputImage = sender;
    cv::Mat src = [inputImage CVMat];
    cv::Mat src_gray;
    cv::Mat binary;
    cv::vector<cv::vector<cv::Point> > contours;
    cv::vector<cv::Vec4i> hierarchy;

    /// Convert image to gray and blur it
    cvtColor( src, src_gray, CV_BGR2GRAY );
    blur( src_gray, src_gray, cv::Size(3,3) );
    /// Binarise, then find contours
    cv::threshold( src_gray, binary, contourThreshold, 255, cv::THRESH_BINARY );
    cv::findContours( binary, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, cv::Point(0, 0) );

    // Glyph images keyed by the left edge (x) of their bounding box; a later
    // contour with the same x replaces an earlier one.
    NSMutableDictionary *imagesByLeftEdge = [NSMutableDictionary dictionary];
    float lastArea = 0.0;

    for (size_t i = 0; i < contours.size(); i++) {
        /// Approximate the contour to a polygon and take its bounding rect
        cv::vector<cv::Point> polygon;
        approxPolyDP( cv::Mat(contours[i]), polygon, 3, true );
        cv::Rect box = boundingRect( cv::Mat(polygon) );
        if (box.height <= minGlyphHeight) {
            continue;
        }

        double area = cv::contourArea(contours[i]);
        if (!(box.width < box.height) || !(area > (lastArea / 2))) {
            continue;
        }
        // The area filter tracks every contour that passes the shape test,
        // whether or not it can be cropped below.
        lastArea = area;

        cv::Rect extendedRect(box.x - glyphPadding, box.y - glyphPadding,
                              box.width + 2 * glyphPadding, box.height + 2 * glyphPadding);
        bool insideImage = 0 <= extendedRect.x && 0 <= extendedRect.width
                        && extendedRect.x + extendedRect.width <= src.cols
                        && 0 <= extendedRect.y && 0 <= extendedRect.height
                        && extendedRect.y + extendedRect.height <= src.rows;
        if (!insideImage) {
            NSLog(@"Error al leer la imagen. NO ROI");
            continue;
        }

        cv::Mat target;
        src(extendedRect).copyTo(target);
        // Grey conversion, then brighten (x2, +2) before binarising.
        cv::cvtColor(target, target, CV_BGR2GRAY);
        cv::multiply(target, cv::Scalar(2,2,2), target);
        cv::add(target, cv::Scalar(2,2,2), target);
        cv::Mat glyph;
        cv::threshold( target, glyph, threadhold, 255, cv::THRESH_BINARY );
        // NOTE(review): NULL is passed as the structuring element exactly as
        // before; confirm which kernel OpenCV derives from it.
        cv::dilate(glyph, glyph, NULL);

        UIImage *glyphImage = [OCRImplementation imageWithCVMat:glyph];
        if (glyphImage != nil) {
            [imagesByLeftEdge setObject:glyphImage forKey:[NSNumber numberWithInt:box.x]];
        }
    }

    // Emit the glyph images ordered by ascending x, i.e. left to right.
    NSArray *leftEdges = [[imagesByLeftEdge allKeys] sortedArrayUsingSelector:@selector(compare:)];
    NSMutableArray *array = [NSMutableArray array];
    for (NSNumber *leftEdge in leftEdges) {
        [array addObject:[imagesByLeftEdge objectForKey:leftEdge]];
    }
    return array;
}
/// Builds a UIImage from the pixel data of an OpenCV matrix.
///
/// Matrices with one byte per element are treated as device grey, all others
/// as device RGB. The pixel data is copied, so the returned image does not
/// depend on the lifetime of `cvMat`.
///
/// @param cvMat The matrix to convert.
/// @return The resulting image, or nil when the matrix is empty or the
///         Core Graphics objects cannot be created.
+ (UIImage *)imageWithCVMat:(const cv::Mat&)cvMat
{
    if (cvMat.empty()) {
        return nil;
    }

    // The byte count and the bytes-per-row value must describe the same
    // layout. A matrix that is a view into a larger one has padding between
    // rows, so pack it first.
    cv::Mat packed = cvMat.isContinuous() ? cvMat : cvMat.clone();
    NSData *data = [NSData dataWithBytes:packed.data length:packed.step[0] * packed.rows];

    CGColorSpaceRef colorSpace;
    if (packed.elemSize() == 1) {
        colorSpace = CGColorSpaceCreateDeviceGray();
    } else {
        colorSpace = CGColorSpaceCreateDeviceRGB();
    }

    // __bridge: no ownership transfer; `data` stays owned on the Objective-C side.
    CGDataProviderRef provider = CGDataProviderCreateWithCFData((__bridge CFDataRef)data);
    CGImageRef imageRef = CGImageCreate(packed.cols,                                   // Width
                                        packed.rows,                                   // Height
                                        8,                                             // Bits per component
                                        8 * packed.elemSize(),                         // Bits per pixel
                                        packed.step[0],                                // Bytes per row
                                        colorSpace,                                    // Colorspace
                                        kCGImageAlphaNone | kCGBitmapByteOrderDefault, // Bitmap info flags
                                        provider,                                      // CGDataProviderRef
                                        NULL,                                          // Decode
                                        false,                                         // Should interpolate
                                        kCGRenderingIntentDefault);                    // Intent

    UIImage *image = (imageRef != NULL) ? [UIImage imageWithCGImage:imageRef] : nil;
    CGImageRelease(imageRef);
    CGDataProviderRelease(provider);
    CGColorSpaceRelease(colorSpace);
    return image;
}
/// Recognises the text in an image with Tesseract, symbol by symbol.
///
/// The bundled "tessdata" directory is copied into the Documents directory on
/// first use and TESSDATA_PREFIX is pointed at Documents. Recognition is
/// limited to upper-case letters and digits. A symbol whose confidence is
/// above 80 is taken as is; otherwise the first non-empty alternative offered
/// by the choice iterator is used.
///
/// All native resources created here (the engine, the pixel buffer and the
/// result iterator) are released before returning.
///
/// @param picture The image to recognise.
/// @return The recognised text, or an empty string when the engine cannot be
///         initialised or the image cannot be rendered.
- (NSString*)confidencesOCRTesseract:(UIImage*)picture{
    const float minSymbolConfidence = 80.0f;
    NSString* _dataPath = @"tessdata";
    NSString* _language = @"eng";

    // Useful paths
    NSFileManager *fileManager = [NSFileManager defaultManager];
    NSArray *documentPaths = NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES);
    NSString *documentPath = ([documentPaths count] > 0) ? [documentPaths objectAtIndex:0] : nil;
    if (documentPath == nil) {
        // Without a Documents directory there is nowhere to install tessdata.
        return @"";
    }
    NSString *dataPath = [documentPath stringByAppendingPathComponent:_dataPath];

    // Copy data in Doc Directory
    if (![fileManager fileExistsAtPath:dataPath]) {
        NSString *bundlePath = [[NSBundle mainBundle] bundlePath];
        NSString *tessdataPath = [bundlePath stringByAppendingPathComponent:_dataPath];
        NSError *copyError = nil;
        if (tessdataPath && ![fileManager copyItemAtPath:tessdataPath toPath:dataPath error:&copyError]) {
            NSLog(@"Unable to copy tessdata: %@", copyError);
        }
    }
    setenv("TESSDATA_PREFIX", [[documentPath stringByAppendingString:@"/"] UTF8String], 1);

    //SET IMAGE
    CGSize size = [picture size];
    int width = size.width;
    int height = size.height;
    if (width <= 0 || height <= 0) {
        return @"";
    }
    // Zero-filled so any transparency is preserved
    uint32_t *pixels = (uint32_t *) calloc((size_t)width * (size_t)height, sizeof(uint32_t));
    if (pixels == NULL) {
        return @"";
    }
    CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB();
    // Create a context with RGBA pixels
    CGContextRef context = CGBitmapContextCreate(pixels, width, height, 8, width * sizeof(uint32_t), colorSpace,
                                                 kCGBitmapByteOrder32Little | kCGImageAlphaPremultipliedLast);
    CGColorSpaceRelease(colorSpace);
    if (context == NULL) {
        free(pixels);
        return @"";
    }
    // Paint the bitmap to our context which will fill in the pixels array
    CGContextDrawImage(context, CGRectMake(0, 0, width, height), [picture CGImage]);
    CGContextRelease(context);

    tesseract::TessBaseAPI *tess = new tesseract::TessBaseAPI();
    if (tess->Init([_dataPath UTF8String], [_language UTF8String]) != 0) {
        NSLog(@"Unable to initialise Tesseract");
        delete tess;
        free(pixels);
        return @"";
    }
    tess->SetVariable("save_blob_choices", "T");
    tess->SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789");
    tess->SetImage((const unsigned char *) pixels, width, height, sizeof(uint32_t), width * sizeof(uint32_t));
    //END SET IMAGE

    tess->Recognize(NULL);
    tesseract::ResultIterator* ri = tess->GetIterator();

    // For every identified symbol (there should be only one...)
    NSMutableString *finalWord = [NSMutableString string];
    if (ri != 0) {
        do {
            const char* symbol = ri->GetUTF8Text(tesseract::RIL_SYMBOL);
            if (symbol != 0) {
                // stringWithUTF8String: returns nil for malformed input; fall
                // back to an empty string so appending never receives nil.
                NSString *symbolStr = [NSString stringWithUTF8String:symbol];
                if (symbolStr == nil) {
                    symbolStr = @"";
                }
                float conf = ri->Confidence(tesseract::RIL_SYMBOL);
                if (conf > minSymbolConfidence) {
                    [finalWord appendString:symbolStr];
                } else {
                    // Stack-allocated, so it is destroyed on every exit path.
                    tesseract::ChoiceIterator ci(*ri);
                    // For every chosen candidate...
                    do {
                        const char* choice = ci.GetUTF8Text();
                        // Check for NULL before converting; converting a NULL
                        // C string raises an exception.
                        NSString *choiceStr = (choice != 0) ? [NSString stringWithUTF8String:choice] : nil;
                        if (choiceStr.length > 0) {
                            [finalWord appendString:choiceStr];
                            break;
                        }
                        [finalWord appendString:symbolStr];
                    } while (ci.Next());
                }
            }
            delete[] symbol;
        } while ((ri->Next(tesseract::RIL_SYMBOL)));
        delete ri;
    }

    // Release the engine before the pixel buffer it was given.
    tess->End();
    delete tess;
    free(pixels);
    return [NSString stringWithString:finalWord];
}
/// Comparison callback for -sortedArrayUsingFunction:context: over numeric
/// strings. Only the text before the first "." of each string is compared, as
/// a float. The larger value is reported as coming first, so arrays sorted with
/// this function end up in descending order.
///
/// @param num1    First NSString operand.
/// @param num2    Second NSString operand.
/// @param context Unused.
/// @return NSOrderedAscending when num1 is larger, NSOrderedDescending when it
///         is smaller, NSOrderedSame otherwise.
NSInteger intSort(id num1, id num2, void *context) {
    NSString *wholePart1 = [[(NSString *)num1 componentsSeparatedByString:@"."] objectAtIndex:0];
    NSString *wholePart2 = [[(NSString *)num2 componentsSeparatedByString:@"."] objectAtIndex:0];
    float lhs = [wholePart1 floatValue];
    float rhs = [wholePart2 floatValue];
    return (lhs > rhs) ? NSOrderedAscending
         : (lhs < rhs) ? NSOrderedDescending
         : NSOrderedSame;
}
/// Comparison callback for -sortedArrayUsingFunction:context: over numeric
/// strings. Only the text before the first "." of each string is compared, as
/// a float. The smaller value is reported as coming first, so arrays sorted
/// with this function end up in ascending order, despite the "Desc" suffix.
///
/// @param num1    First NSString operand.
/// @param num2    Second NSString operand.
/// @param context Unused.
/// @return NSOrderedAscending when num1 is smaller, NSOrderedDescending when it
///         is larger, NSOrderedSame otherwise.
NSInteger intSortDesc(id num1, id num2, void *context) {
    NSString *wholePart1 = [[(NSString *)num1 componentsSeparatedByString:@"."] objectAtIndex:0];
    NSString *wholePart2 = [[(NSString *)num2 componentsSeparatedByString:@"."] objectAtIndex:0];
    float lhs = [wholePart1 floatValue];
    float rhs = [wholePart2 floatValue];
    return (lhs < rhs) ? NSOrderedAscending
         : (lhs > rhs) ? NSOrderedDescending
         : NSOrderedSame;
}
@end