我想获取扫描图像中文本的坐标。扫描图像中包含大量文本,需要先将图像数据转换为文本,再获取每段文本的坐标。这里的坐标指文本所在的边界框,例如 X、Y 坐标以及高度和宽度。
我使用的是 Microsoft OCR(ProjectOxford Vision)SDK,代码如下:
using Microsoft.ProjectOxford.Vision;
using Microsoft.ProjectOxford.Vision.Contract;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
namespace TextExtraction
{
    /// <summary>
    /// Console sample that uploads a local image to the Computer Vision
    /// handwriting-recognition endpoint and prints the recognized text.
    /// </summary>
    class Program
    {
        // Placeholder subscription key; replace it before running and keep real keys out of source control.
        const string ApiKey = "<<Key>>";

        // Base URL of the Vision service the client talks to.
        const string ApiLocation =
            "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0";

        /// <summary>
        /// Entry point: runs recognition on a hard-coded image path in line mode
        /// and then waits for Enter so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string imgToAnalyze = @"C:\Users\abhis\Desktop\image.jpg";
            HandwritingExtraction(imgToAnalyze, false);
            Console.ReadLine();
        }

        /// <summary>
        /// Writes each entry of <paramref name="res"/> on its own console line,
        /// then blocks until the user presses Enter.
        /// </summary>
        /// <param name="res">Strings to print; <c>null</c> is treated as "nothing to print".</param>
        public static void PrintResults(string[] res)
        {
            // A null array previously crashed the foreach with NullReferenceException.
            if (res != null)
            {
                foreach (string r in res)
                {
                    Console.WriteLine(r);
                }
            }

            // Main waits for Enter as well, so the user confirms twice in the sample flow.
            Console.ReadLine();
        }

        /// <summary>
        /// Synchronous wrapper: runs <see cref="HandwritingExtractionCore"/> and prints its result.
        /// </summary>
        /// <param name="fname">Path of the image file to analyze.</param>
        /// <param name="wrds"><c>true</c> to print one entry per word, <c>false</c> for one entry per line.</param>
        public static void HandwritingExtraction(string fname, bool wrds)
        {
            // Blocks the calling thread until the async work completes; failures
            // surface to the caller wrapped in an AggregateException.
            Task.Run(async () =>
            {
                string[] res = await HandwritingExtractionCore(fname, wrds);
                PrintResults(res);
            }).Wait();
        }

        /// <summary>
        /// Sends the image at <paramref name="fname"/> to the handwriting-recognition
        /// service and returns the recognized text.
        /// </summary>
        /// <param name="fname">Path of the image file to analyze.</param>
        /// <param name="wrds"><c>true</c> to return individual words, <c>false</c> to return whole lines.</param>
        /// <returns>
        /// The recognized words or lines; an empty array when the file does not exist
        /// or the service response carries no recognition result.
        /// </returns>
        public static async Task<string[]> HandwritingExtractionCore(string fname, bool wrds)
        {
            // Return an empty array rather than null so callers can always enumerate the result.
            if (!File.Exists(fname))
            {
                return new string[0];
            }

            VisionServiceClient client = new VisionServiceClient(ApiKey, ApiLocation);
            using (Stream stream = File.OpenRead(fname))
            {
                HandwritingRecognitionOperation op =
                    await client.CreateHandwritingRecognitionOperationAsync(stream);
                HandwritingRecognitionOperationResult res =
                    await client.GetHandwritingRecognitionOperationResultAsync(op);
                return GetExtracted(res, wrds);
            }
        }

        /// <summary>
        /// Flattens a recognition result into strings, either one per word or one per line.
        /// </summary>
        /// <param name="res">Result returned by the recognition operation.</param>
        /// <param name="wrds"><c>true</c> for one entry per word, <c>false</c> for one entry per line.</param>
        /// <returns>The extracted strings; empty when <paramref name="res"/> holds no lines.</returns>
        public static string[] GetExtracted(HandwritingRecognitionOperationResult res, bool wrds)
        {
            // NOTE(review): presumably RecognitionResult is unset when the operation
            // has not succeeded - confirm against the SDK. The guard avoids a
            // NullReferenceException in that case.
            if (res == null || res.RecognitionResult == null || res.RecognitionResult.Lines == null)
            {
                return new string[0];
            }

            List<string> items = new List<string>();
            foreach (HandwritingTextLine l in res.RecognitionResult.Lines)
            {
                if (wrds)
                {
                    items.AddRange(GetWords(l));
                }
                else
                {
                    items.Add(GetLineAsString(l));
                }
            }

            return items.ToArray();
        }

        /// <summary>
        /// Collects the text of every word in <paramref name="line"/>.
        /// </summary>
        /// <param name="line">A recognized text line.</param>
        /// <returns>The word texts in order; empty when the line or its word list is <c>null</c>.</returns>
        public static List<string> GetWords(HandwritingTextLine line)
        {
            List<string> words = new List<string>();
            if (line == null || line.Words == null)
            {
                return words;
            }

            // Only the word text is kept here; nothing else carried by the word is returned.
            foreach (HandwritingTextWord w in line.Words)
            {
                words.Add(w.Text);
            }

            return words;
        }

        /// <summary>
        /// Joins the words of <paramref name="line"/> with single spaces.
        /// </summary>
        /// <param name="line">A recognized text line.</param>
        /// <returns>The joined text, or an empty string when the line has no words.</returns>
        public static string GetLineAsString(HandwritingTextLine line)
        {
            // string.Join already yields "" for an empty sequence, so no Count check is needed.
            return string.Join(" ", GetWords(line));
        }
    }
}
预期输出:获取每段文本及其对应的坐标(x、y、高度、宽度)。
服务返回的 JSON 输出如下:
{ "status": "Succeeded", "succeeded": true, "failed": false, "finished": true, "recognitionResults": [ { "page": 1, "clockwiseOrientation": 359.62, "width": 505, "height": 399, "unit": "pixel", "lines": [ { "boundingBox": [ 224, 58, 380, 57, 381, 74, 225, 75 ], "text": "GOVERNMENT OF INDIA", "words": [ { "boundingBox": [ 229, 59, 321, 58, 320, 75, 229, 75 ], "text": "GOVERNMENT" }, { "boundingBox": [ 324, 58, 341, 58, 341, 75, 323, 75 ], "text": "OF" }, { "boundingBox": [ 344, 58, 381, 58, 381, 75, 344, 75 ], "text": "INDIA" } ] }, { "boundingBox": [ 211, 159, 429, 160, 428, 180, 210, 178 ], "text": "FH faPet / DOB:27/07/1982", "words": [ { "boundingBox": [ 225, 160, 243, 160, 243, 179, 225, 179 ], "text": "FH" }, { "boundingBox": [ 247, 160, 286, 160, 286, 179, 247, 179 ], "text": "faPet /" }, { "boundingBox": [ 290, 160, 333, 160, 333, 179, 290, 179 ], "text": "DOB:" }, { "boundingBox": [ 337, 160, 428, 162, 428, 180, 337, 179 ], "text": "27/07/1982" } ] }, { "boundingBox": [ 209, 192, 313, 190, 314, 208, 210, 210 ], "text": "you / MALE", "words": [ { "boundingBox": [ 214, 192, 247, 192, 246, 209, 214, 210 ], "text": "you" }, { "boundingBox": [ 254, 192, 260, 192, 260, 209, 254, 209 ], "text": "/" }, { "boundingBox": [ 264, 192, 314, 192, 313, 208, 263, 209 ], "text": "MALE" } ] }, { "boundingBox": [ 201, 314, 351, 313, 352, 330, 202, 331 ], "text": "66 66 6666 6666", "words": [ { "boundingBox": [ 204, 315, 225, 314, 225, 330, 204, 331 ], "text": "66" }, { "boundingBox": [ 229, 314, 251, 314, 251, 330, 229, 330 ], "text": "66" }, { "boundingBox": [ 255, 314, 301, 314, 301, 330, 255, 330 ], "text": "6666" }, { "boundingBox": [ 307, 314, 352, 314, 351, 331, 306, 330 ], "text": "6666" } ] } ] } ] }
答案 0 :(得分:1)
我猜您使用的是 Microsoft Azure 的 C# SDK 之类的东西。这是与您的问题相关的详细链接。
坐标信息就在 contentString 里面,类似于……
"language": "en",
"textAngle": -1.5000000000000335,
"orientation": "Up",
"regions": [
{
"boundingBox": "154,49,351,575",
"lines": [
{
"boundingBox": "165,49,340,117",
"words": [
{
"boundingBox": "165,49,63,109",
"text": "A"
},
{
"boundingBox": "261,50,244,116",
"text": "GOAL"
}
]
},
{
我用 Azure C# 做过一些项目,但您的代码看起来我并不十分熟悉。
我建议您检查代码中 textres 或 res 内的全部数据,我认为其中包含与上面字符串所示相同的边界框信息。
答案 1 :(得分:1)
首先,请注意 Microsoft Cognitive Services 中有两种不同的文本识别 API。Yuan 博士给出的输出来自 OCR API,它的语言覆盖范围更广;而 Tony 的输出表明他调用的是更新、改进后的 Read API。
第二,请注意,上面代码示例中引用的客户端 SDK Microsoft.ProjectOxford.Vision 已被弃用,建议改用其替代品 Microsoft.Azure.CognitiveServices.Vision.ComputerVision,相关示例可在其官方文档中找到(原文此处的链接已丢失)。
最后,回答您的具体问题。boundingBox 字段表示识别出的文本在文档中的位置。以您给出的输出 JSON 为例,文本行 GOVERNMENT OF INDIA 由坐标 (224,58)、(380,57)、(381,74) 和 (225,75) 界定,它们分别是边界框的四个角。之所以不采用 x、y、width、height 格式,是为了能够表示旋转过的文本。请注意,边界框的单位也包含在 JSON 中(在您的例子中为像素)。如果您需要,该行中每个单词的位置同样包含在响应 JSON 中。