我正在做一个需要使用 OCR 的项目:上传图像后,识别并取回图像上的文本。调用该 OCR 时,得到的响应是 JSON 格式。我希望从所有键为 "text" 的值中只取出数值。
目前我只能取到所有 "text" 键的值(文字和数字混在一起)。
我希望结果像这样(只保留数值):
1,381
1,210
1,240
1,160
1,090
1,020
这是JSON
{
"boundingBox": "1087,1201,228,83",
"words": [
{
"boundingBox": "1087,1201,228,83",
"text": "Sonic"
}
]
},
{
"boundingBox": "882,1287,700,55",
"words": [
{
"boundingBox": "882,1287,151,49",
"text": "Bacon"
},
{
"boundingBox": "1048,1287,170,49",
"text": "Double"
},
{
"boundingBox": "1232,1287,350,55",
"text": "Cheeseburger"
}
]
},
{
"boundingBox": "1014,1391,418,109",
"words": [
{
"boundingBox": "1014,1391,244,109",
"text": "1,240"
},
{
"boundingBox": "1287,1393,145,97",
"text": "cal"
}
]
},
{
"boundingBox": "853,1422,59,59",
"words": [
{
"boundingBox": "853,1422,59,59",
"text": "O"
}
]
},
{
"boundingBox": "970,1655,478,90",
"words": [
{
"boundingBox": "970,1655,278,90",
"text": "Burger"
},
{
"boundingBox": "1270,1655,178,90",
"text": "King"
}
]
},
{
"boundingBox": "917,1753,630,55",
"words": [
{
"boundingBox": "917,1754,142,54",
"text": "Triple"
},
{
"boundingBox": "1071,1754,225,54",
"text": "Whooper"
},
{
"boundingBox": "1308,1753,239,50",
"text": "Sandwich"
}
]
},
{
"boundingBox": "853,1855,576,109",
"words": [
{
"boundingBox": "853,1892,59,59",
"text": "e"
},
{
"boundingBox": "1018,1855,236,109",
"text": "1,160"
},
{
"boundingBox": "1283,1857,146,97",
"text": "cal"
}
]
}
这是我用 JavaScript(配合 HTML)编写的函数:
/**
 * Recursively walks a parsed JSON structure and concatenates every value
 * stored under a "text" key, each followed by a single space.
 *
 * @param {*} jsonStruct - Parsed JSON value (object or array) to walk.
 * @param {string} initialValue - Text accumulated so far; pass '' to start.
 * @returns {string} initialValue with every "text" value found appended.
 */
function traverseJSON (jsonStruct, initialValue) {
  // Declare the loop key locally: the previously undeclared `x` was an
  // implicit global, which throws a ReferenceError in strict mode / ES
  // modules and is shared between recursion levels.
  for (const key in jsonStruct) {
    const value = jsonStruct[key];
    if (typeof value === "object") {
      // Nested object or array; a null value is harmless because for...in
      // over null performs no iterations.
      initialValue = traverseJSON(value, initialValue);
    } else if (key === "text") {
      initialValue += value + " ";
    }
  }
  return initialValue;
}
.done(function(data) {
// Show formatted JSON on webpage.
// NOTE(review): traverseThis is not defined in the visible code — presumably
// traverseJSON was intended; confirm. The argument passed here is the JSON
// *string* produced by JSON.stringify, not the parsed object `data`.
traverseThis(JSON.stringify(data, null, 2));
})
答案 0 :(得分:0)
如果我理解你的问题:
/**
 * Collects the word entries whose `text` is numeric (e.g. "1,240").
 *
 * @param {Array<{words?: Array<{text: string}>}>} [data=[]] - Line entries,
 *   each carrying a `words` array of word entries.
 * @returns {Array<{text: string}>} The matching word entries, in input order.
 */
function getNumbers(data = []) {
  const isNumeric = (text) => {
    // Commas are treated as grouping separators and removed entirely, so
    // "1,234,567" is accepted; a single replace(",", ".") turned it into
    // "1.234,567", which parses to NaN.
    const cleaned = String(text).replace(/,/g, "").trim();
    // Number("") is 0, so blank text is rejected explicitly; testing for NaN
    // (rather than truthiness) keeps a legitimate "0" in the result.
    return cleaned !== "" && !Number.isNaN(Number(cleaned));
  };

  return data.reduce((arr, entry) => {
    // Tolerate entries without a words array instead of throwing.
    const words = entry && Array.isArray(entry.words) ? entry.words : [];
    return arr.concat(words.filter((wordEntry) => isNumeric(wordEntry.text)));
  }, []);
}
答案 1 :(得分:0)
使用 JSONPath 表达式
$[*].words[*].text
获取 JSON 中的所有文本。
对于上面的输入,示例输出为:
"Sonic",
"Bacon",
"Double",
"Cheeseburger",
"1,240",
"cal",
"O",
"Burger",
"King",
"Triple",
"Whooper",
"Sandwich",
"e",
"1,160",
"cal"
然后使用正则表达式(例如 ^\d[\d,]*$,仅匹配由数字和逗号组成的文本)来解析此输出,筛选出数值。您可以在此处(here)在线试用。
答案 2 :(得分:0)
请检查以下代码是否符合您的要求。我刚刚使用纯Javascript函数isNaN()和replace()实现了这一点。
// Sample OCR response used to exercise traverseJSON: an array of entries,
// each holding a boundingBox string and a list of words with their text.
let input = [
  {
    boundingBox: "1087,1201,228,83",
    words: [
      { boundingBox: "1087,1201,228,83", text: "Sonic" },
    ],
  },
  {
    boundingBox: "882,1287,700,55",
    words: [
      { boundingBox: "882,1287,151,49", text: "Bacon" },
      { boundingBox: "1048,1287,170,49", text: "Double" },
      { boundingBox: "1232,1287,350,55", text: "Cheeseburger" },
    ],
  },
  {
    boundingBox: "1014,1391,418,109",
    words: [
      { boundingBox: "1014,1391,244,109", text: "1,240" },
      { boundingBox: "1287,1393,145,97", text: "cal" },
    ],
  },
  {
    boundingBox: "853,1422,59,59",
    words: [
      { boundingBox: "853,1422,59,59", text: "O" },
    ],
  },
  {
    boundingBox: "970,1655,478,90",
    words: [
      { boundingBox: "970,1655,278,90", text: "Burger" },
      { boundingBox: "1270,1655,178,90", text: "King" },
    ],
  },
  {
    boundingBox: "917,1753,630,55",
    words: [
      { boundingBox: "917,1754,142,54", text: "Triple" },
      { boundingBox: "1071,1754,225,54", text: "Whooper" },
      { boundingBox: "1308,1753,239,50", text: "Sandwich" },
    ],
  },
  {
    boundingBox: "853,1855,576,109",
    words: [
      { boundingBox: "853,1892,59,59", text: "e" },
      { boundingBox: "1018,1855,236,109", text: "1,160" },
      { boundingBox: "1283,1857,146,97", text: "cal" },
    ],
  },
];
/**
 * Recursively walks a parsed JSON structure and collects every numeric value
 * stored under a "text" key (e.g. "1,240"), one per line.
 *
 * @param {*} jsonStruct - Parsed JSON value (object or array) to walk.
 * @param {string} initialValue - Text accumulated so far; pass '' to start.
 * @returns {string} initialValue with each numeric "text" value appended,
 *   every value followed by "\n".
 */
function traverseJSON (jsonStruct, initialValue) {
  // Declare the loop key locally: the previously undeclared `x` was an
  // implicit global, which throws a ReferenceError in strict mode / ES
  // modules and is shared between recursion levels.
  for (const key in jsonStruct) {
    const value = jsonStruct[key];
    if (typeof value === "object") {
      // Nested object or array; a null value is harmless because for...in
      // over null performs no iterations.
      initialValue = traverseJSON(value, initialValue);
    } else if (key === "text") {
      // Strip every comma, not just the first, so "1,234,567" is recognized.
      // String() keeps a non-string value (e.g. a number) from throwing.
      const digits = String(value).replace(/,/g, "").trim();
      // Number("") is 0, so blank text must be rejected explicitly.
      if (digits !== "" && !Number.isNaN(Number(digits))) {
        initialValue += value + "\n";
      }
    }
  }
  return initialValue;
}
// Print every numeric "text" value found in the sample input, one per line.
console.log(traverseJSON(input,''))