我用 5 张图像训练了模型,但识别的准确率不是很高。
表单链接:https://imgur.com/a/BOHVG7G
JSON输出:
{
"status": "success",
"pages": [
{
"number": 1,
"height": 1055,
"width": 1225,
"clusterId": 0,
"keyValuePairs": [
{
"key": [
{
"text": "Name:",
"boundingBox": [
163.7,
987.1,
242.2,
987.1,
242.2,
963.4,
163.7,
963.4
]
}
],
"value": [
{
"text": "Luca Bassi",
"boundingBox": [
365.3,
982.1,
458.8,
982.1,
458.8,
963.4,
365.3,
963.4
],
"confidence": 1
},
{
"text": "Brassi",
"boundingBox": [
365.3,
938.7,
417.1,
938.7,
417.1,
919,
365.3,
919
],
"confidence": 1
}
]
},
{
"key": [
{
"text": "Surname:",
"boundingBox": [
166.9,
937.8,
282.8,
937.8,
282.8,
913.1,
166.9,
913.1
]
}
],
"value": [
{
"text": "19 Cider Lane",
"boundingBox": [
367.3,
719.1,
490.4,
719.1,
490.4,
698.4,
367.3,
698.4
],
"confidence": 0.8
}
]
},
{
"key": [
{
"text": "e-Mail Address:",
"boundingBox": [
164.7,
893.4,
358.1,
893.4,
358.1,
867.8,
164.7,
867.8
]
}
],
"value": [
{
"text": "brassi@brassi.com",
"boundingBox": [
364.3,
893.4,
528,
893.4,
528,
867.8,
364.3,
867.8
],
"confidence": 0.6
}
]
},
{
"key": [
{
"text": "Phone Number:",
"boundingBox": [
163.7,
849.1,
361.1,
849.1,
361.1,
822.6,
163.7,
822.6
]
}
],
"value": [
{
"text": "456-3456",
"boundingBox": [
367.3,
849.1,
451.8,
849.1,
451.8,
822.6,
367.3,
822.6
],
"confidence": 1
}
]
},
{
"key": [
{
"text": "Mobile Number:",
"boundingBox": [
164.7,
803.8,
361.1,
803.8,
361.1,
777.3,
164.7,
777.3
]
}
],
"value": [
{
"text": "456-2135",
"boundingBox": [
366.3,
803.8,
450.8,
803.8,
450.8,
777.3,
366.3,
777.3
],
"confidence": 1
}
]
},
{
"key": [
{
"text": "Street:",
"boundingBox": [
166.9,
714.1,
246.2,
714.1,
246.2,
690.5,
166.9,
690.5
]
}
],
"value": []
},
{
"key": [
{
"text": "House:",
"boundingBox": [
163.7,
668.8,
250.2,
668.8,
250.2,
645.3,
163.7,
645.3
]
}
],
"value": [
{
"text": "Detroit",
"boundingBox": [
364.3,
628.5,
427.3,
628.5,
427.3,
609.7,
364.3,
609.7
],
"confidence": 0.6
}
]
},
{
"key": [
{
"text": "Town:",
"boundingBox": [
166.9,
623.5,
241.2,
623.5,
241.2,
598.9,
166.9,
598.9
]
}
],
"value": [
{
"text": "80012",
"boundingBox": [
365.3,
585.2,
418.1,
585.2,
418.1,
565.5,
365.3,
565.5
],
"confidence": 1
}
]
},
{
"key": [
{
"text": "Postcode:",
"boundingBox": [
164.7,
580.2,
286.8,
580.2,
286.8,
554.5,
164.7,
554.5
]
}
],
"value": [
{
"text": "Russia",
"boundingBox": [
365.3,
534.8,
417.1,
534.8,
417.1,
516.2,
365.3,
516.2
],
"confidence": 0.6
}
]
},
{
"key": [
{
"text": "Comments:",
"boundingBox": [
166.9,
487.7,
305.2,
487.7,
305.2,
464,
166.9,
464
]
}
],
"value": [
{
"text": "The quick brown fox",
"boundingBox": [
366.3,
485.7,
549.4,
485.7,
549.4,
464,
366.3,
464
],
"confidence": 0.6
}
]
}
],
"tables": []
}
],
"errors": []
}
如您所见,姓氏和地址信息的识别结果并不理想(例如,键 "Surname:" 对应的值是 "19 Cider Lane",而 "Street:" 的值为空)。有没有办法可以更有效地进行训练,还是只能使用更大的数据集?
我在 Azure 门户上查找了一番,但不确定自己是否遗漏了某个可以改善训练效果的选项。