我正在尝试使用Microsoft Computer vision和Blue Prism(RPA工具)从图像中提取文本。我有一个如下的JSON响应:
{
"status": "Succeeded",
"recognitionResults": [{
"page": 1,
"clockwiseOrientation": 0.76,
"width": 1700,
"height": 2200,
"unit": "pixel",
"lines": [{
"boundingBox": [691, 55, 854, 57, 853, 78, 690, 76],
"text": "Display Record",
"words": [{
"boundingBox": [695, 56, 775, 57, 776, 77, 695, 76],
"text": "Display"
}, {
"boundingBox": [783, 57, 853, 58, 854, 79, 783, 78],
"text": "Record"
}]
}, {
"boundingBox": [57, 102, 397, 107, 396, 130, 56, 126],
"text": "Customer Number . . 0012343345",
"words": [{
"boundingBox": [60, 103, 153, 106, 153, 127, 60, 124],
"text": "Customer"
}, {
"boundingBox": [160, 106, 230, 107, 230, 129, 160, 128],
"text": "Number"
}, {
"boundingBox": [242, 108, 256, 108, 255, 129, 241, 129],
"text": "."
}, {
"boundingBox": [264, 108, 278, 108, 278, 130, 264, 129],
"text": "."
}, {
"boundingBox": [284, 108, 397, 107, 396, 130, 284, 130],
"text": "0012343345"
}]
}, {
"boundingBox": [685, 110, 1126, 115, 1125, 140, 684, 136],
"text": "Short Name . . . . . . SHOES AND THINGS",
"words": [{
"boundingBox": [685, 111, 742, 114, 742, 135, 685, 133],
"text": "Short"
}, {
"boundingBox": [749, 114, 798, 116, 798, 137, 749, 135],
"text": "Name"
}, {
"boundingBox": [808, 117, 823, 117, 823, 137, 808, 137],
"text": "."
}, {
"boundingBox": [830, 117, 844, 117, 844, 138, 830, 138],
"text": "."
}, {
"boundingBox": [851, 118, 866, 118, 866, 138, 851, 138],
"text": "."
}, {
"boundingBox": [874, 118, 889, 118, 889, 139, 874, 139],
"text": "."
}, {
"boundingBox": [897, 118, 912, 119, 912, 139, 897, 139],
"text": "."
}, {
"boundingBox": [919, 119, 933, 119, 933, 139, 919, 139],
"text": "."
}, {
"boundingBox": [940, 119, 999, 118, 999, 139, 940, 139],
"text": "SHOES"
}, {
"boundingBox": [1005, 118, 1042, 118, 1042, 139, 1005, 139],
"text": "AND"
}, {
"boundingBox": [1051, 118, 1121, 115, 1121, 138, 1051, 139],
"text": "THINGS"
}]
}, {
"boundingBox": [57, 132, 366, 135, 365, 159, 56, 155],
"text": "Name . . . SHOES AND THINGS",
"words": [{
"boundingBox": [59, 132, 109, 134, 108, 156, 58, 155],
"text": "Name"
}, {
"boundingBox": [118, 134, 133, 134, 132, 157, 117, 156],
"text": "."
}, {
"boundingBox": [141, 134, 156, 135, 155, 157, 140, 157],
"text": "."
}, {
"boundingBox": [163, 135, 178, 135, 178, 157, 163, 157],
"text": "."
}, {
"boundingBox": [183, 135, 243, 136, 243, 158, 182, 157],
"text": "SHOES"
}, {
"boundingBox": [249, 136, 287, 137, 286, 158, 249, 158],
"text": "AND"
}, {
"boundingBox": [296, 137, 366, 138, 365, 157, 296, 158],
"text": "THINGS"
}]
}, {
"boundingBox": [682, 139, 930, 143, 929, 167, 682, 163],
"text": "Legal Residence Code .",
"words": [{
"boundingBox": [683, 141, 743, 141, 742, 163, 682, 163],
"text": "Legal"
}, {
"boundingBox": [749, 141, 855, 143, 854, 164, 748, 163],
"text": "Residence"
}, {
"boundingBox": [862, 144, 911, 146, 910, 165, 861, 164],
"text": "Code"
}, {
"boundingBox": [920, 147, 930, 148, 929, 166, 918, 166],
"text": "."
}]
}, {
"boundingBox": [682, 279, 1066, 284, 1065, 305, 682, 301],
"text": "Date of Registration/ Incorporation",
"words": [{
"boundingBox": [683, 280, 729, 281, 728, 301, 682, 301],
"text": "Date"
}, {
"boundingBox": [737, 281, 763, 282, 762, 301, 736, 301],
"text": "of"
}, {
"boundingBox": [771, 282, 913, 284, 912, 303, 770, 301],
"text": "Registration/"
}, {
"boundingBox": [917, 284, 1063, 285, 1061, 305, 916, 303],
"text": "Incorporation"
}]
}, {
"boundingBox": [1083, 282, 1192, 284, 1191, 307, 1082, 305],
"text": "2/14/2016",
"words": [{
"boundingBox": [1094, 284, 1192, 285, 1191, 307, 1094, 305],
"text": "2/14/2016"
}]
}, {
"boundingBox": [57, 326, 454, 332, 453, 356, 56, 350],
"text": "Email Addr SHOESANDTHINGS@YAHOO . COM",
"words": [{
"boundingBox": [58, 327, 119, 328, 118, 351, 57, 350],
"text": "Email"
}, {
"boundingBox": [125, 328, 174, 329, 173, 352, 124, 351],
"text": "Addr"
}, {
"boundingBox": [181, 329, 402, 333, 402, 354, 181, 352],
"text": "SHOESANDTHINGS@YAHOO"
}, {
"boundingBox": [407, 333, 412, 334, 411, 355, 407, 355],
"text": "."
}, {
"boundingBox": [416, 334, 451, 334, 451, 355, 416, 355],
"text": "COM"
}]
}, {
"boundingBox": [680, 335, 951, 338, 950, 359, 679, 356],
"text": "Customer Risk Class . N",
"words": [{
"boundingBox": [681, 336, 772, 336, 772, 358, 681, 355],
"text": "Customer"
}, {
"boundingBox": [781, 336, 827, 337, 827, 358, 781, 358],
"text": "Risk"
}, {
"boundingBox": [837, 337, 896, 339, 896, 358, 837, 358],
"text": "Class"
}, {
"boundingBox": [917, 339, 930, 340, 930, 357, 918, 358],
"text": "."
}, {
"boundingBox": [936, 340, 949, 340, 950, 357, 937, 357],
"text": "N"
}]
}, {
"boundingBox": [54, 356, 236, 358, 236, 383, 53, 380],
"text": "National ID Nbr",
"words": [{
"boundingBox": [57, 358, 153, 358, 153, 381, 56, 380],
"text": "National"
}, {
"boundingBox": [161, 358, 185, 359, 184, 382, 160, 381],
"text": "ID"
}, {
"boundingBox": [191, 359, 230, 360, 230, 383, 190, 382],
"text": "Nbr",
"confidence": "Low"
}]
}, {
"boundingBox": [676, 362, 757, 366, 756, 387, 675, 382],
"text": "Remarks",
"words": [{
"boundingBox": [680, 364, 756, 369, 756, 386, 680, 384],
"text": "Remarks"
}]
}, {
"boundingBox": [58, 386, 185, 388, 184, 408, 57, 406],
"text": "Tax ID Cert",
"words": [{
"boundingBox": [59, 386, 93, 387, 93, 407, 59, 406],
"text": "Tax"
}, {
"boundingBox": [104, 387, 126, 388, 126, 407, 104, 407],
"text": "ID"
}, {
"boundingBox": [136, 388, 183, 389, 183, 407, 136, 407],
"text": "Cert"
}]
}, {
"boundingBox": [676, 390, 925, 396, 924, 417, 675, 412],
"text": "Originating Branch . .",
"words": [{
"boundingBox": [680, 392, 805, 395, 804, 415, 680, 410],
"text": "Originating"
}, {
"boundingBox": [814, 395, 881, 396, 881, 416, 813, 415],
"text": "Branch"
}, {
"boundingBox": [895, 396, 907, 396, 906, 416, 895, 416],
"text": "."
}, {
"boundingBox": [917, 396, 924, 396, 924, 416, 917, 416],
"text": "."
}]
}, {
"boundingBox": [952, 396, 994, 396, 994, 414, 953, 413],
"text": "126",
"words": [{
"boundingBox": [960, 395, 993, 395, 993, 413, 960, 413],
"text": "126"
}]
}, {
"boundingBox": [1030, 394, 1376, 397, 1375, 420, 1029, 417],
"text": "Date Last Tagged . . 2/04/2019",
"words": [{
"boundingBox": [1035, 396, 1082, 397, 1083, 417, 1035, 415],
"text": "Date"
}, {
"boundingBox": [1091, 397, 1138, 397, 1139, 418, 1091, 417],
"text": "Last"
}, {
"boundingBox": [1148, 397, 1217, 398, 1217, 419, 1149, 418],
"text": "Tagged"
}, {
"boundingBox": [1227, 398, 1239, 398, 1240, 419, 1227, 419],
"text": "."
}, {
"boundingBox": [1249, 398, 1262, 398, 1262, 420, 1249, 419],
"text": "."
}, {
"boundingBox": [1281, 398, 1376, 397, 1375, 420, 1281, 420],
"text": "2/04/2019"
}]
}, {
"boundingBox": [51, 412, 214, 414, 213, 435, 50, 433],
"text": "Tax ID Number",
"words": [{
"boundingBox": [57, 414, 92, 414, 92, 434, 58, 434],
"text": "Tax"
}, {
"boundingBox": [103, 414, 125, 414, 125, 435, 104, 434],
"text": "ID"
}, {
"boundingBox": [134, 414, 205, 415, 203, 436, 134, 435],
"text": "Number"
}]
}, {
"boundingBox": [282, 414, 470, 416, 469, 440, 281, 438],
"text": ". 11110201201900",
"words": [{
"boundingBox": [295, 417, 307, 416, 306, 436, 293, 436],
"text": "."
}, {
"boundingBox": [315, 416, 470, 417, 468, 440, 314, 436],
"text": "11110201201900"
}]
}, {
"boundingBox": [546, 417, 719, 420, 718, 441, 545, 438],
"text": "Present Address",
"words": [{
"boundingBox": [548, 417, 628, 420, 627, 440, 547, 438],
"text": "Present"
}, {
"boundingBox": [636, 420, 718, 421, 718, 440, 635, 440],
"text": "Address"
}]
}, {
"boundingBox": [1022, 420, 1493, 425, 1492, 451, 1021, 446],
"text": "Permanent Bus. Addr (Same as Present Y Y/N)",
"words": [{
"boundingBox": [1025, 424, 1127, 424, 1126, 444, 1024, 443],
"text": "Permanent"
}, {
"boundingBox": [1136, 424, 1185, 424, 1184, 444, 1135, 444],
"text": "Bus."
}, {
"boundingBox": [1192, 424, 1236, 425, 1235, 445, 1190, 444],
"text": "Addr"
}, {
"boundingBox": [1240, 425, 1294, 425, 1292, 446, 1239, 445],
"text": "(Same"
}, {
"boundingBox": [1303, 425, 1328, 425, 1327, 447, 1302, 446],
"text": "as"
}, {
"boundingBox": [1337, 425, 1417, 425, 1415, 449, 1335, 447],
"text": "Present"
}, {
"boundingBox": [1426, 425, 1439, 425, 1437, 450, 1425, 449],
"text": "Y"
}, {
"boundingBox": [1448, 425, 1492, 425, 1490, 451, 1446, 450],
"text": "Y/N)"
}]
}, {
"boundingBox": [52, 440, 232, 442, 231, 464, 51, 461],
"text": "SSS/GSIS Number.",
"words": [{
"boundingBox": [58, 441, 150, 442, 149, 462, 58, 462],
"text": "SSS/GSIS"
}, {
"boundingBox": [157, 442, 232, 444, 230, 464, 156, 462],
"text": "Number."
}]
}, {
"boundingBox": [299, 442, 437, 443, 436, 465, 298, 464],
"text": "44402012019",
"words": [{
"boundingBox": [316, 445, 436, 444, 434, 466, 314, 462],
"text": "44402012019"
}]
}, {
"boundingBox": [545, 444, 712, 446, 711, 468, 545, 466],
"text": "UNI1206 2ND FLR",
"words": [{
"boundingBox": [547, 445, 629, 446, 629, 467, 547, 466],
"text": "UNI1206"
}, {
"boundingBox": [637, 446, 673, 447, 673, 468, 637, 467],
"text": "2ND"
}, {
"boundingBox": [681, 447, 712, 448, 712, 469, 681, 468],
"text": "FLR"
}]
}, {
"boundingBox": [1022, 448, 1191, 449, 1190, 472, 1021, 472],
"text": "UNI1206 2ND FLR",
"words": [{
"boundingBox": [1025, 450, 1107, 449, 1107, 471, 1025, 473],
"text": "UNI1206",
"confidence": "Low"
}, {
"boundingBox": [1114, 449, 1151, 450, 1152, 471, 1115, 471],
"text": "2ND"
}, {
"boundingBox": [1158, 450, 1190, 451, 1191, 472, 1159, 471],
"text": "FLR"
}]
}, {
"boundingBox": [55, 466, 200, 468, 199, 488, 54, 487],
"text": "Branch Number",
"words": [{
"boundingBox": [56, 467, 125, 468, 125, 489, 57, 487],
"text": "Branch"
}, {
"boundingBox": [135, 468, 201, 468, 200, 488, 135, 489],
"text": "Number"
}]
}, {
"boundingBox": [332, 472, 365, 471, 364, 490, 332, 491],
"text": "12",
"words": [{
"boundingBox": [337, 471, 360, 471, 361, 490, 338, 490],
"text": "12",
"confidence": "Low"
}]
}, {
"boundingBox": [551, 471, 914, 475, 913, 499, 550, 495],
"text": "(Unit/House No. , Street , Subd/Bgy . )",
"words": [{
"boundingBox": [552, 472, 674, 476, 672, 496, 551, 495],
"text": "(Unit/House"
}, {
"boundingBox": [680, 476, 713, 476, 711, 496, 678, 496],
"text": "No."
}, {
"boundingBox": [717, 476, 722, 476, 720, 496, 716, 496],
"text": ","
}, {
"boundingBox": [726, 476, 791, 477, 789, 497, 725, 496],
"text": "Street"
}, {
"boundingBox": [795, 477, 800, 477, 798, 497, 793, 497],
"text": ","
}, {
"boundingBox": [804, 476, 891, 475, 889, 499, 802, 497],
"text": "Subd/Bgy"
}, {
"boundingBox": [896, 475, 900, 475, 898, 499, 893, 499],
"text": "."
}, {
"boundingBox": [905, 475, 914, 475, 911, 500, 902, 500],
"text": ")"
}]
}, {
"boundingBox": [1030, 477, 1388, 481, 1387, 505, 1029, 500],
"text": "(Unit/House No. , Street, Subd/Bgy. )",
"words": [{
"boundingBox": [1030, 477, 1152, 479, 1152, 502, 1030, 500],
"text": "(Unit/House"
}, {
"boundingBox": [1157, 479, 1189, 480, 1188, 503, 1156, 502],
"text": "No."
}, {
"boundingBox": [1193, 480, 1198, 480, 1198, 503, 1193, 503],
"text": ","
}, {
"boundingBox": [1202, 480, 1276, 481, 1275, 504, 1202, 503],
"text": "Street,"
}, {
"boundingBox": [1280, 481, 1376, 483, 1376, 505, 1280, 504],
"text": "Subd/Bgy."
}, {
"boundingBox": [1381, 483, 1387, 483, 1387, 505, 1381, 505],
"text": ")"
}]
}, {
"boundingBox": [62, 495, 197, 497, 196, 519, 61, 516],
"text": "SAMSON ROAD",
"words": [{
"boundingBox": [68, 498, 135, 497, 134, 517, 68, 516],
"text": "SAMSON"
}, {
"boundingBox": [147, 497, 192, 498, 191, 519, 146, 518],
"text": "ROAD"
}]
}, {
"boundingBox": [545, 501, 862, 503, 861, 525, 544, 523],
"text": "SHOPPESVILLE PLUS GREENHILLS",
"words": [{
"boundingBox": [548, 502, 684, 503, 684, 524, 548, 523],
"text": "SHOPPESVILLE"
}, {
"boundingBox": [693, 503, 739, 503, 739, 524, 692, 524],
"text": "PLUS"
}, {
"boundingBox": [748, 503, 862, 504, 861, 525, 747, 524],
"text": "GREENHILLS"
}]
}, {
"boundingBox": [1030, 506, 1344, 509, 1343, 531, 1029, 528],
"text": "SHOPPESVILLE PLUS GREENHILLS",
"words": [{
"boundingBox": [1030, 506, 1162, 509, 1162, 530, 1030, 528],
"text": "SHOPPESVILLE",
"confidence": "Low"
}, {
"boundingBox": [1170, 509, 1218, 510, 1218, 530, 1170, 530],
"text": "PLUS"
}, {
"boundingBox": [1224, 510, 1340, 511, 1341, 529, 1224, 530],
"text": "GREENHILLS"
}]
}, {
"boundingBox": [54, 524, 233, 525, 232, 547, 53, 546],
"text": "Primary Officer",
"words": [{
"boundingBox": [58, 526, 137, 525, 137, 548, 57, 547],
"text": "Primary"
}, {
"boundingBox": [147, 525, 227, 526, 227, 548, 146, 548],
"text": "Officer"
}]
}, {
"boundingBox": [279, 527, 347, 528, 346, 549, 279, 549],
"text": ". 126",
"words": [{
"boundingBox": [295, 530, 306, 528, 304, 544, 293, 546],
"text": "."
}, {
"boundingBox": [314, 529, 347, 533, 346, 549, 313, 544],
"text": "126"
}]
}, {
"boundingBox": [552, 527, 702, 532, 701, 552, 551, 548],
"text": "(District, Town",
"words": [{
"boundingBox": [552, 528, 655, 531, 655, 551, 552, 549],
"text": "(District,",
"confidence": "Low"
}, {
"boundingBox": [659, 531, 702, 532, 701, 552, 659, 551],
"text": "Town"
}]
}, {
"boundingBox": [1026, 532, 1185, 537, 1184, 560, 1025, 554],
"text": "(District, Town)",
"words": [{
"boundingBox": [1028, 533, 1132, 537, 1133, 558, 1028, 555],
"text": "(District,"
}, {
"boundingBox": [1137, 537, 1183, 539, 1184, 558, 1138, 558],
"text": "Town)"
}]
}, {
"boundingBox": [52, 551, 188, 555, 188, 577, 51, 573],
"text": "Access Code",
"words": [{
"boundingBox": [57, 553, 127, 554, 127, 575, 56, 573],
"text": "Access"
}, {
"boundingBox": [135, 554, 182, 555, 182, 577, 135, 575],
"text": "Code"
}]
}, {
"boundingBox": [312, 553, 368, 557, 366, 573, 310, 568],
"text": "126",
"words": [{
"boundingBox": [337, 557, 367, 558, 366, 573, 336, 570],
"text": "126"
}]
}, {
"boundingBox": [548, 554, 702, 556, 701, 578, 547, 576],
"text": "SAN JUAN CITY",
"words": [{
"boundingBox": [548, 555, 582, 555, 583, 577, 549, 577],
"text": "SAN"
}, {
"boundingBox": [592, 555, 638, 556, 639, 577, 593, 577],
"text": "JUAN"
}, {
"boundingBox": [647, 556, 696, 557, 696, 578, 648, 577],
"text": "CITY"
}]
}, {
"boundingBox": [1027, 562, 1171, 565, 1170, 586, 1026, 582],
"text": "SAN JUAN CITY",
"words": [{
"boundingBox": [1027, 562, 1058, 563, 1059, 583, 1027, 582],
"text": "SAN"
}, {
"boundingBox": [1070, 564, 1115, 565, 1115, 584, 1071, 583],
"text": "JUAN"
}, {
"boundingBox": [1125, 565, 1170, 567, 1171, 585, 1126, 584],
"text": "CITY"
}]
}, {
"boundingBox": [56, 581, 184, 585, 183, 605, 56, 602],
"text": "Citizenship",
"words": [{
"boundingBox": [59, 583, 181, 587, 181, 605, 57, 602],
"text": "Citizenship"
}]
}, {
"boundingBox": [547, 581, 617, 584, 616, 607, 546, 604],
"text": "(city)",
"words": [{
"boundingBox": [549, 583, 617, 585, 616, 607, 550, 604],
"text": "(city)",
"confidence": "Low"
}]
}, {
"boundingBox": [1026, 588, 1093, 591, 1092, 614, 1025, 611],
"text": "(city)",
"words": [{
"boundingBox": [1027, 590, 1090, 591, 1092, 611, 1029, 611],
"text": "(city)"
}]
}, {
"boundingBox": [51, 606, 233, 609, 232, 631, 50, 628],
"text": "Customer Class .",
"words": [{
"boundingBox": [56, 608, 149, 609, 149, 629, 57, 629],
"text": "Customer"
}, {
"boundingBox": [157, 609, 217, 611, 216, 631, 157, 629],
"text": "Class"
}, {
"boundingBox": [228, 611, 232, 611, 231, 631, 227, 631],
"text": "."
}]
}, {
"boundingBox": [549, 611, 674, 612, 673, 635, 548, 634],
"text": "PHILIPPINES",
"words": [{
"boundingBox": [549, 611, 674, 613, 675, 635, 549, 633],
"text": "PHILIPPINES"
}]
}, {
"boundingBox": [1023, 616, 1154, 618, 1153, 640, 1022, 637],
"text": "PHILIPPINES",
"words": [{
"boundingBox": [1024, 616, 1151, 619, 1152, 639, 1024, 638],
"text": "PHILIPPINES"
}]
}, {
"boundingBox": [56, 635, 234, 639, 233, 661, 55, 656],
"text": "Market Segment .",
"words": [{
"boundingBox": [56, 639, 125, 637, 124, 656, 56, 657],
"text": "Market"
}, {
"boundingBox": [135, 637, 214, 642, 212, 660, 134, 656],
"text": "Segment"
}, {
"boundingBox": [227, 643, 234, 644, 232, 661, 225, 661],
"text": "."
}]
}, {
"boundingBox": [547, 639, 745, 644, 744, 666, 546, 661],
"text": "(Province , Country)",
"words": [{
"boundingBox": [549, 640, 645, 643, 646, 664, 550, 661],
"text": "(Province"
}, {
"boundingBox": [649, 643, 655, 643, 655, 664, 650, 664],
"text": ","
}, {
"boundingBox": [659, 643, 745, 644, 746, 666, 659, 664],
"text": "Country)"
}]
}, {
"boundingBox": [1028, 643, 1227, 646, 1226, 669, 1027, 665],
"text": "(Province, Country)",
"words": [{
"boundingBox": [1028, 644, 1130, 646, 1131, 667, 1029, 667],
"text": "(Province,"
}, {
"boundingBox": [1135, 646, 1226, 647, 1227, 669, 1135, 667],
"text": "Country)"
}]
}, {
"boundingBox": [49, 662, 266, 668, 265, 690, 49, 684],
"text": "Solicitable Code .",
"words": [{
"boundingBox": [56, 665, 182, 667, 181, 686, 56, 684],
"text": "Solicitable"
}, {
"boundingBox": [191, 667, 238, 670, 236, 688, 190, 686],
"text": "Code"
}, {
"boundingBox": [249, 671, 262, 673, 260, 690, 248, 689],
"text": "."
}]
}, {
"boundingBox": [548, 668, 726, 671, 725, 692, 547, 689],
"text": "Zip Code 1900 -",
"words": [{
"boundingBox": [548, 670, 579, 672, 579, 689, 548, 687],
"text": "Zip"
}, {
"boundingBox": [591, 672, 635, 673, 636, 691, 591, 689],
"text": "Code"
}, {
"boundingBox": [658, 673, 702, 672, 703, 692, 658, 691],
"text": "1900"
}, {
"boundingBox": [714, 672, 725, 671, 726, 692, 714, 692],
"text": "-"
}]
}, {
"boundingBox": [1020, 672, 1199, 674, 1198, 696, 1019, 694],
"text": "Zip Code 1900 -",
"words": [{
"boundingBox": [1024, 673, 1058, 674, 1059, 695, 1025, 694],
"text": "Zip"
}, {
"boundingBox": [1068, 674, 1115, 675, 1116, 696, 1068, 695],
"text": "Code"
}, {
"boundingBox": [1136, 675, 1183, 675, 1183, 696, 1136, 696],
"text": "1900"
}, {
"boundingBox": [1193, 675, 1198, 675, 1198, 696, 1193, 696],
"text": "-"
}]
}, {
"boundingBox": [46, 720, 320, 725, 319, 747, 45, 742],
"text": "F2=View Last Maintenance",
"words": [{
"boundingBox": [46, 721, 124, 723, 123, 744, 46, 740],
"text": "F2=View"
}, {
"boundingBox": [134, 723, 180, 724, 180, 745, 134, 744],
"text": "Last"
}, {
"boundingBox": [189, 724, 314, 726, 313, 745, 188, 745],
"text": "Maintenance"
}]
}, {
"boundingBox": [354, 722, 492, 725, 491, 746, 353, 743],
"text": "F8=Loan Data",
"words": [{
"boundingBox": [357, 724, 435, 725, 435, 745, 357, 743],
"text": "F8=Loan"
}, {
"boundingBox": [445, 725, 492, 726, 491, 746, 445, 745],
"text": "Data"
}]
}, {
"boundingBox": [699, 726, 976, 729, 975, 751, 698, 747],
"text": "F10=Owners/Beneficiaries",
"words": [{
"boundingBox": [702, 728, 971, 731, 970, 751, 702, 747],
"text": "F10=Owners/Beneficiaries"
}]
}, {
"boundingBox": [1033, 729, 1145, 731, 1145, 752, 1032, 750],
"text": "F12=Cancel",
"words": [{
"boundingBox": [1035, 731, 1145, 732, 1145, 753, 1035, 750],
"text": "F12=Cancel"
}]
}, {
"boundingBox": [44, 750, 294, 749, 294, 771, 45, 772],
"text": "F7=User Defined Fields",
"words": [{
"boundingBox": [46, 750, 124, 752, 124, 772, 45, 769],
"text": "F7=User"
}, {
"boundingBox": [133, 752, 214, 752, 214, 772, 133, 772],
"text": "Defined"
}, {
"boundingBox": [223, 752, 293, 750, 293, 769, 224, 771],
"text": "Fields"
}]
}, {
"boundingBox": [355, 749, 661, 754, 660, 775, 354, 771],
"text": "F9=FATCA/Substantial Owners",
"words": [{
"boundingBox": [357, 750, 584, 754, 584, 774, 357, 771],
"text": "F9=FATCA/Substantial"
}, {
"boundingBox": [590, 754, 661, 754, 661, 776, 591, 774],
"text": "Owners"
}]
}, {
"boundingBox": [695, 752, 994, 757, 993, 780, 694, 775],
"text": "F11=Alternate Address Data",
"words": [{
"boundingBox": [701, 754, 849, 756, 848, 778, 700, 776],
"text": "F11=Alternate",
"confidence": "Low"
}, {
"boundingBox": [856, 756, 939, 758, 938, 779, 855, 778],
"text": "Address"
}, {
"boundingBox": [945, 758, 993, 760, 993, 779, 944, 779],
"text": "Data"
}]
}, {
"boundingBox": [1035, 756, 1238, 759, 1237, 781, 1034, 777],
"text": "F16=High Risk Type",
"words": [{
"boundingBox": [1035, 759, 1124, 758, 1124, 779, 1035, 779],
"text": "F16=High"
}, {
"boundingBox": [1134, 759, 1181, 760, 1181, 780, 1134, 779],
"text": "Risk"
}, {
"boundingBox": [1191, 760, 1238, 762, 1237, 781, 1191, 780],
"text": "Type"
}]
}]
}]
}
我想获取 text 属性的值("Zip Code 1900 -"),但是还有另一个具有相同名称的属性。您能否帮助我如何使用VB仅提取响应中 lines 下的 text?例如,我想像下面这样提取:
Display Record Customer Number . . 0012343345 Short Name . . . . . . SHOES AND THINGS
下面是我的代码
' Deserialize the OCR response into the azureOcrJsonProperty model, then walk its
' public properties via reflection and write "name: value" text into Formatted_JSon.
Dim ocrProperties As azureOcrJsonProperty = JsonConvert.DeserializeObject(Of azureOcrJsonProperty)(JsonReponse)
For Each item As PropertyInfo In ocrProperties.GetType.GetProperties()
' Type.IsNested is True when the property's type is declared inside another type;
' it is used here to decide whether to descend one level into the property's value.
If item.PropertyType.IsNested Then
' NOTE(review): TypeHolder is declared As Type but is assigned the property's value,
' not its type - Object looks like what was intended; confirm.
Dim TypeHolder as Type = item.GetValue(ocrProperties)
For Each subitem As PropertyInfo In TypeHolder.GetType.GetProperties()
' Plain assignment: every pass replaces the previous content of Formatted_JSon.
Formatted_JSon = subitem.Name & ": " & subitem.GetValue(TypeHolder).ToString
Next
Else
' Non-nested property types are written directly as "name: value" (also by plain assignment).
Formatted_JSon = item.Name & ": " & item.GetValue(ocrProperties).ToString
End If
Next
' Root object of the OCR response: the overall status plus the recognition results.
Public Class azureOcrJsonProperty
' Value of the "status" field, e.g. "Succeeded" in the sample response.
Public Property status As String
' NOTE(review): in the sample response "recognitionResults" is a JSON array ([{ ... }]),
' while this property is declared as a single object; the reported deserialization error
' points at this path.
Public Property recognitionResults As recognitionResults
End Class
' One entry of "recognitionResults": page metadata plus the recognized lines.
Public Class recognitionResults
' Page number (1 in the sample response).
Public Property page As Integer
' Value of "clockwiseOrientation" (0.76 in the sample response).
Public Property clockwiseOrientation As decimal
' Page dimensions, expressed in the unit given by "unit" ("pixel" in the sample response).
Public Property width As Integer
Public Property height As Integer
Public Property unit As String
' NOTE(review): "lines" is a JSON array in the sample response, but is declared here
' as a single lines object.
Public Property lines As lines
End Class
' One recognized line: its bounding polygon, the full text and the individual words.
Public Class lines
' The sample response stores boundingBox as an array of eight integers.
Public Property boundingBox As String()
' NOTE(review): "text" holds strings such as "Display Record" in the sample response,
' but is declared As decimal here.
Public Property text As decimal
' NOTE(review): "words" is a JSON array in the sample response, but is declared here
' as a single words object.
Public Property words As words
End Class
' One recognized word inside a line: its bounding polygon and text.
' Some words in the sample response also carry a "confidence" field, which has no
' matching property here.
Public Class words
' The sample response stores boundingBox as an array of eight integers.
Public Property boundingBox As String()
' NOTE(review): "text" holds strings such as "Display" in the sample response,
' but is declared As decimal here.
Public Property text As decimal
End Class
我遇到错误了
无法将当前JSON数组(例如[1,2,3])反序列化为类型'Get_Customer_Info+recognitionResults',因为该类型需要JSON对象(例如{"name":"value"})才能正确反序列化。要解决此错误,可以将JSON更改为JSON对象(例如{"name":"value"}),或者将反序列化的目标类型更改为数组,或更改为实现了集合接口(例如ICollection、IList)的类型(如List(Of T)),这些类型可以从JSON数组反序列化。还可以将JsonArrayAttribute添加到该类型上,以强制其从JSON数组反序列化。路径"recognitionResults",第1行,位置44。
答案 0 :(得分:0)
JSON结构本身非常简单,请参阅本页底部的结果类模型。
主类(OcrResults)公开了执行标准操作的公共方法 Serialize() 和 Deserialize(json As String)。
要反序列化JSON,请按以下方式调用 Deserialize() 方法:
' Create the OcrResults helper, then deserialize the JSON string into the result objects.
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)
或
' Alternative form: hand the JSON to the constructor and call the parameterless Deserialize().
Dim ocrObjects = New OcrResults(json).Deserialize()
可以修改生成的类对象,并在需要时调用 Serialize() 方法将其序列化回原始JSON结构:
' Round trip: deserialize the JSON, modify the resulting objects as needed,
' then serialize them back to a JSON string.
Dim ocrResults = New OcrResults()
Dim ocrObjects = ocrResults.Deserialize(json)
'(...)
Dim newJson As String = ocrResults.Serialize(ocrObjects)
由于此JSON包含OCR过程的结果——其中识别出的文本的边界多边形以整数数组表示——我添加了一个(略显冗长的)JsonConverter(BoundingBoxConverter),它将整数数组转换为 Point 结构的数组(并在序列化类模型时转换回整数数组)。
在识别出的文本部分周围绘制多边形时,这可以省去一个步骤(通常,GraphicsPath 之类的图形类接受 Point / PointF 的数组作为参数)。
反序列化对象的 Lines 属性是 List(Of Line) 类。
每个 Line().Text 属性都包含完整的已识别文本序列,而 Line().Words() 列表包含同一序列的各个单独部分。
可以使用LINQ查询以不同方式提取整行。
例如,如问题中所述,这些行:
Display Record Customer Number . . 0012343345 Short Name . . . . . . SHOES AND THINGS
可以提取为:
' Deserialize the response, then, for each recognition result, skip lines until the first
' one whose Text contains "Display Record" and keep the following lines up to (but not
' including) the first one whose Text starts with "Name". The kept lines of all results
' are flattened into a single list.
Dim ocrObjects = New OcrResults(json).Deserialize()
Dim searchResult = ocrObjects.RecognitionResults.
SelectMany(Function(r) r.Lines.
SkipWhile(Function(ln) Not ln.Text.Contains("Display Record")).
TakeWhile(Function(ln) Not ln.Text.StartsWith("Name"))).ToList()
searchResult 列表按顺序包含从 "Display Record" 到 "Short Name . . . . . . SHOES AND THINGS" 的所有 Line 对象。
要打印结果:
Console.WriteLine(String.Join(" ", searchResult.Select(Function(ln) ln.Text)))
等同于:
For Each line As OcrResults.Line In searchResult
Console.Write(line.Text)
Next
提取第一个 Text 属性包含 "Zip Code 1900" 的 Line 对象:
Dim zipCode = ocrObjects.RecognitionResults.
SelectMany(Function(r) r.Lines).
First(Function(ln) ln.Text.Contains("Zip Code 1900"))
Dim zipCodeText = zipCode.Text
Dim zipPoints = zipCode.BoundingBox
或将第一个识别出的 Page 分配给新对象,因此您可以直接使用其 Lines 对象:
Dim ocrFirstPage = ocrObjects.RecognitionResults(0)
Dim zipCode = ocrFirstPage.Lines.
First(Function(ln) ln.Text.Contains("Zip Code 1900"))
Dim zipCodeText = zipCode.Text
Dim zipPoints = zipCode.BoundingBox
绘制文本部分:
每行可识别的文本指定包含字符串的多边形。
绘制与每个文本行(其 boundingBox 数组)相对应的 Polygons:
每个 OcrObjects.RecognitionResults 类都有一个 ClockwiseOrientation 属性,该属性定义了由OCR标识的文本的旋转角度。
该角度可用于旋转图形(或最终使多边形变直)。
例如,使用 Windows Forms PictureBox 控件作为画布,并使用 GraphicsPath 作为 Polygons 的容器,以下代码将绘制前5个多边形:
' Holds the deserialized OCR results; it stays Nothing until Deserialize() has run,
' which the Paint handler checks before drawing.
Dim ocrObjects As OcrResults.Results = Nothing
' (...)
' Somewhere...
' Deserialize the response, then invalidate the PictureBox so that it is repainted.
ocrObjects = New OcrResults(json).Deserialize()
PictureBox1.Invalidate()
' (...)
''' <summary>
''' Draws the bounding polygons of the first recognized text lines (at most five)
''' of the first recognition result onto PictureBox1.
''' </summary>
''' <param name="sender">The control that raised the Paint event.</param>
''' <param name="e">Paint arguments; e.Graphics is the surface drawn on.</param>
Private Sub PictureBox1_Paint(sender As Object, e As PaintEventArgs) Handles PictureBox1.Paint
    ' Nothing to draw until a response has been deserialized.
    If ocrObjects Is Nothing Then Return

    Dim ocr = ocrObjects.RecognitionResults(0)
    'Dim angle As Single = Convert.ToSingle(ocr.ClockwiseOrientation)

    ' Draw at most the first 5 lines. The upper bound is clamped so that a page with
    ' fewer than 5 recognized lines does not raise an out-of-range exception.
    ' NOTE(review): relies on Lines exposing Count (the answer describes it as a
    ' List(Of Line)) - confirm against the class model.
    Dim lastIndex As Integer = ocr.Lines.Count - 1
    If lastIndex > 4 Then lastIndex = 4

    Using path As GraphicsPath = New GraphicsPath
        ' Each line's BoundingBox is added to the path as one closed polygon.
        For i As Integer = 0 To lastIndex
            path.AddPolygon(ocr.Lines(i).BoundingBox)
        Next

        e.Graphics.SmoothingMode = SmoothingMode.AntiAlias
        ' Optional: rotate by the detected text orientation (left disabled, as in the original).
        'e.Graphics.RotateTransform(-angle)
        e.Graphics.DrawPath(Pens.Red, path)
    End Using
End Sub
类模型:
OcrResults