我有一个811行的JSON文件需要分析。目前我使用下面的命令来提取我感兴趣的数据(其中 awk
是必需的,因为这份JSON没有把数据放在正确的数组里,而是重复使用同一个键名):
sed 's/},/},\n/g' 1st_run.json |awk '/"characater"/ { gsub("\"characater\"", "\"char" ++n "\"", $0) } 1'| jq -r '.frames.frame.lps.lp|.characters[]|[.code_ascii,.confidence]|@tsv'
这个命令可以正常工作,但输出是一长串没有任何分隔的数据。怎样才能至少在每一行JSON的解析结果之后插入一个分隔符?
我的JSON输入类似:
...
{"response":{"container":{"id":"80d996a1-c267-4fa4-b3f8-f61ff9fda198","timestamp":"2018-Jul-10 17:00:50.829709"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"398","timestamp":"2016-Nov-30 12:56:47.900000","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"67","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"249"},"p":{"x":"1559","y":"249"},"p":{"x":"1559","y":"267"},"p":{"x":"1553","y":"267"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"88"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"96"},"tip":{"poly":{"p":{"x":"1569","y":"248"},"p":{"x":"1575","y":"248"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1585","y":"248"},"p":{"x":"1591","y":"248"},"p":{"x":"1591","y":"267"},"p":{"x":"1585","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"94"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"88"},"tip":{"poly":{"p":{"x":"1602","y":"248"},"p":{"x":"1607","y":"248"},"p":{"x":"1607","y":"266"},"p":{"x":"1602","y":"266"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"99"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"249"},"p":{"x":"1559","y":"249"},"p":{"x":"1559","y":"267"},"p":{"x":"1553","y":"267"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"88"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x"
:"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"96"},"characater":{"poly":{"p":{"x":"1569","y":"248"},"p":{"x":"1575","y":"248"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1585","y":"248"},"p":{"x":"1591","y":"248"},"p":{"x":"1591","y":"267"},"p":{"x":"1585","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"94"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"88"},"characater":{"poly":{"p":{"x":"1602","y":"248"},"p":{"x":"1607","y":"248"},"p":{"x":"1607","y":"266"},"p":{"x":"1602","y":"266"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"99"}},"det_time_us":"776874","poly":{"p":{"x":"1543","y":"237"},"p":{"x":"1618","y":"237"},"p":{"x":"1618","y":"274"},"p":{"x":"1543","y":"274"}}}},"det_time_us":"1883017"}}}
{"response":{"container":{"id":"fa75e8f8-1b44-4f2f-a09b-6fe3b801ca1b","timestamp":"2018-Jul-10 17:00:55.863641"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"399","timestamp":"2016-Nov-30 12:56:48","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"47","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"tip":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"tip":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568"
,"y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"characater":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"characater":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"det_time_us":"600136","poly":{"p":{"x":"1543","y":"238"},"p":{"x":"1618","y":"239"},"p":{"x":"1619","y":"274"},"p":{"x":"1543","y":"273"}}}},"det_time_us":"1495308"}}}
{"response":{"container":{"id":"5c9c773c-a72a-488f-bc49-148dcd6cfa0a","timestamp":"2018-Jul-10 17:01:01.756522"},"id":"00000002-0000-0000-0000-000000000002"},"frames":{"frame":{"id":"400","timestamp":"2016-Nov-30 12:56:48.100000","lps":{"lp":{"licenseplate":"FRJ724","text":"FRJ724","wtext":"FRJ724","confidence":"47","bkcolor":"16777215","color":"16777215","type":"540122","ntip":"6","cct_country_short":"USA","cct_state_short":"NY","tips":{"tip":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"tip":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x":"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"tip":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"tip":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"tip":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"tip":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"ncharacter":"6","characters":{"characater":{"poly":{"p":{"x":"1553","y":"248"},"p":{"x":"1560","y":"248"},"p":{"x":"1560","y":"266"},"p":{"x":"1554","y":"266"}},"bkcolor":"16777215","color":"0","code":"70","code_ascii":"F","confidence":"96"},"characater":{"poly":{"p":{"x":"1561","y":"248"},"p":{"x":"1568","y":"248"},"p":{"x"
:"1568","y":"267"},"p":{"x":"1561","y":"267"}},"bkcolor":"16777215","color":"0","code":"82","code_ascii":"R","confidence":"98"},"characater":{"poly":{"p":{"x":"1569","y":"247"},"p":{"x":"1576","y":"247"},"p":{"x":"1576","y":"267"},"p":{"x":"1569","y":"267"}},"bkcolor":"16777215","color":"0","code":"74","code_ascii":"J","confidence":"96"},"characater":{"poly":{"p":{"x":"1586","y":"248"},"p":{"x":"1592","y":"248"},"p":{"x":"1592","y":"267"},"p":{"x":"1586","y":"267"}},"bkcolor":"16777215","color":"0","code":"55","code_ascii":"7","confidence":"95"},"characater":{"poly":{"p":{"x":"1593","y":"248"},"p":{"x":"1600","y":"248"},"p":{"x":"1600","y":"267"},"p":{"x":"1593","y":"267"}},"bkcolor":"16777215","color":"0","code":"50","code_ascii":"2","confidence":"86"},"characater":{"poly":{"p":{"x":"1601","y":"249"},"p":{"x":"1608","y":"249"},"p":{"x":"1608","y":"265"},"p":{"x":"1601","y":"265"}},"bkcolor":"16777215","color":"0","code":"52","code_ascii":"4","confidence":"63"}},"det_time_us":"457492","poly":{"p":{"x":"1543","y":"238"},"p":{"x":"1618","y":"239"},"p":{"x":"1619","y":"274"},"p":{"x":"1543","y":"273"}}}},"det_time_us":"1311946"}}}
...
4 99
9 95
2 94
3 94
9 97
B 96
A 92
B 94
L 76
E 88
B 90
R 95
1 85
4 99
9 87
2 98
3 97
9 98
B 98
A 94
4 91
9 97
2 90
3 92
9 96
B 98
A 99
我希望在每一行JSON的结果之后插入分隔符,而不依赖每行提取出的项数(该项数等于JSON中的 .ncharacter),
例如:
4 99
9 95
2 94
3 94
9 97
B 96
----------
A 92
B 94
L 76
E 88
B 90
R 95
1 85
4 99
----------
9 87
2 98
3 97
9 98
B 98
A 94
4 91
----------
9 97
2 90
3 92
9 96
B 98
A 99
答案 0 :(得分:0)
您可以使用awk在整个输出的外层补上 [ ],并在每一行的末尾补上 , 。
awk 'BEGIN {print "["} END {print "]"} {gsub(/characater/, "char" ++n); print $0 ","}'
答案 1 :(得分:0)
好吧
我编写了一个能够处理这种格式错误的JSON数据的Python脚本,从而解决了这个问题。
思路是逐行遍历文件,再按子字符串把每一行拆开,提取出 code_ascii
和 confidence
,最终的脚本如下:
#!/usr/bin/python
def mysplit(str):
    """Print one "<character> <confidence>" row per ``code_ascii`` marker.

    The input is treated as plain text rather than parsed as JSON, so it
    also works on records whose repeated keys make them unusable as JSON
    objects.

    Args:
        str: One raw line of the input file. The name shadows the builtin;
            it is kept only so existing callers are not affected.

    Returns:
        None. Rows are written to standard output, one per line.

    NOTE(review): the marker is matched anywhere in the line, so a record
    that carries ``code_ascii`` under more than one parent key produces one
    row per occurrence -- confirm that this is wanted.
    """
    # Everything before the first marker contains no pair, so drop element 0.
    for fragment in str.split("code_ascii")[1:]:
        # A fragment begins with  ":"F","confidence":"88"  so splitting on the
        # double quote puts the character at index 2 and its confidence at
        # index 6.
        parts = fragment.split("\"")
        if len(parts) < 7:
            # Truncated fragment (marker present, values cut off): skip it
            # instead of failing with IndexError halfway through the file.
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(parts[2] + " " + parts[6])
# Input file: one record per line.
filepath = 'test2.json'

with open(filepath) as fp:
    # Iterating the file object yields one line at a time and stops at EOF,
    # which replaces the manual readline() loop and its unused line counter.
    for line in fp:
        mysplit(line)
        # The delimiter goes AFTER each record's rows, so every group is
        # terminated by a separator and the output has no leading one.
        print("----------")
我想这对我来说应该已经够用了……