我对 Python 比较陌生。我试图将一个文件夹中的所有JSON文件合并为单个JSON文件。合并本身我已经做到了,但我想删除每个文件第一行中的一些字符,以使合并后的整个JSON有效。
# Script to combine all jsons but need to remove the closing , at the end
import glob
import re
# Collect every JSON file that currently sits in the data folder.
read_files = glob.glob("bus_stop_1012/*.json")
# NOTE(review): str literals are written to a file opened in "wb" mode,
# which presumably targets Python 2 -- confirm before running on Python 3.
outfile = open("bus_stop_1012/bus_arrival_1012.json", "wb")
try:
    # The combined file starts with a single space.
    outfile.write(' ')
    for source_path in read_files:
        # Copy the file's contents verbatim ...
        with open(source_path, "rb") as source:
            outfile.write(source.read())
        # ... and follow them with a comma separator.
        outfile.write(',')
    # Step back one byte from the current position so the write below
    # replaces the last byte written, then close the structure.
    outfile.seek(-1, 1)
    outfile.write(']}')
finally:
    outfile.close()
以2个JSON文件为例,生成的单个JSON文件如下:
{"data": [{"time": "2016-03-02 17:45:20 SGT+0800", "result":{
"BusStopID": "1012",
"Services": [
{
"NextBus": {
"EstimatedArrival": "2016-03-02T17:48:21+08:00",
"Feature": "WAB",
"Latitude": "1.2871405",
"Load": "Seats Available",
"Longitude": "103.8456715",
"VisitNumber": "1"
},
"Operator": "SBST",
"OriginatingID": "10589",
"ServiceNo": "12",
"Status": "In Operation",
"SubsequentBus": {
"EstimatedArrival": "2016-03-02T17:56:02+08:00",
"Feature": "WAB",
"Latitude": "0",
"Load": "Seats Available",
"Longitude": "0",
"VisitNumber": "1"
},
"SubsequentBus3": {
"EstimatedArrival": "2016-03-02T18:06:02+08:00",
"Feature": "WAB",
"Latitude": "0",
"Load": "Seats Available",
"Longitude": "0",
"VisitNumber": "1"
},
"TerminatingID": "77009"
}
],
"odata.metadata":
"http://datamall2.mytransport.sg/ltaodataservice/$metadata#BusArrival/@Element"
}},{"data": [{"time": "2016-03-02 17:49:36 SGT+0800", "result":{
"BusStopID": "1012",
"Services": [
{
"NextBus": {
"EstimatedArrival": "2016-03-02T17:48:47+08:00",
"Feature": "WAB",
"Latitude": "1.2944553333333333",
"Load": "Seats Available",
"Longitude": "103.85045283333334",
"VisitNumber": "1"
},
"Operator": "SBST",
"OriginatingID": "10589",
"ServiceNo": "12",
"Status": "In Operation",
"SubsequentBus": {
"EstimatedArrival": "2016-03-02T17:58:26+08:00",
"Feature": "WAB",
"Latitude": "1.2821243333333334",
"Load": "Seats Available",
"Longitude": "103.841401",
"VisitNumber": "1"
},
"SubsequentBus3": {
"EstimatedArrival": "2016-03-02T18:06:02+08:00",
"Feature": "WAB",
"Latitude": "0",
"Load": "Seats Available",
"Longitude": "0",
"VisitNumber": "1"
},
"TerminatingID": "77009"
}
],
"odata.metadata": "http://datamall2.mytransport.sg/ltaodataservice/$metadata#BusArrival/@Element"
}}]}
我需要想办法去掉每个后续JSON文件开头的 {"data": [ 。
答案 0 :(得分:1)
你可以从JSON解码,提取你想要的元素,然后再将它们写成JSON。
如果目标是生成一个大的 {"data": [....]} 列表,
只要小心处理元素之间的逗号(避免写出多余的末尾逗号),就可以逐个写出列表中的每个元素:
import glob
import json
def combine_json_files(folder="bus_stop_1012",
                       output_name="bus_arrival_1012.json"):
    """Merge the ``data`` lists of every JSON file in *folder* into one file.

    Each ``*.json`` file in *folder* is expected to hold an object with a
    ``"data"`` list.  The elements of all those lists are written, in
    sorted file-name order, to ``folder/output_name`` as a single
    ``{"data": [...]}`` document.

    Files that cannot be decoded, or that do not have a ``"data"`` entry,
    are reported on stdout and skipped.

    Args:
        folder: Directory that holds the input files and receives the output.
        output_name: File name of the combined document inside *folder*.

    Returns:
        The number of elements written to the combined ``data`` list.
    """
    # Local import so the block does not depend on a new file-level import.
    import os

    output_path = os.path.join(folder, output_name)
    output_abs = os.path.abspath(output_path)

    # List the inputs before the output is (re)created.  The output also
    # matches "*.json", so an output left over from an earlier run has to be
    # filtered out or it would be read back as an (already truncated) input.
    # glob returns names in arbitrary order; sort for a reproducible result.
    read_files = sorted(
        path for path in glob.glob(os.path.join(folder, "*.json"))
        if os.path.abspath(path) != output_abs
    )

    written = 0
    # Text mode: json.dump emits str, which a binary file rejects on Python 3.
    with open(output_path, "w") as outfile:
        # this is the beginning of the combined file
        outfile.write('{"data": [\n')
        sep = ''
        for path in read_files:
            try:
                # Binary mode lets the json module detect the encoding itself
                # instead of relying on the platform default.
                with open(path, "rb") as infile:
                    entries = json.load(infile)['data']
            except (ValueError, KeyError, TypeError):
                # ValueError: invalid JSON or undecodable bytes;
                # KeyError/TypeError: no "data" entry at the top level.
                print('Failed to load {}'.format(path))
                continue
            for obj in entries:
                # The separator goes *before* every element except the
                # first, so no trailing comma is ever produced.
                outfile.write(sep)
                json.dump(obj, outfile)
                sep = ','
                written += 1
        outfile.write(']}')
    return written


if __name__ == "__main__":
    combine_json_files()