因此,我一直在研究一个程序,该程序需要两个JSON文件,将它们放入两个字典中,并基于公共密钥进行组合,然后将它们写入新的JSON文件中。每个记录都必须是JSON文件中自己的文档。我设法将两个文件合并
我的尝试
OUTFILE = 'c:\\Users\\kiero\\PycharmProjects\\untitled\\source\\AmazonDataSettest.json' # Text file with generated JSON
out_f = open(OUTFILE, 'w')
asin_with_reviews = {}
asin_with_meta_data = {}
final = {}
line_counter = 0
for line in open("c:\\Users\\kiero\\PycharmProjects\\untitled\\source\\videogames.json", 'r'):
json_line = json.loads(line)
asin_with_meta_data[json_line["asin"]] = json_line
if line_counter % 10000 == 0:
print(line_counter)
line_counter += 1
line_counter = 0
for line in open("c:\\Users\\kiero\\PycharmProjects\\untitled\\source\\reviewstest.json", 'r'):
json_line = json.loads(line)
asin = json_line["asin"]
if asin in asin_with_reviews:
contained_reviews = asin_with_reviews[asin]
contained_reviews.append(json_line)
asin_with_reviews[asin] = contained_reviews
else:
new_reviews = []
new_reviews.append(json_line)
asin_with_reviews[asin] = new_reviews
if line_counter % 10000 == 0:
print(line_counter)
line_counter += 1
line_counter = 0
for key in (asin_with_meta_data.keys() | asin_with_reviews.keys()):
if key in asin_with_meta_data: final.setdefault(key, []).append(asin_with_meta_data[key])
if key in asin_with_reviews: final.setdefault(key, []).append(asin_with_reviews[key])
if line_counter % 10000 == 0:
print(line_counter)
line_counter += 1
print("final dict merged")
list(final.keys())
with open("c:\\Users\\kiero\\PycharmProjects\\untitled\\source\\AmazonDataSettest.json", "w") as json_file:
for key in final.items():
print(json.dumps(key), file=json_file)
现在它将每个JSON对象写入其自己的行,但是,它没有以正确的格式进行操作。我以这种格式获取它
[
"B000CSR2WE",
[
{
"asin":"B000CSR2WE",
"description":"Sega CD game",
"price":199.99,
"imUrl":"http://ecx.images-amazon.com/images/I/212FW43G1JL.jpg",
"related":{
"also_bought":[
"B0007WC7ES",
"B000A7DZ9Q",
"B00002ST6Y",
"B0007UF2DI",
"B000035XN5",
"B000K5ZW1C",
"B000VMMXTI",
"B000GO5BOK"
],
"buy_after_viewing":[
"B0009OGJ02"
]
},
"salesRank":{
"Video Games":38713
},
"categories":[
[
"Video Games",
"Kids & Family"
],
[
"Video Games",
"More Systems",
"Sega CD",
"Games"
]
],
[
{
"reviewerID":"A1G0VFQ9198IUF",
"asin":"B00000DMAU",
"reviewerName":"al",
"helpful":[
1,
1
],
"reviewText":"........",
"overall":5.0,
"summary":"the first true race game",
"unixReviewTime":1339372800,
"reviewTime":"06 11, 2012"
},
{
"reviewerID":"ACSZTM3KHFPBL",
"asin":"B00000DMAU",
"reviewerName":"Andy Griffith \"Bloodrayne\"",
"helpful":[
0,
0
],
"reviewText":"This is, by far, undeniably the",
"overall":5.0,
"summary":"....",
"unixReviewTime":1339372800,
"reviewTime":"06 11, 2012"
}
}
]
]
当我需要采用以下格式时:
{
"asin":"B000CSR2WE",
"description":"Sega CD game",
"price":199.99,
"imUrl":"http://ecx.images-amazon.com/images/I/212FW43G1JL.jpg",
"related":{
"also_bought":[
"B0007WC7ES",
"B000A7DZ9Q",
"B00002ST6Y",
"B0007UF2DI",
"B000035XN5",
"B000K5ZW1C",
"B000VMMXTI",
"B000GO5BOK"
],
"buy_after_viewing":[
"B0009OGJ02"
]
},
"salesRank":{
"Video Games":38713
},
"categories":[
[
"Video Games",
"Kids & Family"
],
[
"Video Games",
"More Systems",
"Sega CD",
"Games",
[
{
"reviewerID":"A1G0VFQ9198IUF",
"asin":"B00000DMAU",
"reviewerName":"al",
"helpful":[
1,
1
],
"reviewText":"........",
"overall":5.0,
"summary":"the first true race game",
"unixReviewTime":1339372800,
"reviewTime":"06 11, 2012"
},
{
"reviewerID":"ACSZTM3KHFPBL",
"asin":"B00000DMAU",
"reviewerName":"Andy Griffith \"Bloodrayne\"",
"helpful":[
0,
0
],
"reviewText":"This is, by far, undeniably the",
"overall":5.0,
"summary":"....",
"unixReviewTime":1339372800,
"reviewTime":"06 11, 2012"
}
]
]
}