我正在尝试在Ubuntu平台中组合多个JSON文件。例如,两个文件中的数据如下:
File_1
{
"artist":"Gob",
"timestamp":"2011-08-09 01:59:41.352247",
"similars":[
[
"TRTOVWD128F92F4227",
1
],
[
"TRUXNUD128F92F41D0",
0.97294099999999994
],
[
"TRNNOJO128F42992E9",
0.073926900000000004
],
[
"TRGZHTT128F423B2A4",
0.068387699999999996
],
[
"TRGYKYD128F42625F6",
0.065579700000000005
],
[
"TRGIWHY128F42625F5",
0.064063700000000001
],
[
"TRJCJTX128F930CACE",
0.063140100000000005
],
[
"TRMYNWT128F426254B",
0.0613825
],
[
"TRRQOJI128F428C865",
0.061121599999999998
],
[
"TRBNYHM128F428A569",
0.061121599999999998
],
[
"TRDLOYE128F4241E72",
0.060951900000000003
],
[
"TRNRVEW12903CBA24F",
0.060332700000000003
],
[
"TRKKIPG12903CBA083",
0.060155
],
[
"TRZHTGP128F428A63B",
0.059873599999999999
],
[
"TRKQSGZ128F428A851",
0.059873599999999999
],
[
"TRTOPDF128F42AD88A",
0.059687799999999999
],
[
"TRIWOPM128F4241E53",
0.058958900000000002
],
[
"TRCCJUW128F14652DB",
0.057935
],
[
"TRERDDF128F428ECC4",
0.057566600000000002
],
[
"TROKWNN128F421A3D8",
0.057379800000000002
],
[
"TRWGOOK128F42AE765",
0.057125000000000002
],
[
"TRFMNKP128F428ADC0",
0.056875099999999998
],
[
"TRDMLZT128F42A01A8",
0.055808900000000002
],
[
"TRGCJVM128E0780E48",
0.0547389
],
[
"TRRXGAY128F14652D7",
0.0538065
],
[
"TRIPEHH128F1462DFF",
0.052843000000000001
],
[
"TRDUOIP128F147D5A7",
0.051851500000000002
],
[
"TRZCHHD12903CC80A1",
0.051251699999999997
],
[
"TRFDDQS128F426243F",
0.051018300000000003
],
[
"TRZDKAR128F42591B8",
0.050740899999999999
],
[
"TRDVXUG128F1456CBF",
0.050486299999999998
],
[
"TRULRYN128F145FC1C",
0.050219800000000002
],
[
"TRMOWIA128F425CE0F",
0.049977500000000001
],
[
"TRUVPMZ128F42B6DF3",
0.049762000000000001
],
[
"TRSBDWW128F4262666",
0.049643699999999999
],
[
"TRKPHWQ128F4264F8C",
0.0495173
],
[
"TRBBLXU128F42623A1",
0.049416700000000001
],
[
"TRJKLLM128F1456C57",
0.049001599999999999
],
[
"TRSAAEI128F4216C24",
0.048813500000000003
],
[
"TRFXICT128F4264F8A",
0.048776199999999999
],
[
"TRINVLH12903CBE5A1",
0.048334500000000002
],
[
"TRMUUJR128F4262475",
0.048306500000000002
],
[
"TRTORTD128F1456AFA",
0.0468265
],
[
"TRECUJO12903CA7120",
0.046065599999999998
],
[
"TRXIRBQ128F93431BB",
0.0456938
],
[
"TRFDDVK128F42B6DF0",
0.045623799999999999
],
[
"TRSRGPM128F421A30B",
0.043976800000000003
],
[
"TRVUPPR128F429507D",
0.042872500000000001
],
[
"TRMHCZC128F428A4CD",
0.040675200000000002
],
[
"TRUFDRV128F4262352",
0.040675200000000002
],
[
"TRUZZHT128F93229AF",
0.039422199999999998
],
[
"TRLSIHL128F429AF18",
0.039002099999999998
],
[
"TRGETCK128F1460DB1",
0.038499499999999999
],
[
"TRSXXNU128F428AEF2",
0.038303799999999999
],
[
"TRFZXSY128F9330D9F",
0.037855199999999999
],
[
"TRPHFYF128F92F27FA",
0.037772100000000003
],
[
"TRNRHSL128F9337B55",
0.036998000000000003
],
[
"TRPTGNZ128F421A56B",
0.036713099999999999
],
[
"TRPAASI128F9337B6E",
0.036410499999999998
],
[
"TRGCROO128F93431C4",
0.035754300000000003
],
[
"TRCUHZL128F4235446",
0.034968699999999998
],
[
"TRDPOTJ128F429AF0C",
0.034860500000000003
],
[
"TROZUXM128F42790A2",
0.0346483
],
[
"TRJVLOQ128F9345A82",
0.034547799999999997
],
[
"TRQTFRP128F145FC1E",
0.033934600000000002
],
[
"TRQEWHR128F421A3F5",
0.032314599999999999
],
[
"TRNTPJA128F4265039",
0.030702900000000002
],
[
"TRDGXWY12903CF52BD",
0.030292300000000001
],
[
"TRBLEMZ128F93102D0",
0.029224300000000002
],
[
"TRBUUYO128F421A405",
0.028448500000000002
],
[
"TREVBDI12903CED7E6",
0.0279674
],
[
"TRKREBF128F429B317",
0.0258321
],
[
"TRZBYPR128F4233A8D",
0.025655000000000001
],
[
"TRTAZUQ12903CFEA78",
0.024545399999999998
],
[
"TRAIPRO128F429AE69",
0.024304699999999999
],
[
"TRTTVUZ128F92FADD3",
0.023320899999999999
],
[
"TRUYEJI128F4265041",
0.022173700000000001
],
[
"TRAXVGT128F9344507",
0.0213992
],
[
"TRJJBLH128F4260DA1",
0.0175365
],
[
"TRAMCWR128F4233F7F",
0.0161158
],
[
"TRXBLME128F424330F",
0.015760900000000001
],
[
"TRMUQXM128F4260D99",
0.015696000000000002
],
[
"TRHRZBJ128EF345514",
0.0156951
],
[
"TRJXIBT128F42454DB",
0.014519199999999999
],
[
"TRTHPOY128F9345AA5",
0.0137264
],
[
"TRRFGJU128F933B2E6",
0.0012336199999999999
],
[
"TRMYJUA128F428A590",
0.00123149
],
[
"TRNMVTE128F933B2EC",
0.00122703
],
[
"TRYALZM128F1483C7D",
0.0012245299999999999
],
[
"TRZVEJU128F4234F4E",
0.00121805
],
[
"TRQAZDO128F145639F",
0.0012166600000000001
],
[
"TRJXNJM12903CF57ED",
0.0012155
],
[
"TRVAOGO128F427C9D6",
0.00120951
],
[
"TRZMZDS128F422843B",
0.0012065000000000001
],
[
"TRXIEOF12903CE8212",
0.0012058699999999999
],
[
"TRPVVUG128F42A36AA",
0.0012057599999999999
],
[
"TRXGVXS128F428AA5C",
0.0012019400000000001
],
[
"TRUBOGF128E078A5B9",
0.0012017900000000001
],
[
"TRITZSB128F4277CC2",
0.0012014
],
[
"TRGHPHX128F9343544",
0.0011975600000000001
],
[
"TRUKWPE128F428114F",
0.00119666
],
[
"TROBGRB128F93229AB",
0.0011964199999999999
],
[
"TRGKTMW12903CFAE65",
0.00119637
]
],
"tags":[
[
"punk rock",
"100"
],
[
"punk",
"60"
]
],
"track_id":"TRAAAFD128F92F423A",
"title":"Face the Ashes"
}
File_2
{
"artist":"CLP",
"timestamp":"2011-08-02 06:36:59.879759",
"similars":[
],
"tags":[
],
"track_id":"TRAAAVG12903CFA543",
"title":"Insatiable (Instrumental Version)"
}
我写了一个Python脚本来组合它们。我在每条记录后添加了一个新行和一个逗号。
import glob
read_files = glob.glob("*.json")
with open("merged_file.json", "wb") as outfile:
for f in read_files:
with open(f, "rb") as infile:
outfile.write(infile.read())
outfile.write(',\n')
合并文件的输出是:
{
"artist":"Gob",
"timestamp":"2011-08-09 01:59:41.352247",
"similars":[
[
"TRTOVWD128F92F4227",
1
],
[
"TRUXNUD128F92F41D0",
0.97294099999999994
],
[
"TRNNOJO128F42992E9",
0.073926900000000004
],
[
"TRGZHTT128F423B2A4",
0.068387699999999996
],
[
"TRGYKYD128F42625F6",
0.065579700000000005
],
[
"TRGIWHY128F42625F5",
0.064063700000000001
],
[
"TRJCJTX128F930CACE",
0.063140100000000005
],
[
"TRMYNWT128F426254B",
0.0613825
],
[
"TRRQOJI128F428C865",
0.061121599999999998
],
[
"TRBNYHM128F428A569",
0.061121599999999998
],
[
"TRDLOYE128F4241E72",
0.060951900000000003
],
[
"TRNRVEW12903CBA24F",
0.060332700000000003
],
[
"TRKKIPG12903CBA083",
0.060155
],
[
"TRZHTGP128F428A63B",
0.059873599999999999
],
[
"TRKQSGZ128F428A851",
0.059873599999999999
],
[
"TRTOPDF128F42AD88A",
0.059687799999999999
],
[
"TRIWOPM128F4241E53",
0.058958900000000002
],
[
"TRCCJUW128F14652DB",
0.057935
],
[
"TRERDDF128F428ECC4",
0.057566600000000002
],
[
"TROKWNN128F421A3D8",
0.057379800000000002
],
[
"TRWGOOK128F42AE765",
0.057125000000000002
],
[
"TRFMNKP128F428ADC0",
0.056875099999999998
],
[
"TRDMLZT128F42A01A8",
0.055808900000000002
],
[
"TRGCJVM128E0780E48",
0.0547389
],
[
"TRRXGAY128F14652D7",
0.0538065
],
[
"TRIPEHH128F1462DFF",
0.052843000000000001
],
[
"TRDUOIP128F147D5A7",
0.051851500000000002
],
[
"TRZCHHD12903CC80A1",
0.051251699999999997
],
[
"TRFDDQS128F426243F",
0.051018300000000003
],
[
"TRZDKAR128F42591B8",
0.050740899999999999
],
[
"TRDVXUG128F1456CBF",
0.050486299999999998
],
[
"TRULRYN128F145FC1C",
0.050219800000000002
],
[
"TRMOWIA128F425CE0F",
0.049977500000000001
],
[
"TRUVPMZ128F42B6DF3",
0.049762000000000001
],
[
"TRSBDWW128F4262666",
0.049643699999999999
],
[
"TRKPHWQ128F4264F8C",
0.0495173
],
[
"TRBBLXU128F42623A1",
0.049416700000000001
],
[
"TRJKLLM128F1456C57",
0.049001599999999999
],
[
"TRSAAEI128F4216C24",
0.048813500000000003
],
[
"TRFXICT128F4264F8A",
0.048776199999999999
],
[
"TRINVLH12903CBE5A1",
0.048334500000000002
],
[
"TRMUUJR128F4262475",
0.048306500000000002
],
[
"TRTORTD128F1456AFA",
0.0468265
],
[
"TRECUJO12903CA7120",
0.046065599999999998
],
[
"TRXIRBQ128F93431BB",
0.0456938
],
[
"TRFDDVK128F42B6DF0",
0.045623799999999999
],
[
"TRSRGPM128F421A30B",
0.043976800000000003
],
[
"TRVUPPR128F429507D",
0.042872500000000001
],
[
"TRMHCZC128F428A4CD",
0.040675200000000002
],
[
"TRUFDRV128F4262352",
0.040675200000000002
],
[
"TRUZZHT128F93229AF",
0.039422199999999998
],
[
"TRLSIHL128F429AF18",
0.039002099999999998
],
[
"TRGETCK128F1460DB1",
0.038499499999999999
],
[
"TRSXXNU128F428AEF2",
0.038303799999999999
],
[
"TRFZXSY128F9330D9F",
0.037855199999999999
],
[
"TRPHFYF128F92F27FA",
0.037772100000000003
],
[
"TRNRHSL128F9337B55",
0.036998000000000003
],
[
"TRPTGNZ128F421A56B",
0.036713099999999999
],
[
"TRPAASI128F9337B6E",
0.036410499999999998
],
[
"TRGCROO128F93431C4",
0.035754300000000003
],
[
"TRCUHZL128F4235446",
0.034968699999999998
],
[
"TRDPOTJ128F429AF0C",
0.034860500000000003
],
[
"TROZUXM128F42790A2",
0.0346483
],
[
"TRJVLOQ128F9345A82",
0.034547799999999997
],
[
"TRQTFRP128F145FC1E",
0.033934600000000002
],
[
"TRQEWHR128F421A3F5",
0.032314599999999999
],
[
"TRNTPJA128F4265039",
0.030702900000000002
],
[
"TRDGXWY12903CF52BD",
0.030292300000000001
],
[
"TRBLEMZ128F93102D0",
0.029224300000000002
],
[
"TRBUUYO128F421A405",
0.028448500000000002
],
[
"TREVBDI12903CED7E6",
0.0279674
],
[
"TRKREBF128F429B317",
0.0258321
],
[
"TRZBYPR128F4233A8D",
0.025655000000000001
],
[
"TRTAZUQ12903CFEA78",
0.024545399999999998
],
[
"TRAIPRO128F429AE69",
0.024304699999999999
],
[
"TRTTVUZ128F92FADD3",
0.023320899999999999
],
[
"TRUYEJI128F4265041",
0.022173700000000001
],
[
"TRAXVGT128F9344507",
0.0213992
],
[
"TRJJBLH128F4260DA1",
0.0175365
],
[
"TRAMCWR128F4233F7F",
0.0161158
],
[
"TRXBLME128F424330F",
0.015760900000000001
],
[
"TRMUQXM128F4260D99",
0.015696000000000002
],
[
"TRHRZBJ128EF345514",
0.0156951
],
[
"TRJXIBT128F42454DB",
0.014519199999999999
],
[
"TRTHPOY128F9345AA5",
0.0137264
],
[
"TRRFGJU128F933B2E6",
0.0012336199999999999
],
[
"TRMYJUA128F428A590",
0.00123149
],
[
"TRNMVTE128F933B2EC",
0.00122703
],
[
"TRYALZM128F1483C7D",
0.0012245299999999999
],
[
"TRZVEJU128F4234F4E",
0.00121805
],
[
"TRQAZDO128F145639F",
0.0012166600000000001
],
[
"TRJXNJM12903CF57ED",
0.0012155
],
[
"TRVAOGO128F427C9D6",
0.00120951
],
[
"TRZMZDS128F422843B",
0.0012065000000000001
],
[
"TRXIEOF12903CE8212",
0.0012058699999999999
],
[
"TRPVVUG128F42A36AA",
0.0012057599999999999
],
[
"TRXGVXS128F428AA5C",
0.0012019400000000001
],
[
"TRUBOGF128E078A5B9",
0.0012017900000000001
],
[
"TRITZSB128F4277CC2",
0.0012014
],
[
"TRGHPHX128F9343544",
0.0011975600000000001
],
[
"TRUKWPE128F428114F",
0.00119666
],
[
"TROBGRB128F93229AB",
0.0011964199999999999
],
[
"TRGKTMW12903CFAE65",
0.00119637
]
],
"tags":[
[
"punk rock",
"100"
],
[
"punk",
"60"
]
],
"track_id":"TRAAAFD128F92F423A",
"title":"Face the Ashes"
},
{
"artist":"CLP",
"timestamp":"2011-08-02 06:36:59.879759",
"similars":[
],
"tags":[
],
"track_id":"TRAAAVG12903CFA543",
"title":"Insatiable (Instrumental Version)"
}
当我使用JSON Lint(http://jsonlint.com/)验证这些记录时,它告诉我文件已损坏而不是有效的JSON。即使花了很长时间,我也无法弄清楚合并的问题。任何人对此有任何想法都会有所帮助。
答案 0 :(得分:14)
你不能只是连接两个JSON字符串来制作valid JSON(或者通过将',\n'
添加到每个字符串的末尾来组合它们。
相反,您可以将两个(作为Python对象)组合到Python列表中,然后使用json.dump
将其作为JSON写入文件:
import json
import glob
result = []
for f in glob.glob("*.json"):
with open(f, "rb") as infile:
result.append(json.load(infile))
with open("merged_file.json", "wb") as outfile:
json.dump(result, outfile)
如果你想在没有解决每个JSON文件的(不必要的)中间步骤的情况下这样做,你可以将它们合并到这样的列表中:
import glob
read_files = glob.glob("*.json")
with open("merged_file.json", "wb") as outfile:
outfile.write('[{}]'.format(
','.join([open(f, "rb").read() for f in read_files])))
答案 1 :(得分:3)
您生成的JSON无效。你得到的结果如下:
{...}, {...}
当然无效。
如果你想采用你的方法,你应该删除新文件中的最后一个}
和第一个{
,然后合并它们(你仍然需要确保JSON文件你想合并的是有效格式。)
我建议我使用json
模块来实现您需要实现的目标,它更清晰。
答案 2 :(得分:2)
如果您想生成这些对象的JSON 列表,那么您将错过在此处打开[
并关闭]
括号,并且会写一个逗号太多。
让Python解码对象更容易,然后将输出写为新的JSON列表:
import json
import glob
read_files = glob.glob("*.json")
output_list = []
for f in read_files:
with open(f, "rb") as infile:
output_list.append(json.load(infile))
with open("merged_file.json", "wb") as outfile:
json.dump(output_list, outfile)
答案 3 :(得分:1)
Python解决方案:
你有file1.json和file2.json文件。
每个文件都有结构:
[{"key1": "value1"}] - (in file1)
[{"key2": "value2"}] - (in file2)
您的目标是合并它们并获得下一个视图:
[{"key1": "value1"},
{"key2": "value2"}]
代码:
import glob
glob_data = []
for file in glob.glob('../../file*.json'):
with open(file) as json_file:
data = json.load(json_file)
i = 0
while i < len(data):
glob_data.append(data[i])
i += 1
with open('../../finalFile.json', 'w') as f:
json.dump(glob_data, f, indent=4)
答案 4 :(得分:1)
有一个名为jsonmerge
的模块,用于合并字典。只需提供两个字典就可以非常简单地使用它,也可以定义描述如何合并的模式,例如代替覆盖相同的键,自动创建列表并将其追加。
base = {
"foo": 1,
"bar": [ "one" ],
}
head = {
"bar": [ "two" ],
"baz": "Hello, world!"
}
from jsonmerge import merge
result = merge(base, head)
print(result)
>>> {'foo': 1, 'bar': ['two'], 'baz': 'Hello, world!'}
更多具有复杂规则的示例:https://pypi.org/project/jsonmerge/#description