我想循环遍历一批类似下面这样的 JSON 文件:
{
"_notes": [],
"deviceid": "353422071089618",
"grp-milkuse/milksold": "0.0",
"hh_id": "0753628391",
"regdate": "2017-10-03",
"gpsloc": "-9.10112605948487 32.897751368000414 1488.8210801196851 4.0",
"grp-milkuse/milkprocess": "0.0",
"_xform_id_string": "ADGG-LNG01-20170725",
"hh_country": "2",
"meta/instanceID": "uuid:0b3b8257-f154-42e8-ae17-119bfa04ceca",
"_duration": "",
"visitdate": "2017-10-03",
"hh_region": "1007",
"_geolocation": [
-9.10112605948487,
32.897751368000414
],
"hh_district": "1037",
"datacollid": "0758990688",
"grp-milkuse/milkcalf": "0.0",
"_status": "submitted_via_web",
"formhub/uuid": "98dfbfd65ef24a92a46d6f794e748627",
"rpt_animrec": [
{
"rpt_animrec/grp_animrec/cowmilked": "3",
"rpt_animrec/calved": "2",
"rpt_animrec/grp_animrec/tagid": "TZN000404015233",
"rpt_animrec/injuries": "2",
"rpt_animrec/anim_weight/heartgirth": "150.0",
"rpt_animrec/anim_weight/bodyscore": "3.0",
"rpt_animrec/parasites": "2",
"rpt_animrec/grp_feedwater/watertype": "3",
"rpt_animrec/anim_weight/weight": "200.0",
"rpt_animrec/grp_feedwater/feedtype": "3 4",
"rpt_animrec/vaccinated": "2",
"rpt_animrec/served": "2"
},
{
"rpt_animrec/sirehastag": "2",
"rpt_animrec/siredetails/sirename": "Nil",
"rpt_animrec/grp_feedwater/feedtype": "2 3 4",
"rpt_animrec/siredetails/sirebreed": "2",
"rpt_animrec/vaccinated": "2",
"rpt_animrec/calved": "2",
"rpt_animrec/grp_animrec/tagid": "TZN000404015236",
"rpt_animrec/injuries": "2",
"rpt_animrec/anim_weight/bodyscore": "3.0",
"rpt_animrec/grp_service/dateserv": "2016-12-22",
"rpt_animrec/anim_weight/weight": "250.0",
"rpt_animrec/siredetails/sirecnty": "Nil",
"rpt_animrec/grp_servicedtls/servechange": "2",
"rpt_animrec/grp_servicedtls/servsourcebull": "1",
"rpt_animrec/parasites": "2",
"rpt_animrec/grp_feedwater/watertype": "3",
"rpt_animrec/served": "1",
"rpt_animrec/grp_animrec/cowmilked": "3",
"rpt_animrec/grp_servicedtls/servicechangeyes": "1",
"rpt_animrec/grp_servicedtls/sercost": "15000.0",
"rpt_animrec/anim_weight/heartgirth": "160.0",
"rpt_animrec/siredetails/sirecomp": "5",
"rpt_animrec/grp_service/servtype": "1"
},
{
"rpt_animrec/sirehastag": "2",
"rpt_animrec/siredetails/sirename": "Nill",
"rpt_animrec/grp_feedwater/feedtype": "1 3 4",
"rpt_animrec/siredetails/sirebreed": "2",
"rpt_animrec/vaccinated": "2",
"rpt_animrec/calved": "2",
"rpt_animrec/grp_animrec/tagid": "TZN000404015237",
"rpt_animrec/injuries": "2",
"rpt_animrec/anim_weight/bodyscore": "3.0",
"rpt_animrec/grp_service/dateserv": "2017-02-09",
"rpt_animrec/anim_weight/weight": "350.0",
"rpt_animrec/siredetails/sirecnty": "Nill",
"rpt_animrec/grp_servicedtls/servechange": "2",
"rpt_animrec/grp_servicedtls/servsourcebull": "1",
"rpt_animrec/parasites": "2",
"rpt_animrec/grp_feedwater/watertype": "3",
"rpt_animrec/served": "1",
"rpt_animrec/grp_animrec/cowmilked": "2",
"rpt_animrec/grp_servicedtls/servicechangeyes": "1",
"rpt_animrec/drydate": "2017-07-22",
"rpt_animrec/grp_servicedtls/sercost": "15000.0",
"rpt_animrec/anim_weight/heartgirth": "170.0",
"rpt_animrec/siredetails/sirecomp": "5",
"rpt_animrec/grp_service/servtype": "1"
}
],
"_bamboo_dataset_id": "",
"start_time": "2017-10-03T13:25:01.529+03",
"_uuid": "0b3b8257-f154-42e8-ae17-119bfa04ceca",
"_tags": [],
"grp-milkuse/milkprice": "0.0",
"_userform_id": "adggtnz_ADGG-LNG01-20170725",
"_submitted_by": null,
"meta/instanceName": "ADGG-LNG01-20170725-HH0753628391",
"enumtype": "2",
"hh_village": "4835",
"grp-milkuse/milkconsumed": "0.0",
"_submission_time": "2017-10-05T18:35:19",
"_version": "20170725",
"_attachments": [],
"end_time": "2017-10-03T13:31:28.876+03",
"hh_kebele": "1807",
"_id": 369982
}
我想循环遍历多个这样的 JSON 文件,并修改其中的 tagid(例如 "rpt_animrec/grp_animrec/tagid": "TZN000404015236"),把它换成另一个前缀。
我想写一个 Python 脚本来遍历这些 JSON 文件并修改 tagid。这是我目前的代码:
import json
import os
# Rewrite the "TZN" tag-id prefix to "ETH" in every .json file found directly
# inside json_dir, writing each updated document under the same file name
# into json_dir_processed.
json_dir = "/opt/new/file/20180116/"
json_dir_processed = "/opt/new/file/20180116updated/"
tagid_key = "rpt_animrec/grp_animrec/tagid"

for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        processed_json = "%s%s" % (json_dir_processed, json_file)
        json_file = json_dir + json_file
        print("Processing %s -> %s" % (json_file, processed_json))
        with open(json_file, 'r') as f:
            json_data = json.load(f)
        # The tag ids live inside each record of the "rpt_animrec" list, not
        # at the top level, so a top-level 'TZN'/'ETH' key lookup can never
        # work (it raised KeyError). Visit every record instead.
        for record in json_data.get('rpt_animrec', []):
            tagid = record.get(tagid_key)
            # Only swap a leading "TZN"; skip records without a tag id.
            if tagid and tagid.startswith('TZN'):
                record[tagid_key] = 'ETH' + tagid[len('TZN'):]
        with open(processed_json, 'w') as f:
            f.write(json.dumps(json_data, indent=4))
    else:
        print("%s not a JSON file" % json_file)
答案 0 :(得分:1)
import json
import os
def process(fp):
    """Return the JSON document read from *fp* with its tag ids rewritten.

    For every record in the document's "rpt_animrec" list, each "TZN"
    substring in the "rpt_animrec/grp_animrec/tagid" value is replaced with
    "ETH". Records whose tag id is missing, empty or null are left unchanged
    instead of raising KeyError. A document without "rpt_animrec" is
    returned unmodified.

    Args:
        fp: A readable text file object containing one JSON object.

    Returns:
        The updated document serialised as a JSON string with indent=4.
    """
    key = "rpt_animrec/grp_animrec/tagid"
    json_data = json.load(fp)
    for d in json_data.get('rpt_animrec', []):
        tagid = d.get(key)
        if tagid:
            d[key] = tagid.replace('TZN', 'ETH')
    return json.dumps(json_data, indent=4)
source_path = '/opt/new/file/20180116/'
dest_path = '/opt/new/file/20180116updated/'

# Walk source_path (including sub-directories) and write the processed copy
# of every .json file into dest_path under the same file name. Output names
# are not qualified by sub-directory, so equally named files from different
# sub-directories overwrite one another in dest_path.
for path, dirnames, fnames in os.walk(source_path, topdown=True):
    for fname in fnames:
        if not fname.endswith('.json'):
            continue
        full_path = os.path.join(path, fname)
        end_path = os.path.join(dest_path, fname)
        print('Processing: {full_path} => {dest_path}'.format(
            full_path=full_path,
            dest_path=end_path
        ))
        # Transform first so that a failure while reading or parsing does not
        # leave behind an empty, truncated output file.
        with open(full_path, 'r') as in_fp:
            updated = process(in_fp)
        # Write to the per-file end_path; opening dest_path itself (the
        # output directory) for writing fails.
        with open(end_path, 'w') as out_fp:
            out_fp.write(updated)
如果你想遍历整个路径(包括子目录),可以尝试上面这样的写法;原回答建议,如果不需要这样做,可以将 `topdown` 更改为 `False`。(注意:`os.walk` 的 `topdown` 参数只影响遍历顺序,并不会阻止进入子目录;如果只想处理顶层目录,可以改用 `os.listdir`。)
答案 1 :(得分:1)
如果您只想将非特定位置中的一个子字符串替换为另一个子字符串,为什么还要解析json?只需直接操作文件对象返回的json字符串即可。你甚至不需要json模块。
import os
# Update "TZN" to "ETH" by plain text substitution (no JSON parsing): every
# occurrence of the substring anywhere in a .json file is replaced.
json_dir = "/opt/new/file/20180116/"
json_dir_processed = "/opt/new/file/20180116updated/"
for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        processed_json = "%s%s" % (json_dir_processed, json_file)
        json_file = "%s%s" % (json_dir, json_file)
        print("Processing %s -> %s" % (json_file, processed_json))
        # Use context managers so both handles are closed (and the output
        # flushed) as soon as each file is done, rather than relying on
        # garbage collection of the anonymous file objects.
        with open(json_file) as src:
            text = src.read()
        with open(processed_json, 'w') as dst:
            dst.write(text.replace('TZN', 'ETH'))
    else:
        print("%s not a JSON file" % json_file)
答案 2 :(得分:1)
import json
import glob
import os
import re
from pathlib import Path
def get_json_files(pattern='/home/test/PycharmProjects/test/*.json'):
    """Return the paths matching *pattern* in sorted order.

    Args:
        pattern: Glob pattern selecting the input files. Defaults to the
            original hard-coded location so existing no-argument calls keep
            working.

    Returns:
        A list of matching path strings, sorted so that files are processed
        in a deterministic order. The list is empty when nothing matches.
    """
    return sorted(glob.glob(pattern))
def read_json_file(filename):
    """Load and return the JSON document stored in *filename*.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    locale encoding, so non-ASCII content is read the same way everywhere.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value as produced by ``json.load``.
    """
    with open(filename, encoding='utf-8') as f:
        return json.load(f)
def process_json(json_data):
    """Replace "TZN" with "ETH" in every animal record's tag id, in place.

    Each entry of ``json_data['rpt_animrec']`` is inspected. When its
    "rpt_animrec/grp_animrec/tagid" value is present and non-empty, every
    "TZN" substring in it is replaced with "ETH". Documents without an
    "rpt_animrec" list and records without a tag id are left untouched
    instead of raising KeyError.

    Args:
        json_data: Decoded JSON document (a dict); it is modified in place.

    Returns:
        The same ``json_data`` object, for call chaining.
    """
    tag_key = 'rpt_animrec/grp_animrec/tagid'
    for selection in json_data.get('rpt_animrec', []):
        tag = selection.get(tag_key)
        if tag:
            # A literal substring swap needs no regular expression.
            selection[tag_key] = tag.replace('TZN', 'ETH')
    return json_data
def write_json(json_data, file_path,
               output_dir="/home/test/PycharmProjects/test/processed"):
    """Write *json_data* as indented JSON into *output_dir*.

    The output file keeps the base name of *file_path*; only the directory
    changes. *output_dir* is created (with parents) when it does not exist
    yet, so the first run does not fail on a missing directory.

    Args:
        json_data: JSON-serialisable document to write.
        file_path: Path of the source file; only its base name is used.
        output_dir: Directory receiving the output. Defaults to the original
            hard-coded location so existing two-argument calls keep working.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / os.path.basename(file_path)
    # str() keeps open() happy on interpreters that predate path-like support.
    with open(str(target), 'w') as f:
        f.write(json.dumps(json_data, indent=4))
def process_json_files(json_files):
    """Read, update and write out each file listed in *json_files*.

    For every path a progress line is printed, the document is loaded with
    ``read_json_file``, transformed by ``process_json`` and handed to
    ``write_json`` together with the source path.
    """
    for source in json_files:
        print("Processing {}".format(source))
        updated = process_json(read_json_file(source))
        write_json(updated, source)
# Script entry point: collect the input files and process each of them.
if __name__ == '__main__':
    process_json_files(get_json_files())
这就是我实现这一修改的方法。