Question

我想循环遍历一批类似下面这样的 JSON 文件：

{
"_notes": [], 
"deviceid": "353422071089618", 
"grp-milkuse/milksold": "0.0", 
"hh_id": "0753628391", 
"regdate": "2017-10-03", 
"gpsloc": "-9.10112605948487 32.897751368000414 1488.8210801196851 4.0", 
"grp-milkuse/milkprocess": "0.0", 
"_xform_id_string": "ADGG-LNG01-20170725", 
"hh_country": "2", 
"meta/instanceID": "uuid:0b3b8257-f154-42e8-ae17-119bfa04ceca", 
"_duration": "", 
"visitdate": "2017-10-03", 
"hh_region": "1007", 
"_geolocation": [
    -9.10112605948487, 
    32.897751368000414
], 
"hh_district": "1037", 
"datacollid": "0758990688", 
"grp-milkuse/milkcalf": "0.0", 
"_status": "submitted_via_web", 
"formhub/uuid": "98dfbfd65ef24a92a46d6f794e748627", 
"rpt_animrec": [
    {
        "rpt_animrec/grp_animrec/cowmilked": "3", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015233", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/heartgirth": "150.0", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/anim_weight/weight": "200.0", 
        "rpt_animrec/grp_feedwater/feedtype": "3 4", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/served": "2"
    }, 
    {
        "rpt_animrec/sirehastag": "2", 
        "rpt_animrec/siredetails/sirename": "Nil", 
        "rpt_animrec/grp_feedwater/feedtype": "2 3 4", 
        "rpt_animrec/siredetails/sirebreed": "2", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015236", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/grp_service/dateserv": "2016-12-22", 
        "rpt_animrec/anim_weight/weight": "250.0", 
        "rpt_animrec/siredetails/sirecnty": "Nil", 
        "rpt_animrec/grp_servicedtls/servechange": "2", 
        "rpt_animrec/grp_servicedtls/servsourcebull": "1", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/served": "1", 
        "rpt_animrec/grp_animrec/cowmilked": "3", 
        "rpt_animrec/grp_servicedtls/servicechangeyes": "1", 
        "rpt_animrec/grp_servicedtls/sercost": "15000.0", 
        "rpt_animrec/anim_weight/heartgirth": "160.0", 
        "rpt_animrec/siredetails/sirecomp": "5", 
        "rpt_animrec/grp_service/servtype": "1"
    }, 
    {
        "rpt_animrec/sirehastag": "2", 
        "rpt_animrec/siredetails/sirename": "Nill", 
        "rpt_animrec/grp_feedwater/feedtype": "1 3 4", 
        "rpt_animrec/siredetails/sirebreed": "2", 
        "rpt_animrec/vaccinated": "2", 
        "rpt_animrec/calved": "2", 
        "rpt_animrec/grp_animrec/tagid": "TZN000404015237", 
        "rpt_animrec/injuries": "2", 
        "rpt_animrec/anim_weight/bodyscore": "3.0", 
        "rpt_animrec/grp_service/dateserv": "2017-02-09", 
        "rpt_animrec/anim_weight/weight": "350.0", 
        "rpt_animrec/siredetails/sirecnty": "Nill", 
        "rpt_animrec/grp_servicedtls/servechange": "2", 
        "rpt_animrec/grp_servicedtls/servsourcebull": "1", 
        "rpt_animrec/parasites": "2", 
        "rpt_animrec/grp_feedwater/watertype": "3", 
        "rpt_animrec/served": "1", 
        "rpt_animrec/grp_animrec/cowmilked": "2", 
        "rpt_animrec/grp_servicedtls/servicechangeyes": "1", 
        "rpt_animrec/drydate": "2017-07-22", 
        "rpt_animrec/grp_servicedtls/sercost": "15000.0", 
        "rpt_animrec/anim_weight/heartgirth": "170.0", 
        "rpt_animrec/siredetails/sirecomp": "5", 
        "rpt_animrec/grp_service/servtype": "1"
    }
], 
"_bamboo_dataset_id": "", 
"start_time": "2017-10-03T13:25:01.529+03", 
"_uuid": "0b3b8257-f154-42e8-ae17-119bfa04ceca", 
"_tags": [], 
"grp-milkuse/milkprice": "0.0", 
"_userform_id": "adggtnz_ADGG-LNG01-20170725", 
"_submitted_by": null, 
"meta/instanceName": "ADGG-LNG01-20170725-HH0753628391", 
"enumtype": "2", 
"hh_village": "4835", 
"grp-milkuse/milkconsumed": "0.0", 
"_submission_time": "2017-10-05T18:35:19", 
"_version": "20170725", 
"_attachments": [], 
"end_time": "2017-10-03T13:31:28.876+03", 
"hh_kebele": "1807", 
"_id": 369982

}

我想循环处理若干个这样的 JSON 文件，并修改其中的 tagid（例如 "rpt_animrec/grp_animrec/tagid": "TZN000404015236"），让它使用不同的前缀。

我想写一个python脚本，它将遍历几个这样的json文件并更改tagid。这是我到目前为止的代码

import json
import os
#python code to update TZN to ETH 
# Source directory with the original JSON files and the directory that
# receives the rewritten copies. Both are joined to the file name by plain
# string concatenation below, so each must end with a path separator.
json_dir="/opt/new/file/20180116/"
json_dir_processed="/opt/new/file/20180116updated/"
# os.listdir returns only the direct entries of json_dir (no recursion).
for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        # Build the output path first, while json_file is still the bare name.
        processed_json = "%s%s" % (json_dir_processed, json_file)
        json_file = json_dir + json_file
        print "Processing %s -> %s" % (json_file, processed_json)
        with open(json_file, 'r') as f:
            json_data = json.load(f)
            # NOTE(review): this reads a top-level key 'ETH' and stores its
            # value under a top-level key 'TZN'; it never touches the tag ids
            # nested inside "rpt_animrec". The sample document shown above has
            # no 'ETH' key, so this lookup raises KeyError.
            json_data['TZN'] = json_data['ETH']
        with open(processed_json, 'w') as f:
            f.write(json.dumps(json_data, indent=4))
    else:
        print "%s not a JSON file" % json_file

Answer 1

import json
import os

def process(fp):
    """Rewrite the tag-id prefix of every animal record in a JSON document.

    Reads one JSON object from the open text file *fp*, replaces ``TZN`` with
    ``ETH`` in the ``rpt_animrec/grp_animrec/tagid`` value of each entry of
    the ``rpt_animrec`` list, and returns the document serialized again with
    a 4-space indent.

    Records without a tag id, and documents whose ``rpt_animrec`` is missing
    or null, are left untouched instead of raising ``KeyError``.
    """
    json_data = json.load(fp)
    key = "rpt_animrec/grp_animrec/tagid"
    # "or []" also covers an explicit null value for rpt_animrec.
    for d in json_data.get('rpt_animrec') or []:
        tag = d.get(key)
        # Skip records that carry no (or an empty) tag id.
        if tag:
            d[key] = tag.replace('TZN', 'ETH')

    return json.dumps(json_data, indent=4)


# Root of the tree that is scanned for *.json files, and the directory that
# receives the rewritten copies (it is not created here, so it must exist).
source_path = '/opt/new/file/20180116/'
dest_path = '/opt/new/file/20180116updated/'

# os.walk yields source_path itself and every directory below it.
for path, dirnames, fnames in os.walk(source_path, topdown=True):
    for fname in fnames:
        if not fname.endswith('.json'):
            continue
        full_path = os.path.join(path, fname)
        # The output is flattened into dest_path: files that share a name in
        # different sub-directories end up overwriting each other.
        end_path = os.path.join(dest_path, fname)
        print('Processing: {full_path} => {dest_path}'.format(
            full_path=full_path,
            dest_path=end_path
        ))
        # Write to the per-file end_path; dest_path is the directory itself
        # and cannot be opened for writing.
        with open(full_path, 'r') as in_fp, open(end_path, 'w') as out_fp:
            out_fp.write(process(in_fp))

os.walk 会遍历整个目录树（包括所有子目录）。注意 topdown 参数只决定目录的访问顺序（先父目录还是先子目录），并不能关闭递归；如果只想处理顶层目录中的文件，可以改用 os.listdir，或者在第一次迭代之后 break。

Answer 2

如果您只想将非特定位置中的一个子字符串替换为另一个子字符串，为什么还要解析json？只需直接操作文件对象返回的json字符串即可。你甚至不需要json模块。

import os
#python code to update TZN to ETH
# Source directory and output directory; both are joined to the file name
# with plain string formatting, so each must end with a path separator.
json_dir="/opt/new/file/20180116/"
json_dir_processed="/opt/new/file/20180116updated/"
for json_file in os.listdir(json_dir):
    if json_file.endswith(".json"):
        processed_json = "%s%s" % (json_dir_processed, json_file)
        json_file = "%s%s" % (json_dir, json_file)
        # A parenthesised single argument prints identically on Python 2 and 3.
        print("Processing %s -> %s" % (json_file, processed_json))
        # Read the input completely before creating the output, and close
        # both handles deterministically instead of relying on garbage
        # collection.
        with open(json_file) as src:
            text = src.read()
        # Plain text substitution: every occurrence of 'TZN' anywhere in the
        # file is replaced, not only the ones inside tag ids.
        with open(processed_json, 'w') as dst:
            dst.write(text.replace('TZN', 'ETH'))
    else:
        print("%s not a JSON file" % json_file)

Answer 3

import json
import glob
import os
import re

from pathlib import Path


def get_json_files(pattern='/home/test/PycharmProjects/test/*.json'):
    """Return the paths of the JSON files to process.

    Args:
        pattern: Glob pattern selecting the input files. Defaults to the
            project directory used by this script.

    Returns:
        A list of matching paths, sorted so that the processing order is
        reproducible (``glob.glob`` itself returns matches in arbitrary
        order). The list is empty when nothing matches.
    """
    return sorted(glob.glob(pattern))


def read_json_file(filename):
    """Load and return the JSON document stored in *filename*.

    The file is decoded as UTF-8 explicitly rather than with the
    locale-dependent default of ``open()``, so non-ASCII text is read the
    same way on every platform.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid JSON
            (``json.JSONDecodeError`` is a subclass).
    """
    with open(filename, encoding='utf-8') as f:
        return json.load(f)


def process_json(json_data):
    """Replace ``TZN`` with ``ETH`` in the tag id of every animal record.

    Args:
        json_data: Parsed JSON document (a dict). Its ``rpt_animrec`` entry,
            when present, is a list of dicts that may each carry a
            ``rpt_animrec/grp_animrec/tagid`` string.

    Returns:
        The same *json_data* object, modified in place.

    Documents without ``rpt_animrec`` (or with a null value for it) and
    records without a non-empty string tag id are left unchanged instead of
    raising ``KeyError``.
    """
    tag_key = 'rpt_animrec/grp_animrec/tagid'
    # "or []" also covers an explicit null value for rpt_animrec.
    for selection in json_data.get('rpt_animrec') or []:
        tag = selection.get(tag_key)
        if isinstance(tag, str) and tag:
            # 'TZN' is a literal, so str.replace is equivalent to the regex
            # substitution and needs no pattern handling.
            selection[tag_key] = tag.replace('TZN', 'ETH')
    return json_data


def write_json(json_data, file_path,
               output_dir="/home/test/PycharmProjects/test/processed"):
    """Write *json_data* as indented JSON into *output_dir*.

    Args:
        json_data: JSON-serializable document to write.
        file_path: Path of the input file; only its base name is used, as
            the name of the output file.
        output_dir: Directory that receives the output file. Defaults to the
            ``processed`` directory used by this script; it is created
            (including parents) when it does not exist yet.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
        TypeError: If *json_data* is not JSON-serializable.
    """
    target_dir = Path(output_dir)
    # Without this, a first run against a fresh checkout fails because the
    # output directory is missing.
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / os.path.basename(file_path)
    with target.open('w') as f:
        json.dump(json_data, f, indent=4)


def process_json_files(json_files):
    """Run the read -> rewrite -> write pipeline for each path in *json_files*.

    Each file is announced on stdout, loaded with ``read_json_file``, passed
    through ``process_json`` and handed to ``write_json`` together with its
    original path.
    """
    for source_file in json_files:
        print("Processing {}".format(source_file))
        updated = process_json(read_json_file(source_file))
        write_json(updated, source_file)


if __name__ == '__main__':
    # Script entry point: collect the input files and process them all.
    process_json_files(get_json_files())

这就是我要做出改变的方法。

用于更新JSON文件的python脚本

3 个答案: