这非常基本:几个月前我成功使用python在单个目录中解析json文件。但现在我无法弄清楚我是如何调整它的(队友想出了代码),以便我可以将数据转换为更有用的csv格式。
目前,在使用Python Launcher或Terminal时,我正处于zilch状态。
解析器的样子:
import codecs
import json
import os
import sys
try:
import unicodecsv as csv
except ImportError:
import csv
OUTPUT_FILE = 'output.csv'
def process_file(infile, writer):
print('Processing file: %s' % infile)
with codecs.open(infile, encoding='utf-8') as infile:
data = json.load(infile)
for item in data:
_id = item['id']
description = item['description']
for gov in item['source']:
gov_id = gov['name']
for source in item['secondarySource']:
source_id = source['sourceId']
name = source['name']
party = source['party']
writer.writerow([_id, description, gov_id, source_id, name, party])
def process_files_in_directory(directory, outfile):
with codecs.open(outfile, 'w') as outfile:
writer = csv.writer(outfile)
writer.writerow(["id", "description", "branch", "sourceID", "name", "party"])
for f in os.listdir(path):
if f.endswith('.json'):
process_file(f, writer)
USAGE = """
Usage:
python json_parser.py <source_directory> [<output_file>]
Where source_directory is path to directory with input JSON files.
output_file is optional -- defaults to %s
File names must end with .json
""" % OUTPUT_FILE
if __name__=='__main__':
try:
directory = sys.argv[1]
except IndexError:
print(USAGE)
sys.exit(0)
if len(sys.argv) > 2:
outfile = sys.argv[2]
else:
outfile = OUTPUT_FILE
process_files_in_directory(directory, outfile)
答案 0 :(得分:0)
您的脚本存在一些格式问题。我不确定它们是否与问题有关...这是你脚本的新版本。基本思想有效,但您可能希望格式化CSV输出以使其更具可读性。为了证明这是有效的,我从命令行运行:
python stackoverflow\junk.py stackoverflow\mydir
stackoverflow\mydir
有两个文件:one.json
和two.json
。
以下代码包含上述评论中的修复
import codecs
import json
import os
import sys
try:
import unicodecsv as csv
except ImportError:
import csv
OUTPUT_FILE = 'output.csv'
def process_file(infile, writer):
print('Processing file: %s' % infile)
with codecs.open(infile, encoding='utf-8') as infile:
data = json.load(infile)
for item in data:
_id = item['id']
description = item['description']
for gov in item['source']:
gov_id = gov['name']
for source in item['secondarySource']:
source_id = source['sourceId']
name = source['name']
party = source['party']
writer.writerow([_id, description, gov_id, source_id, name, party])
def process_files_in_directory(directory, outfile):
with codecs.open(outfile, 'w') as outfile:
writer = csv.writer(outfile)
writer.writerow(["id", "description", "branch", "sourceID", "name", "party"])
for f in os.listdir(directory):
if f.endswith('.json'):
process_file(os.path.join(directory, f), writer)
USAGE = """
Usage:
python json_parser.py <source_directory> [<output_file>]
Where source_directory is path to directory with input JSON files.
output_file is optional -- defaults to %s
File names must end with .json
""" % OUTPUT_FILE
if __name__ == '__main__':
try:
directory = sys.argv[1]
except IndexError:
print(USAGE)
sys.exit(0)
if len(sys.argv) > 2:
outfile = sys.argv[2]
else:
outfile = OUTPUT_FILE
process_files_in_directory(directory, outfile)