我有一个包含美国所有州/县的 CSV 文件,想把它转换成如下格式的 JSON:
Alabama: [
{
"county_name":"Alabama -- Statewide",
"fips":1000,
"fips2":"'01000'",
},
但是我为此编写的 Python 代码却产生了以下内容:
State: [{
"county_name":"Baldwin County",
"fips":1003,
"fips2":"'01003'",
"state_name":" Alabama"
},
我想我只需要有人指点一下方向,剩下的我可以自己解决。谢谢你的帮助!这是我的 Python 代码:
import csv
import json
# Every row is appended to the single list stored under the literal key
# 'state'; nothing here groups the rows by the value of the state_name column.
output = {'state': []}

# 'rU' is Python 2's universal-newline read mode.
with open('county_state.csv', 'rU') as csv_file:
    # DictReader yields one dict per CSV row, keyed by the header names.
    for row in csv.DictReader(csv_file):
        output['state'].append({
            'fips': row['fips2'],
            'county': row['county_name'],
        })

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(json.dumps(output))
CSV文件中的一些示例行:
county_name fips fips2 state_name
Autauga County 01001 '01001' Alabama
Baldwin County 01003 '01003' Alabama
Barbour County 01005 '01005' Alabama
Putnam County 12107 '12107' Florida
St. Johns County 12109 '12109' Florida
St. Lucie County 12111 '12111' Florida
Santa Rosa County 12113 '12113' Florida
Emmet County 19063 '19063' Iowa
Fayette County 19065 '19065' Iowa
Floyd County 19067 '19067' Iowa
Franklin County 19069 '19069' Iowa
Fremont County 19071 '19071' Iowa
Greene County 19073 '19073' Iowa
Grundy County 19075 '19075' Iowa
Guthrie County 19077 '19077' Iowa
Hamilton County 19079 '19079' Iowa
Hancock County 19081 '19081' Iowa
答案 0 :(得分:0)
您的数据似乎混用了多种分隔符。如果您能把分隔符统一(例如统一为制表符),下面的代码应该可以解决问题。
from csv import DictReader, QUOTE_NONE

# `csvdata` is the already-opened CSV file (any iterable of lines works).
# QUOTE_NONE makes the reader treat quote characters as ordinary data.
dictreader = DictReader(csvdata, delimiter='\t', quoting=QUOTE_NONE)

# Maps each state name to the list of county records for that state.
output = {}
for row in dictreader:
    # setdefault creates the state's list the first time the state is seen.
    output.setdefault(row['state_name'], []).append({
        'county_name': row['county_name'],
        'fips': row['fips'],
        'fips2': row['fips2'],
        'state_name': row['state_name'],
    })
答案 1 :(得分:0)
我认为您的输入 CSV 文件很可能是用制表符分隔的,而不是空格。如果是这样,下面的代码可以按您想要的格式生成 JSON:
from collections import defaultdict, OrderedDict
import csv
import json
# Group the county rows by state; an unseen state starts with an empty list.
output = defaultdict(list)

# Binary mode is what Python 2's csv module expects; on Python 3 open the
# file in text mode with newline='' instead.
with open('county_state.csv', 'rb') as csv_file:
    reader = csv.DictReader(csv_file, delimiter='\t')
    for row in reader:
        # Keep the columns in header order, dropping state_name because it
        # is already used as the grouping key.
        county = OrderedDict(
            (fieldname, row[fieldname])
            for fieldname in reader.fieldnames
            if fieldname != 'state_name'
        )
        output[row['state_name']].append(county)

# sort the output by state (optional)
output = OrderedDict(sorted(output.items()))

# Parenthesised so the line is valid on both Python 2 and Python 3.
print(json.dumps(output, indent=2))
输出:
{
"Alabama": [
{
"county_name": "Autauga County",
"fips": "01001",
"fips2": "'01001'"
},
{
"county_name": "Baldwin County",
"fips": "01003",
"fips2": "'01003'"
},
{
"county_name": "Barbour County",
"fips": "01005",
"fips2": "'01005'"
}
],
"Florida": [
{
"county_name": "Putnam County",
"fips": "12107",
"fips2": "'12107'"
},
{
"county_name": "St. Johns County",
"fips": "12109",
"fips2": "'12109'"
},
{
"county_name": "St. Lucie County",
"fips": "12111",
"fips2": "'12111'"
},
{
"county_name": "Santa Rosa County",
"fips": "12113",
"fips2": "'12113'"
}
],
"Iowa": [
{
"county_name": "Emmet County",
"fips": "19063",
"fips2": "'19063'"
},
{
"county_name": "Fayette County",
"fips": "19065",
"fips2": "'19065'"
},
{
"county_name": "Floyd County",
"fips": "19067",
"fips2": "'19067'"
},
{
"county_name": "Franklin County",
"fips": "19069",
"fips2": "'19069'"
},
{
"county_name": "Fremont County",
"fips": "19071",
"fips2": "'19071'"
},
{
"county_name": "Greene County",
"fips": "19073",
"fips2": "'19073'"
},
{
"county_name": "Grundy County",
"fips": "19075",
"fips2": "'19075'"
},
{
"county_name": "Guthrie County",
"fips": "19077",
"fips2": "'19077'"
},
{
"county_name": "Hamilton County",
"fips": "19079",
"fips2": "'19079'"
},
{
"county_name": "Hancock County",
"fips": "19081",
"fips2": "'19081'"
}
]
}