我是 Python 新手,需要在 Python 2.7 中仅使用标准库,从一个在线的压缩 CSV 文件创建嵌套 JSON。我已经弄清楚了如何访问并解压该文件,但在解析时遇到了一些麻烦。基本上,我需要为每个主键创建一个包含三个高级元素的 JSON 输出:
主键本身、时间序列,以及元数据字典(产品、流向、单位,以及理想情况下每个观测点质量的嵌套时间序列)。
from StringIO import StringIO
from urllib import urlopen
from zipfile import ZipFile
from datetime import datetime
import itertools as it
import csv
import json
import sys

# Download the archive. ZipFile needs a seekable file object, which the HTTP
# response is not, so the payload is buffered in memory first.
url = urlopen("https://www.jodidata.org/_resources/files/downloads/gas-data/jodi_gas_csv_beta.zip")
try:
    myzip = ZipFile(StringIO(url.read()))
finally:
    url.close()

with myzip.open('jodi_gas_beta.csv', 'r') as myCSV:
    # Read the data and sort it by PK + time. groupby() below only merges
    # *adjacent* rows, so rows sharing a PK must be contiguous, and sorting on
    # TIME_PERIOD last puts each series in chronological order.
    reader = sorted(
        csv.DictReader(myCSV),
        key=lambda row: (row['REF_AREA'], row['ENERGY_PRODUCT'],
                         row['FLOW_BREAKDOWN'], row['UNIT_MEASURE'],
                         row['TIME_PERIOD']))

# Initialize containers for output.
myData = []
keys = []
groups = []

# Limiting to first 200 rows for testing ONLY.
for k, g in it.groupby(
        reader[:200],
        key=lambda row: (row['REF_AREA'], row['ENERGY_PRODUCT'],
                         row['FLOW_BREAKDOWN'], row['UNIT_MEASURE'])):
    # A groupby sub-iterator can be consumed only once, so materialize it and
    # reuse the list for both the time series and the quality series.
    rows = list(g)
    keys.append(k)
    groups.append(rows)

    # Metadata: the four PK components plus a nested per-period quality series.
    metadata = dict(zip(('Country', 'Product', 'Flow', 'Unit'), k))
    metadata['Quality'] = [[e['TIME_PERIOD'], int(e['ASSESSMENT_CODE'])]
                           for e in rows]

    myData.append({
        'myPK': ''.join(k),  # concatenated PK components
        # [period, value] pairs; values go through float() first so strings
        # such as "756.0" still convert to an int.
        'TimeSeries': [[e['TIME_PERIOD'], int(float(e['OBS_VALUE']))]
                       for e in rows],
        'Metadata': metadata,
    })

# Output as a JSON string.
print(json.dumps(myData, indent=2))
所以,结果看起来应该是这样的:
{
  "myPK": "AENATGASEXPLNGM3",
  "TimeSeries": [
    ["2015-01", 756],
    ["2015-02", 572],
    ["2015-03", 654]
  ],
  "Metadata": {
    "Country": "AE",
    "Product": "NATGAS",
    "Flow": "EXPLNG",
    "Unit": "M3",
    "Quality": [
      ["2015-01", 3],
      ["2015-02", 3],
      ["2015-03", 3]
    ]
  }
}
答案 0 :(得分:0)
虽然您似乎没有花费太多精力自己解决问题,但我认为这就是您想要的。它利用 operator.itemgetter() 函数来简化从各种容器(例如 `list` 和 `dict`)中检索一系列不同项目的操作。
我还修改了代码,使其更加贴近PEP 8 - Style Guide for Python Code。
import datetime
import csv
from operator import itemgetter
import itertools as it
import json
from StringIO import StringIO
import sys
from urllib import urlopen
from zipfile import ZipFile
# Utility.
def typed_itemgetter(items, callables):
    """Build a row accessor that fetches several items and converts each one.

    Behaves like ``operator.itemgetter(*items)``, except that every retrieved
    value is passed through the callable found at the same position in
    ``callables``.

    Args:
        items: Keys (or indices) to look up in each row, in output order.
        callables: Converters paired positionally with ``items``. A falsy
            entry such as ``None`` leaves the corresponding value unchanged.

    Returns:
        A function that takes one row (anything supporting ``row[item]``) and
        returns a list of the converted values. When ``items`` and
        ``callables`` differ in length, the surplus entries of the longer one
        are ignored.

    Raises:
        Nothing at construction time. The returned function propagates
        whatever ``row[item]`` or a converter raises.
    """
    # Index the row directly rather than going through operator.itemgetter:
    # itemgetter returns a bare value (not a 1-tuple) when given one item, so
    # zipping its result would iterate over the characters of that value, and
    # it refuses to be built with zero items at all.
    pairs = tuple(zip(items, callables))

    def getter(row):
        return [convert(row[item]) if convert else row[item]
                for item, convert in pairs]

    return getter
# Number of (sorted) rows to process. 200 keeps test runs quick; set to None
# to process the whole file.
ROW_LIMIT = 200

# Columns that together identify one series (the "primary key"), and the names
# they are published under in each record's metadata, in the same order.
KEY_FIELDS = ('REF_AREA', 'ENERGY_PRODUCT', 'FLOW_BREAKDOWN', 'UNIT_MEASURE')
METADATA_NAMES = ('Country', 'Product', 'Flow', 'Unit')

# Download the archive. ZipFile needs a seekable file object, which the HTTP
# response is not, so the payload is buffered in memory first.
response = urlopen("https://www.jodidata.org/_resources/files/downloads/gas-data/jodi_gas_csv_beta.zip")
try:
    archive_bytes = response.read()
finally:
    response.close()

with ZipFile(StringIO(archive_bytes)) as myzip:
    with myzip.open('jodi_gas_beta.csv', 'r') as myCSV:
        # Sort by primary key, then period. groupby() below only merges
        # adjacent rows, so rows of one series must be contiguous, and sorting
        # on TIME_PERIOD last leaves each series in chronological order.
        reader = sorted(csv.DictReader(myCSV),
                        key=itemgetter(*(KEY_FIELDS + ('TIME_PERIOD',))))

# Limit the number of rows for TESTING (slicing with None keeps everything).
reader = reader[:ROW_LIMIT]

# Row accessors are built once instead of once per row.
# OBS_VALUE goes through float() first so strings such as "756.0" convert.
get_observation = typed_itemgetter(('TIME_PERIOD', 'OBS_VALUE'),
                                   (None, lambda x: int(float(x))))
get_quality = typed_itemgetter(('TIME_PERIOD', 'ASSESSMENT_CODE'),
                               (None, int))

# Create the JSON-like Python data structure, one record per primary key.
myData = []
for key, group in it.groupby(reader, key=itemgetter(*KEY_FIELDS)):
    # A groupby sub-iterator can be consumed only once, and two passes are
    # needed (observations and quality), so materialize it.
    rows = list(group)

    metadata = dict(zip(METADATA_NAMES, key))
    metadata['Quality'] = [get_quality(row) for row in rows]

    myData.append({
        'myPK': ''.join(key),
        'TimeSeries': [get_observation(row) for row in rows],
        'Metadata': metadata,
    })

# Display the data to be turned into JSON.
from pprint import pprint
print('myData:')
pprint(myData)

# To create JSON format output, use something like:
with open('myData.json', 'w') as fp:
    json.dump(myData, fp, indent=2)
打印输出的开头部分:
myData:
[{'Metadata': {'Country': 'AE',
'Flow': 'EXPLNG',
'Product': 'NATGAS',
'Quality': [['2015-01', 3],
['2015-02', 3],
['2015-03', 3],
['2015-04', 3],
['2015-05', 3],
['2015-06', 3],
['2015-07', 3],
['2015-08', 3],
['2015-09', 3],
['2015-10', 3],
['2015-11', 3],
['2015-12', 3],
['2016-01', 3],
['2016-02', 3],
['2016-04', 3],
['2016-05', 3]],
'Unit': 'M3'},
'TimeSeries': [['2015-01', 756],
['2015-02', 572],
['2015-03', 654],
['2015-04', 431],
['2015-05', 681],
['2015-06', 683],
['2015-07', 751],
['2015-08', 716],
['2015-09', 830],
['2015-10', 580],
['2015-11', 659],
['2015-12', 659],
['2016-01', 742],
['2016-02', 746],
['2016-04', 0],
['2016-05', 0]],
'myPK': 'AENATGASEXPLNGM3'},
{'Metadata': {'Country': 'AE',
'Flow': 'EXPPIP',
'Product': 'NATGAS',
'Quality': [['2015-01', 3],
['2015-02', 3],
['2015-03', 3],
['2015-04', 3],
['2015-05', 3],
['2015-06', 3],
['2015-07', 3],
['2015-08', 3],
['2015-09', 3],
['2015-10', 3],
['2015-11', 3],
['2015-12', 3],
['2016-01', 3],
['2016-02', 3],
['2016-03', 3],
['2016-04', 3],
# etc, etc...
]