我有一个json文件,格式如下:
[
{
"2018-05-11 09:45:10": {
"change": 6.4,
"change rate": 1.58,
"code": "00700",
"current": 412.6,
"market_cap": 39212.21,
"turnover": 419550479.8,
"volume": 102009800.0
}
},
{
"2018-05-11 09:45:20": {
"change": 6.8,
"change rate": 1.67,
"code": "00700",
"current": 413.0,
"market_cap": 39250.23,
"turnover": 493879299.8,
"volume": 120029800.0
}
}, ... ]
我想将json文件转换为dataframe格式:
time code current change change rate market_cap \
0 2018-05-11 09:45:10 00700 412.6 6.4 1.58 39212.21
1 2018-05-11 09:45:20 00700 413.0 6.8 1.67 39250.23
2 2018-05-11 09:45:30 00700 413.2 7.0 1.72 39269.23
3 2018-05-11 09:45:40 00700 413.0 6.8 1.67 39250.23
4 2018-05-11 09:45:50 00700 413.0 6.8 1.67 39250.23
...
turnover volume
0 4.195505e+08 102009800.0
1 4.938793e+08 120029800.0
2 5.581315e+08 135588900.0
3 5.804374e+08 140989900.0
4 5.956777e+08 144679900.0
...
这是我的代码:
def convert_json_file_to_df(file_path):
    """Load a JSON file of timestamped quote records into a DataFrame.

    The file is expected to hold a list of objects of the form
    ``{"<timestamp>": {"code": ..., "current": ..., ...}}``.  Every
    timestamp/fields pair becomes one row whose ``time`` column holds the
    timestamp string.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A ``pandas.DataFrame`` with the columns ``time``, ``code``,
        ``current``, ``change``, ``change rate``, ``market_cap``,
        ``turnover`` and ``volume``, in that order.  The frame is also
        printed, as the previous implementation did.

    Raises:
        KeyError: If a record lacks one of the expected fields.
    """
    # Decoding is handled by open(); json.load() no longer accepts an
    # ``encoding`` argument (it was removed in Python 3.9).
    with open(file_path, encoding='utf-8') as fh:
        jd = json.load(fh)

    value_cols = ["code", "current", "change", "change rate",
                  "market_cap", "turnover", "volume"]
    col_list = ["time"] + value_cols

    # Collect plain dicts first and build the frame once: appending row by
    # row copied the whole frame each time, and DataFrame.append was removed
    # in pandas 2.0.
    rows = [
        {"time": timestamp, **{col: fields[col] for col in value_cols}}
        for entry in jd
        for timestamp, fields in entry.items()
    ]
    df = pd.DataFrame(rows, columns=col_list)
    print(df)
    return df
我希望有一种简短而高效的方法将json文件转换为dataframe。我认为自己写的这段代码既慢又不美观。有没有更高效的方法?另一个问题是:如何把一个dict追加到json文件中?非常感谢
更新: 追加json文件的代码
def save_dict_to_json_file(dict, filepath):
    """Append one dict to the JSON list stored in *filepath*.

    The file must already exist and contain a JSON list; the new record is
    appended and the whole list is written back (UTF-8, 2-space indent,
    sorted keys, non-ASCII characters kept as-is).

    Args:
        dict: The non-empty dict to append.  The name shadows the builtin
            but is kept so existing keyword callers keep working.
        filepath: Path of the JSON file to update.

    Returns:
        ``True`` on success; ``False`` when an argument is invalid (not a
        dict, empty dict, or empty path); ``(False, message)`` when reading
        or writing fails.  Note that the tuple is truthy, so callers should
        compare the result with ``is True`` rather than rely on truthiness.
    """
    # ``dict`` is the parameter here, so the builtin type is reached through
    # ``type({})``.  isinstance also accepts dict subclasses.
    record = dict
    if not isinstance(record, type({})) or not record or not filepath:
        return False
    try:
        # json.load() no longer accepts ``encoding`` (removed in Python 3.9);
        # open() already decodes the file.
        with open(filepath, encoding='utf-8') as f:
            json_data = json.load(f)
        json_data.append(record)
        # Serialize before reopening the file for writing, so a record that
        # cannot be serialized does not leave a truncated file behind.
        payload = json.dumps(json_data, ensure_ascii=False, indent=2,
                             sort_keys=True)
        with open(filepath, mode='w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        # Boundary of a report-don't-raise API: show the traceback and hand
        # the message back to the caller.
        traceback.print_exc()
        return False, str(e)
答案 0 :(得分:0)
我认为您可以使用列表推导式(list comprehension)将嵌套字典展平,为每条记录添加新键 time,
最后调用 DataFrame 构造函数:
# Flatten every {timestamp: fields} entry into a single record that also
# carries its timestamp under the "time" key.
L = [
    {**fields, "time": stamp}
    for entry in jd
    for stamp, fields in entry.items()
]
print(L)
[{'change': 6.4, 'change rate': 1.58, 'code': '00700', 'current': 412.6,
'market_cap': 39212.21, 'turnover': 419550479.8, 'volume': 102009800.0,
'time': '2018-05-11 09:45:10'},
{'change': 6.8, 'change rate': 1.67, 'code': '00700', 'current': 413.0,
'market_cap': 39250.23, 'turnover': 493879299.8, 'volume': 120029800.0,
'time': '2018-05-11 09:45:20'}]
# Pass the column order explicitly so "time" comes first in the frame.
col_list = [
    "time",
    "code",
    "current",
    "change",
    "change rate",
    "market_cap",
    "turnover",
    "volume",
]
df = pd.DataFrame(data=L, columns=col_list)
print(df)
time code current change change rate market_cap \
0 2018-05-11 09:45:10 00700 412.6 6.4 1.58 39212.21
1 2018-05-11 09:45:20 00700 413.0 6.8 1.67 39250.23
turnover volume
0 419550479.8 102009800.0
1 493879299.8 120029800.0