我最近更新到 pandas 0.20.1,并尝试使用 to_json 的新功能(orient='table'):
import pandas as pd
pd.__version__
# '0.20.1'
# Build a small two-column frame; no index is passed to the constructor.
a = pd.DataFrame({'a':[1,2,3], 'b':[4,5,6]})
# Write the frame to 'a.json' using the orient='table' layout.
a.to_json('a.json', orient='table')
但是如何将此JSON文件读取到DataFrame?
我尝试了 pd.read_json('a.json', orient='table'),但抛出了 ValueError:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-22-7527b25107ef> in <module>()
----> 1 pd.read_json('a.json', orient='table')
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in read_json(path_or_buf, orient, typ, dtype, convert_axes, convert_dates, keep_default_dates, numpy, precise_float, date_unit, encoding, lines)
352 obj = FrameParser(json, orient, dtype, convert_axes, convert_dates,
353 keep_default_dates, numpy, precise_float,
--> 354 date_unit).parse()
355
356 if typ == 'series' or obj is None:
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in parse(self)
420
421 else:
--> 422 self._parse_no_numpy()
423
424 if self.obj is None:
C:\Anaconda3\lib\site-packages\pandas\io\json\json.py in _parse_no_numpy(self)
650 else:
651 self.obj = DataFrame(
--> 652 loads(json, precise_float=self.precise_float), dtype=None)
653
654 def _process_converter(self, f, filt=None):
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_dict(self, data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
5592 # figure out the index, if necessary
5593 if index is None:
-> 5594 index = extract_index(arrays)
5595 else:
5596 index = _ensure_index(index)
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in extract_index(data)
5643
5644 if have_dicts:
-> 5645 raise ValueError('Mixing dicts with non-Series may lead to '
5646 'ambiguous ordering.')
5647
ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.
那么有没有办法可以读取该JSON文件?提前谢谢。
PS:JSON文件如下所示:
{"schema": {"pandas_version":"0.20.0","fields":[{"type":"integer","name":"index"},{"type":"integer","name":"a"},{"type":"integer","name":"b"}],"primaryKey":["index"]}, "data": [{"index":0,"a":1,"b":4},{"index":1,"a":2,"b":5},{"index":2,"a":3,"b":6}]}
答案 0 :(得分:5)
显然,新方法会把数据集的一些元数据(例如 pandas 版本)一并写入 JSON。因此,可以考虑用内置的 json 模块读取这个嵌套对象,并取出 data 键对应的值:
import json
...
# JSON text is UTF-8 by specification, so do not depend on the locale default.
with open('a.json', 'r', encoding='utf-8') as f:
    # json.load parses straight from the file object.
    json_obj = json.load(f)

# Only the "data" records are needed here; the "schema" part is ignored.
df = pd.DataFrame(json_obj['data']).set_index('index')
# Drop the index label so the frame prints like the one that was written.
df.index.name = None
print(df)
#    a  b
# 0  1  4
# 1  2  5
# 2  3  6
如果您还想利用 schema 中的 type 和 name,可以对嵌套 JSON 的这些部分使用字典推导式和列表推导式。不过这里必须先把 Table Schema 的类型名 integer 转换成 pandas 可识别的 int。另外,不能直接使用 DataFrame 构造函数的 dtype 参数,因为列名要到构造完成之后才确定:
# JSON text is UTF-8 by specification, so do not depend on the locale default.
with open('a.json', 'r', encoding='utf-8') as f:
    json_obj = json.load(f)

fields = json_obj['schema']['fields']

# Table Schema type names are not pandas dtype names, so translate them
# explicitly. Slicing the name ('integer'[:3] -> 'int') only works for
# integers; 'number' or 'boolean' would yield invalid dtypes.
schema_to_dtype = {'integer': 'int64', 'number': 'float64', 'boolean': 'bool'}

# Passing the schema's field names as columns keeps the schema's column order.
df = pd.DataFrame(json_obj['data'], columns=[t['name'] for t in fields])
# Fields whose type is not in the mapping (e.g. 'string') are left untouched.
df = df.astype({t['name']: schema_to_dtype[t['type']]
                for t in fields
                if t['type'] in schema_to_dtype}).set_index('index')
df.index.name = None
print(df)
#    a  b
# 0  1  4
# 1  2  5
# 2  3  6
答案 1 :(得分:1)
这是我根据 Parfait 的回答整理出的一个函数:
def table_to_df(table):
    """Build a DataFrame from a parsed ``orient='table'`` JSON object.

    Parameters
    ----------
    table : dict
        Mapping with a ``'schema'`` entry (holding a ``'fields'`` list of
        ``{'name': ..., 'type': ...}`` dicts and, optionally, a
        ``'primaryKey'`` list) and a ``'data'`` entry (list of row dicts).

    Returns
    -------
    pandas.DataFrame
        Columns follow the order of ``schema['fields']``. Fields typed
        ``'datetime'`` are parsed with ``pd.to_datetime``. When the schema
        names a primary key, those columns become the index; otherwise the
        default integer index is kept.
    """
    schema = table['schema']
    fields = schema['fields']

    df = pd.DataFrame(table['data'], columns=[t['name'] for t in fields])

    for t in fields:
        if t['type'] == "datetime":
            # Let pandas infer the format itself; the explicit
            # infer_datetime_format flag is deprecated in recent releases.
            df[t['name']] = pd.to_datetime(df[t['name']])

    # 'primaryKey' may be missing from the schema; in that case there is
    # nothing to set as the index.
    primary_key = schema.get('primaryKey')
    if primary_key:
        df = df.set_index(primary_key)
    return df