在尝试熟悉 Python(pandas)中的 DataFrame 时,我遇到了下面这个问题……
我想用一个数组来创建数据框:
from numpy import *
import pandas as pd
# Build a 10x10 array: every row holds nine float values followed by a string label ('0'..'9').
dat = array([[0.3888888888888889, 0.3888888888888889, 0.3888888888888889, 0.436943311456892, 0.7905900031193156, 0.5020468092219706, 0.8389717734280283, 0.7604923090797432, 0.8206054422776056, '0'],
[0.3888888888888889, 0.3888888888888889, 0.2222222222222222, 0.4457200178477334, 0.8114779465247448, 0.506899600792241, 0.8368566485573798, 0.760617288778523, 0.8195489478905984, '1'],
[0.2777777777777778, 0.2777777777777778, 0.05555555555555555, 0.4426231291814084, 0.7883413226205706, 0.5037172133121759, 0.8370362549229062, 0.7599752704033258, 0.8184218722901648, '2'],
[0.1111111111111111, 0.1111111111111111, 0.16666666666666666, 0.4651807845446571, 0.7983379003654792, 0.5250604537887904, 0.8463875215362144, 0.7533582308429306, 0.8241548325954007, '3'],
[0.5000000000000001, 0.5000000000000001, 0.3333333333333333, 0.4457200178477334, 0.7878040593905666, 0.506899600792241, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '4'],
[0.3888888888888889, 0.3888888888888889, 0.2222222222222222, 0.44943322185630036, 0.7843622888520198, 0.5055757644148106, 0.8351253941103399, 0.7604171267769607, 0.8185442945328569, '5'],
[0.3888888888888889, 0.3888888888888889, 0.3333333333333333, 0.4424914587425397, 0.7877430312713435, 0.5029950110274568, 0.836692391332608, 0.760611529525946, 0.8198150075184326, '6'],
[0.3333333333333333, 0.05555555555555555, 0.7777777777777778, 0.4389415113841421, 0.7878040593905666, 0.506899600792241, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '7'],
[0.4444444444444444, 0.4444444444444444, 0.4444444444444444, 0.42770705188736874, 0.7976039510596705, 0.5057230657076256, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '8'],
[0.2222222222222222, 0.2777777777777778, 0.5000000000000001, 0.43182322765312314, 0.7971732873351607, 0.5072390458086798, 0.84541364942531, 0.7613416598875292, 0.8239037851005895, '9']])
# NOTE(review): `[dat]` wraps the 2-D array in a one-element list; this is the statement
# that raises "ValueError: Must pass 2-d input" in the traceback that follows.
dat = pd.DataFrame([dat], columns = ["Var 1", "Var 2", "Var 3", "Var 4", "Var 5", "Var 6", "Var 7", "Var 8", "Var 9", "Var 10"])
……但我收到了以下错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-68-e8aa575cd3c7> in <module>()
----> 1 dat = pd.DataFrame([dat], columns = ["Var 1", "Var 2", "Var 3", "Var 4", "Var 5", "Var 6", "Var 7", "Var 8", "Var 9", "Var 10"])
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
277 else:
278 mgr = self._init_ndarray(data, index, columns, dtype=dtype,
--> 279 copy=copy)
280 else:
281 mgr = self._init_dict({}, index, columns, dtype=dtype)
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _init_ndarray(self, values, index, columns, dtype, copy)
410 # by definition an array here
411 # the dtypes will be coerced to a single dtype
--> 412 values = _prep_ndarray(values, copy=copy)
413
414 if dtype is not None:
C:\Anaconda3\lib\site-packages\pandas\core\frame.py in _prep_ndarray(values, copy)
5323 values = values.reshape((values.shape[0], 1))
5324 elif values.ndim != 2:
-> 5325 raise ValueError('Must pass 2-d input')
5326
5327 return values
ValueError: Must pass 2-d input
我对 Python 还很陌生,不过我是参照这篇帖子(this post)的做法来写的。
答案 0 :(得分:2)
您传给 `DataFrame` 的输入是一个只包含一个元素的列表(即 `[dat]`,相当于一个一维列表),而不是数组本身。您应该直接传入实际的数组。因此,请去掉 `dat` 外面的方括号:
In [9]: dat = pd.DataFrame(dat, columns = ["Var %d" % (i + 1) for i in range(10)])
In [10]: dat
Out[10]:
Var 1 Var 2 Var 3 Var 4 \
0 0.388888888889 0.388888888889 0.388888888889 0.436943311457
1 0.388888888889 0.388888888889 0.222222222222 0.445720017848
2 0.277777777778 0.277777777778 0.0555555555556 0.442623129181
3 0.111111111111 0.111111111111 0.166666666667 0.465180784545
4 0.5 0.5 0.333333333333 0.445720017848
5 0.388888888889 0.388888888889 0.222222222222 0.449433221856
6 0.388888888889 0.388888888889 0.333333333333 0.442491458743
7 0.333333333333 0.0555555555556 0.777777777778 0.438941511384
8 0.444444444444 0.444444444444 0.444444444444 0.427707051887
9 0.222222222222 0.277777777778 0.5 0.431823227653
Var 5 Var 6 Var 7 Var 8 \
0 0.790590003119 0.502046809222 0.838971773428 0.76049230908
1 0.811477946525 0.506899600792 0.836856648557 0.760617288779
2 0.788341322621 0.503717213312 0.837036254923 0.759975270403
3 0.798337900365 0.525060453789 0.846387521536 0.753358230843
4 0.787804059391 0.506899600792 0.836856648557 0.760501605832
5 0.784362288852 0.505575764415 0.83512539411 0.760417126777
6 0.787743031271 0.502995011027 0.836692391333 0.760611529526
7 0.787804059391 0.506899600792 0.836856648557 0.760501605832
8 0.79760395106 0.505723065708 0.836856648557 0.760501605832
9 0.797173287335 0.507239045809 0.845413649425 0.761341659888
Var 9 Var 10
0 0.820605442278 0
1 0.819548947891 1
2 0.81842187229 2
3 0.824154832595 3
4 0.819548947891 4
5 0.818544294533 5
6 0.819815007518 6
7 0.819548947891 7
8 0.819548947891 8
9 0.823903785101 9
不必在意 `columns` 参数里的列表推导式,我只是不想把所有的 `Var` 列名逐个敲出来 :)。
答案 1 :(得分:2)
出现这个问题,是因为您向 `DataFrame` 传入了一个一维列表(把数组又包了一层列表)。
请试试这样写:
# Hand the 2-D array to the constructor directly (no enclosing list),
# naming the ten columns explicitly.
pd.DataFrame(
    dat,
    columns=[
        "Var 1", "Var 2", "Var 3", "Var 4", "Var 5",
        "Var 6", "Var 7", "Var 8", "Var 9", "Var 10",
    ],
)
答案 2 :(得分:0)
您也可以使用 `pd.DataFrame.from_records()`:
import numpy as np
import pandas as pd

# Sample data: ten rows, each holding nine float values followed by a string label.
# NOTE(review): because every row ends with a string, NumPy presumably stores the whole
# array as strings -- convert the numeric columns (e.g. with astype(float)) if needed.
dat = np.array([[0.3888888888888889, 0.3888888888888889, 0.3888888888888889, 0.436943311456892, 0.7905900031193156, 0.5020468092219706, 0.8389717734280283, 0.7604923090797432, 0.8206054422776056, '0'],
                [0.3888888888888889, 0.3888888888888889, 0.2222222222222222, 0.4457200178477334, 0.8114779465247448, 0.506899600792241, 0.8368566485573798, 0.760617288778523, 0.8195489478905984, '1'],
                [0.2777777777777778, 0.2777777777777778, 0.05555555555555555, 0.4426231291814084, 0.7883413226205706, 0.5037172133121759, 0.8370362549229062, 0.7599752704033258, 0.8184218722901648, '2'],
                [0.1111111111111111, 0.1111111111111111, 0.16666666666666666, 0.4651807845446571, 0.7983379003654792, 0.5250604537887904, 0.8463875215362144, 0.7533582308429306, 0.8241548325954007, '3'],
                [0.5000000000000001, 0.5000000000000001, 0.3333333333333333, 0.4457200178477334, 0.7878040593905666, 0.506899600792241, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '4'],
                [0.3888888888888889, 0.3888888888888889, 0.2222222222222222, 0.44943322185630036, 0.7843622888520198, 0.5055757644148106, 0.8351253941103399, 0.7604171267769607, 0.8185442945328569, '5'],
                [0.3888888888888889, 0.3888888888888889, 0.3333333333333333, 0.4424914587425397, 0.7877430312713435, 0.5029950110274568, 0.836692391332608, 0.760611529525946, 0.8198150075184326, '6'],
                [0.3333333333333333, 0.05555555555555555, 0.7777777777777778, 0.4389415113841421, 0.7878040593905666, 0.506899600792241, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '7'],
                [0.4444444444444444, 0.4444444444444444, 0.4444444444444444, 0.42770705188736874, 0.7976039510596705, 0.5057230657076256, 0.8368566485573798, 0.7605016058324149, 0.8195489478905984, '8'],
                [0.2222222222222222, 0.2777777777777778, 0.5000000000000001, 0.43182322765312314, 0.7971732873351607, 0.5072390458086798, 0.84541364942531, 0.7613416598875292, 0.8239037851005895, '9']])

# from_records treats its input as a sequence of records (rows), so pass the 2-D array
# itself. Wrapping it as [dat] would supply a single record containing the whole array
# instead of ten rows of ten values.
dat = pd.DataFrame.from_records(
    dat,
    columns=["Var 1", "Var 2", "Var 3", "Var 4", "Var 5",
             "Var 6", "Var 7", "Var 8", "Var 9", "Var 10"],
)
希望这有帮助!