数据框'df':
>>> df = ACEV(get_SW_code('sdht', sw_cls=3))[['PB','PE','EV_EBITDA','PEG','ROIC','mg_r','opr_pft_r','net_pft_r','sales_gr','net_pft_r','Ttl_mkv']]
>>> print df
PB PE EV_EBITDA PEG ROIC mg_r opr_pft_r net_pft_r sales_gr net_pft_r Ttl_mkv
STK_ID RPT_Date
000153 20130930 2.4196 96.1577 28.4962 2.4451 0.0215 0.2420 0.0169 0.0205 -0.1744 0.0205 24.4094
000513 20130930 3.6926 25.0706 18.4354 2.4541 0.1031 0.6270 0.1375 0.1171 0.1115 0.1171 119.5308
000566 20130930 3.4557 71.1281 48.0766 -10.0123 0.0624 0.3442 0.1013 0.1048 0.2624 0.1048 50.3113
000915 20130930 7.6417 36.7593 15.4284 1.0311 0.2811 0.6573 0.3108 0.1501 0.3122 0.1501 55.6988
002262 20130930 10.8237 51.6500 35.6455 2.0330 0.2355 0.3906 0.1031 0.0883 0.1175 0.0883 87.4037
002294 20130930 7.2780 26.5449 21.4806 0.7326 0.2700 0.7687 0.4328 0.3649 0.2072 0.3649 210.4453
002332 20130930 3.8046 56.5500 29.0781 -1.3127 0.0446 0.3240 0.0290 0.0323 0.2342 0.0323 43.8699
002370 20130930 5.0226 -200.3027 -737.7620 3.0019 -0.0209 0.2699 -0.0386 -0.0190 -0.0964 -0.0190 35.1696
002393 20130930 2.3108 21.3287 16.8411 3.2078 0.1590 0.6495 0.3591 0.3295 0.1034 0.3295 66.9792
002422 20130930 2.3011 18.7523 15.4236 1.7534 0.0726 0.4491 0.1920 0.1722 0.1322 0.1722 219.3120
002437 20130930 4.9315 56.5184 52.4612 1.5637 0.0700 0.7674 0.1347 0.1422 1.0571 0.1422 113.4000
002653 20130930 13.3723 46.9329 53.3025 2.3415 0.4212 0.7145 0.4425 0.4882 0.3671 0.4882 235.8229
002693 20130930 3.1935 41.3297 38.7854 -7.6784 0.0693 0.6249 0.4072 0.4565 0.0800 0.4565 27.0720
300006 20130930 5.2367 112.0564 60.3387 -16.2008 0.0406 0.3715 0.0542 0.0802 0.1406 0.0802 56.6032
300016 20130930 5.0141 41.5183 35.1102 7.8817 0.1495 0.6945 0.2276 0.2076 0.1679 0.2076 27.1893
300026 20130930 8.9274 46.4318 37.1856 1.0558 0.2022 0.8317 0.2072 0.1770 0.7922 0.1770 143.2314
300086 20130930 1.6353 109.4084 56.0711 5.9406 0.0028 0.4658 0.1238 0.0917 -0.1995 0.0917 29.0000
300110 20130930 2.9636 37.6105 22.9847 2.5999 0.0610 0.5250 0.1728 0.1443 0.5337 0.1443 40.2620
300194 20130930 2.2659 96.6729 69.7392 -1.5238 0.0137 0.3165 0.1219 0.1087 -0.2244 0.1087 38.3085
300199 20130930 8.6975 95.1120 82.4783 7.8161 0.0909 0.8133 0.4029 0.3731 0.2354 0.3731 92.6000
300254 20130930 3.3275 69.2074 38.1987 3.6941 0.0562 0.6231 0.0952 0.0618 0.2605 0.0618 20.0700
600062 20130930 2.3357 17.6355 11.1323 1.8080 0.1230 0.3112 0.1078 0.0908 0.0298 0.0908 116.5688
600079 20130930 3.2838 35.7598 17.0296 -7.0877 0.0933 0.4182 0.1281 0.0723 0.2095 0.0723 139.2799
600276 20130930 8.2268 41.4846 31.2832 2.7490 0.2097 0.7877 0.2512 0.2040 0.1232 0.2040 497.2969
600297 20130930 2.1951 66.8464 49.0453 -1.9347 0.0411 0.2627 0.0722 0.0972 -0.5125 0.0972 20.3350
600380 20130930 1.7856 30.1339 10.2166 0.7840 0.0768 0.6047 0.1231 0.0590 -0.0068 0.0590 72.6543
600420 20130930 4.5755 33.8193 19.7438 2.0176 0.0709 0.3901 0.0803 0.0471 0.2715 0.0471 43.5053
600513 20130930 4.1317 46.4486 32.1551 1.1443 0.0727 0.4439 0.0680 0.0608 0.9255 0.0608 17.0960
600568 20130930 3.0480 59.0073 217.2803 -4.5461 0.0354 0.1229 0.0455 0.0717 0.3563 0.0717 31.0560
600664 20130930 1.3296 22.9746 10.6580 -2.7641 0.0309 0.2795 0.0243 0.0192 0.0178 0.0192 109.2965
600771 20130930 123.2274 13.6873 -134.3250 -0.1349 0.0067 0.5464 -0.1835 0.0054 -0.0652 0.0054 58.3189
600789 20130930 1.9866 -32.9747 40.2872 0.7011 -0.0232 0.1285 -0.0554 -0.0307 -0.0761 -0.0307 29.1369
600829 20130930 1.7848 55.0648 21.7546 -0.4271 -0.0692 0.3676 -0.0320 -0.0289 -0.2365 -0.0289 37.4608
[33 rows x 11 columns]
>>> df = ACEV(get_SW_code('sdht', sw_cls=3))[['PB','PE','EV_EBITDA','PEG','ROIC','mg_r','opr_pft_r','net_pft_r','sales_gr','net_pft_r','Ttl_mkv']].sort('EV_EBITDA')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "D:\Python2.7\lib\site-packages\pandas\core\frame.py", line 2664, in sort
inplace=inplace)
File "D:\Python2.7\lib\site-packages\pandas\core\frame.py", line 2756, in sort_index
return self.take(indexer, axis=axis, convert=False, is_copy=False)
File "D:\Python2.7\lib\site-packages\pandas\core\generic.py", line 1126, in take
new_data = self._data.take(indices, axis=baxis)
File "D:\Python2.7\lib\site-packages\pandas\core\internals.py", line 3326, in take
ref_items=new_axes[0], axis=axis)
File "D:\Python2.7\lib\site-packages\pandas\core\internals.py", line 2304, in apply
do_integrity_check=do_integrity_check)
File "D:\Python2.7\lib\site-packages\pandas\core\internals.py", line 1957, in __init__
self._set_ref_locs(do_refs=True)
File "D:\Python2.7\lib\site-packages\pandas\core\internals.py", line 2097, in _set_ref_locs
'have _ref_locs set' % (block, labels))
AssertionError: Cannot create BlockManager._ref_locs because block [FloatBlock: [PB], 1 x 33, dtype: float64] with duplicate items [Index([u'PB', u'PE', u'EV_EBITDA', u'PEG', u'ROIC', u'mg_r', u'opr_pft_r', u'net_pft_r', u'sales_gr', u'net_pft_r', u'Ttl_mkv'], dtype='object')] does not have _ref_locs set
>>>
>>> pd.__version__
'0.13.0-203-g4518630'
我想知道如何修复由 `df.sort` 引起的这个错误?
答案 0 :(得分:2)
错误消息里那些 `BlockManager` / `_ref_locs` 的内容有点让人困惑,但原因似乎是您选择了重复的列(`net_pft_r`):
df = ACEV(get_SW_code('sdht', sw_cls=3))[['PB','PE','EV_EBITDA','PEG','ROIC',
'mg_r','opr_pft_r','net_pft_r','sales_gr','net_pft_r','Ttl_mkv']]
例如:
>>> df = pd.DataFrame({"A": range(3), "B": range(3,6)})
>>> df[["A", "B", "A"]].sort("B")
Traceback (most recent call last):
File "<ipython-input-53-12e1eca79c2b>", line 1, in <module>
df[["A", "B", "A"]].sort("B")
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/frame.py", line 2664, in sort
inplace=inplace)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/frame.py", line 2756, in sort_index
return self.take(indexer, axis=axis, convert=False, is_copy=False)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/generic.py", line 1126, in take
new_data = self._data.take(indices, axis=baxis)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/internals.py", line 3326, in take
ref_items=new_axes[0], axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/internals.py", line 2304, in apply
do_integrity_check=do_integrity_check)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/internals.py", line 1957, in __init__
self._set_ref_locs(do_refs=True)
File "/usr/local/lib/python2.7/dist-packages/pandas-0.13.0_212_g65b9815-py2.7-linux-i686.egg/pandas/core/internals.py", line 2097, in _set_ref_locs
'have _ref_locs set' % (block, labels))
AssertionError: Cannot create BlockManager._ref_locs because block [IntBlock: [A], 1 x 3, dtype: int64] with duplicate items [Index([u'A', u'B', u'A'], dtype='object')] does not have _ref_locs set
我不确定这算不算一个 bug,也不确定是否本来就不应该有重复的列名;但只要避免重复选择同一列(例如去掉列表中第二个 `net_pft_r`),我认为排序就可以正常工作。