EDIT2 我在python2.7和python3.6中检查了它,结果相同。
添加更多复制粘贴友好版本:
In [1]: import pandas as pd
In [2]: from io import StringIO
In [3]: csv = u"""
...: Index,SH600000,SZ002222
...: 0,2145799.0,282838.0
...: 1,2104693.0,705100.0
...: 2,1228606.0,394897.0
...: 3,638308.0,276903.0
...: 4,387360.0,337920.0
...: 5,292297.0,198000.0
...: 6,402659.0,166301.0
...: 7,1287122.0,268300.0
...: 8,2039270.0,66000.0
...: 9,2232800.0,86107.0
...: 10,1809037.0,155093.0
...: 11,363093.0,79157.0
...: 12,708322.0,98592.0
...: 13,520142.0,96201.0
...: 14,671617.0,57053.0
...: 15,1049536.0,410847.0
...: 16,279062.0,132880.0
...: 17,982549.0,104566.0
...: 18,664100.0,104519.0
...: 19,989327.0,174801.0
...: 20,581799.0,110500.0
...: 21,521880.0,60200.0
...: 22,3483513.0,276500.0
...: 23,2841970.0,59907.0
...: 24,1107128.0,50800.0
...: 25,1176953.0,40150.0
...: 26,304900.0,46300.0
...: 27,412040.0,55309.0
...: 28,259642.0,9500.0
...: 29,425157.0,18700.0
...: 30,1379000.0,63900.0
...: 31,688590.0,92101.0
...: 32,456862.0,71399.0
...: 33,2490348.0,26600.0
...: 34,1004700.0,80800.0
...: 35,947848.0,28400.0
...: 36,1426061.0,37057.0
...: 37,1140738.0,68850.0
...: 38,551291.0,42870.0
...: 39,1434669.0,57550.0
...: 40,2356270.0,84300.0
...: 41,2000965.0,55823.0
...: 42,401515.0,47027.0
...: 43,732070.0,98550.0
...: 44,1670248.0,149350.0
...: 45,1508998.0,123200.0
...: 46,1466045.0,31200.0
...: 47,1665586.0,31700.0"""
In [4]: dat = pd.read_csv(StringIO(csv.strip()), index_col='Index').astype('float32')
In [5]: dat.sum()
Out[5]:
SH600000 55232496.0
SZ002222 6114618.0
dtype: float32
In [6]: dat.stack().sum(level=1)
Out[6]:
SH600000 55232484.0
SZ002222 6114618.0
dtype: float32
In [7]: dat['SH600000'].sum()
Out[7]: 55232488.0
In [8]: pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 2.7.13.final.0
python-bits: 64
OS: Linux
OS-release: 2.6.32-504.23.4.el6.x86_64
machine: x86_64
processor: x86_64
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: None.None
pandas: 0.20.3
pytest: None
pip: 9.0.1
setuptools: 36.0.1
Cython: None
numpy: 1.13.1
scipy: 0.19.1
xarray: None
IPython: 5.4.0
sphinx: None
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: None
tables: 3.4.2
numexpr: 2.6.2
feather: None
matplotlib: 2.0.2
openpyxl: None
xlrd: None
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.999999999
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
pandas_gbq: None
pandas_datareader: None
我有一个数据框如下:
In [23]: dat
Out[23]:
tid SH600000 SZ002222
datetime
2013-08-09 09:35:00 2145799.0 282838.0
2013-08-09 09:40:00 2104693.0 705100.0
2013-08-09 09:45:00 1228606.0 394897.0
2013-08-09 09:50:00 638308.0 276903.0
2013-08-09 09:55:00 387360.0 337920.0
2013-08-09 10:00:00 292297.0 198000.0
2013-08-09 10:05:00 402659.0 166301.0
2013-08-09 10:10:00 1287122.0 268300.0
2013-08-09 10:15:00 2039270.0 66000.0
2013-08-09 10:20:00 2232800.0 86107.0
2013-08-09 10:25:00 1809037.0 155093.0
2013-08-09 10:30:00 363093.0 79157.0
2013-08-09 10:35:00 708322.0 98592.0
2013-08-09 10:40:00 520142.0 96201.0
2013-08-09 10:45:00 671617.0 57053.0
2013-08-09 10:50:00 1049536.0 410847.0
2013-08-09 10:55:00 279062.0 132880.0
2013-08-09 11:00:00 982549.0 104566.0
2013-08-09 11:05:00 664100.0 104519.0
2013-08-09 11:10:00 989327.0 174801.0
2013-08-09 11:15:00 581799.0 110500.0
2013-08-09 11:20:00 521880.0 60200.0
2013-08-09 11:25:00 3483513.0 276500.0
2013-08-09 11:30:00 2841970.0 59907.0
2013-08-09 13:05:00 1107128.0 50800.0
2013-08-09 13:10:00 1176953.0 40150.0
2013-08-09 13:15:00 304900.0 46300.0
2013-08-09 13:20:00 412040.0 55309.0
2013-08-09 13:25:00 259642.0 9500.0
2013-08-09 13:30:00 425157.0 18700.0
2013-08-09 13:35:00 1379000.0 63900.0
2013-08-09 13:40:00 688590.0 92101.0
2013-08-09 13:45:00 456862.0 71399.0
2013-08-09 13:50:00 2490348.0 26600.0
2013-08-09 13:55:00 1004700.0 80800.0
2013-08-09 14:00:00 947848.0 28400.0
2013-08-09 14:05:00 1426061.0 37057.0
2013-08-09 14:10:00 1140738.0 68850.0
2013-08-09 14:15:00 551291.0 42870.0
2013-08-09 14:20:00 1434669.0 57550.0
2013-08-09 14:25:00 2356270.0 84300.0
2013-08-09 14:30:00 2000965.0 55823.0
2013-08-09 14:35:00 401515.0 47027.0
2013-08-09 14:40:00 732070.0 98550.0
2013-08-09 14:45:00 1670248.0 149350.0
2013-08-09 14:50:00 1508998.0 123200.0
2013-08-09 14:55:00 1466045.0 31200.0
2013-08-09 15:00:00 1665586.0 31700.0
我想计算 SH600000 列沿 datetime 方向的总和,有三种方法:
In [24]: dat.sum()
Out[24]:
tid
SH600000 55232496.0
SZ002222 6114618.0
dtype: float32
In [25]: dat.stack().sum(level='tid')
Out[25]:
tid
SH600000 55232484.0
SZ002222 6114618.0
dtype: float32
In [26]: dat['SH600000'].sum()
Out[26]: 55232488.0
我们可以看到,三种方式给出了三个不同的结果。我知道由于 float32 的精度限制,结果不会精确。但我原本预期这三种方式至少应该给出同一个(同样不精确的)结果。
修改
@Ken dat
的记录格式如下:
In [7]: dat.to_records()
Out[7]:
rec.array([(datetime.datetime(2013, 8, 9, 9, 35), 2145799., 282838.),
(datetime.datetime(2013, 8, 9, 9, 40), 2104693., 705100.),
(datetime.datetime(2013, 8, 9, 9, 45), 1228606., 394897.),
(datetime.datetime(2013, 8, 9, 9, 50), 638308., 276903.),
(datetime.datetime(2013, 8, 9, 9, 55), 387360., 337920.),
(datetime.datetime(2013, 8, 9, 10, 0), 292297., 198000.),
(datetime.datetime(2013, 8, 9, 10, 5), 402659., 166301.),
(datetime.datetime(2013, 8, 9, 10, 10), 1287122., 268300.),
(datetime.datetime(2013, 8, 9, 10, 15), 2039270., 66000.),
(datetime.datetime(2013, 8, 9, 10, 20), 2232800., 86107.),
(datetime.datetime(2013, 8, 9, 10, 25), 1809037., 155093.),
(datetime.datetime(2013, 8, 9, 10, 30), 363093., 79157.),
(datetime.datetime(2013, 8, 9, 10, 35), 708322., 98592.),
(datetime.datetime(2013, 8, 9, 10, 40), 520142., 96201.),
(datetime.datetime(2013, 8, 9, 10, 45), 671617., 57053.),
(datetime.datetime(2013, 8, 9, 10, 50), 1049536., 410847.),
(datetime.datetime(2013, 8, 9, 10, 55), 279062., 132880.),
(datetime.datetime(2013, 8, 9, 11, 0), 982549., 104566.),
(datetime.datetime(2013, 8, 9, 11, 5), 664100., 104519.),
(datetime.datetime(2013, 8, 9, 11, 10), 989327., 174801.),
(datetime.datetime(2013, 8, 9, 11, 15), 581799., 110500.),
(datetime.datetime(2013, 8, 9, 11, 20), 521880., 60200.),
(datetime.datetime(2013, 8, 9, 11, 25), 3483513., 276500.),
(datetime.datetime(2013, 8, 9, 11, 30), 2841970., 59907.),
(datetime.datetime(2013, 8, 9, 13, 5), 1107128., 50800.),
(datetime.datetime(2013, 8, 9, 13, 10), 1176953., 40150.),
(datetime.datetime(2013, 8, 9, 13, 15), 304900., 46300.),
(datetime.datetime(2013, 8, 9, 13, 20), 412040., 55309.),
(datetime.datetime(2013, 8, 9, 13, 25), 259642., 9500.),
(datetime.datetime(2013, 8, 9, 13, 30), 425157., 18700.),
(datetime.datetime(2013, 8, 9, 13, 35), 1379000., 63900.),
(datetime.datetime(2013, 8, 9, 13, 40), 688590., 92101.),
(datetime.datetime(2013, 8, 9, 13, 45), 456862., 71399.),
(datetime.datetime(2013, 8, 9, 13, 50), 2490348., 26600.),
(datetime.datetime(2013, 8, 9, 13, 55), 1004700., 80800.),
(datetime.datetime(2013, 8, 9, 14, 0), 947848., 28400.),
(datetime.datetime(2013, 8, 9, 14, 5), 1426061., 37057.),
(datetime.datetime(2013, 8, 9, 14, 10), 1140738., 68850.),
(datetime.datetime(2013, 8, 9, 14, 15), 551291., 42870.),
(datetime.datetime(2013, 8, 9, 14, 20), 1434669., 57550.),
(datetime.datetime(2013, 8, 9, 14, 25), 2356270., 84300.),
(datetime.datetime(2013, 8, 9, 14, 30), 2000965., 55823.),
(datetime.datetime(2013, 8, 9, 14, 35), 401515., 47027.),
(datetime.datetime(2013, 8, 9, 14, 40), 732070., 98550.),
(datetime.datetime(2013, 8, 9, 14, 45), 1670248., 149350.),
(datetime.datetime(2013, 8, 9, 14, 50), 1508998., 123200.),
(datetime.datetime(2013, 8, 9, 14, 55), 1466045., 31200.),
(datetime.datetime(2013, 8, 9, 15, 0), 1665586., 31700.)],
dtype=[(u'datetime', 'O'), (u'SH600000', '<f4'), (u'SZ002222', '<f4')])
@S Ringne 我按你说的执行了 reset_index,但没有效果:
In [53]: dat.reset_index(inplace=True)
In [54]: dat.head()
Out[54]:
tid datetime SH600000 SZ002222
0 2013-08-09 09:35:00 2145799.0 282838.0
1 2013-08-09 09:40:00 2104693.0 705100.0
2 2013-08-09 09:45:00 1228606.0 394897.0
3 2013-08-09 09:50:00 638308.0 276903.0
4 2013-08-09 09:55:00 387360.0 337920.0
In [55]: dat[['SH600000', 'SZ002222']].sum()
Out[55]:
tid
SH600000 55232496.0
SZ002222 6114618.0
dtype: float32
In [56]: dat[['SH600000', 'SZ002222']].stack().sum(level=1)
Out[56]:
tid
SH600000 55232484.0
SZ002222 6114618.0
dtype: float32
In [57]: dat['SH600000'].sum()
Out[57]: 55232488.0
答案 0 :(得分:1)
我猜你有一些变量被覆盖,因为相同的数据集给了我正确的结果。
# Rebuild the question's data set from its record dump and sum each column
# in float64.
#
# NOTE: the question stores the values as float32; this snippet casts them to
# float64 before summing, which is why it yields the exact total (55232485)
# and does not reproduce the float32 discrepancy described in the question.
import datetime

import numpy as np  # required for np.float64 below (was missing originally)
import pandas as pd

# (timestamp, SH600000, SZ002222) rows, copied from `dat.to_records()`.
val1 = [
    (datetime.datetime(2013, 8, 9, 9, 35), 2145799., 282838.),
    (datetime.datetime(2013, 8, 9, 9, 40), 2104693., 705100.),
    (datetime.datetime(2013, 8, 9, 9, 45), 1228606., 394897.),
    (datetime.datetime(2013, 8, 9, 9, 50), 638308., 276903.),
    (datetime.datetime(2013, 8, 9, 9, 55), 387360., 337920.),
    (datetime.datetime(2013, 8, 9, 10, 0), 292297., 198000.),
    (datetime.datetime(2013, 8, 9, 10, 5), 402659., 166301.),
    (datetime.datetime(2013, 8, 9, 10, 10), 1287122., 268300.),
    (datetime.datetime(2013, 8, 9, 10, 15), 2039270., 66000.),
    (datetime.datetime(2013, 8, 9, 10, 20), 2232800., 86107.),
    (datetime.datetime(2013, 8, 9, 10, 25), 1809037., 155093.),
    (datetime.datetime(2013, 8, 9, 10, 30), 363093., 79157.),
    (datetime.datetime(2013, 8, 9, 10, 35), 708322., 98592.),
    (datetime.datetime(2013, 8, 9, 10, 40), 520142., 96201.),
    (datetime.datetime(2013, 8, 9, 10, 45), 671617., 57053.),
    (datetime.datetime(2013, 8, 9, 10, 50), 1049536., 410847.),
    (datetime.datetime(2013, 8, 9, 10, 55), 279062., 132880.),
    (datetime.datetime(2013, 8, 9, 11, 0), 982549., 104566.),
    (datetime.datetime(2013, 8, 9, 11, 5), 664100., 104519.),
    (datetime.datetime(2013, 8, 9, 11, 10), 989327., 174801.),
    (datetime.datetime(2013, 8, 9, 11, 15), 581799., 110500.),
    (datetime.datetime(2013, 8, 9, 11, 20), 521880., 60200.),
    (datetime.datetime(2013, 8, 9, 11, 25), 3483513., 276500.),
    (datetime.datetime(2013, 8, 9, 11, 30), 2841970., 59907.),
    (datetime.datetime(2013, 8, 9, 13, 5), 1107128., 50800.),
    (datetime.datetime(2013, 8, 9, 13, 10), 1176953., 40150.),
    (datetime.datetime(2013, 8, 9, 13, 15), 304900., 46300.),
    (datetime.datetime(2013, 8, 9, 13, 20), 412040., 55309.),
    (datetime.datetime(2013, 8, 9, 13, 25), 259642., 9500.),
    (datetime.datetime(2013, 8, 9, 13, 30), 425157., 18700.),
    (datetime.datetime(2013, 8, 9, 13, 35), 1379000., 63900.),
    (datetime.datetime(2013, 8, 9, 13, 40), 688590., 92101.),
    (datetime.datetime(2013, 8, 9, 13, 45), 456862., 71399.),
    (datetime.datetime(2013, 8, 9, 13, 50), 2490348., 26600.),
    (datetime.datetime(2013, 8, 9, 13, 55), 1004700., 80800.),
    (datetime.datetime(2013, 8, 9, 14, 0), 947848., 28400.),
    (datetime.datetime(2013, 8, 9, 14, 5), 1426061., 37057.),
    (datetime.datetime(2013, 8, 9, 14, 10), 1140738., 68850.),
    (datetime.datetime(2013, 8, 9, 14, 15), 551291., 42870.),
    (datetime.datetime(2013, 8, 9, 14, 20), 1434669., 57550.),
    (datetime.datetime(2013, 8, 9, 14, 25), 2356270., 84300.),
    (datetime.datetime(2013, 8, 9, 14, 30), 2000965., 55823.),
    (datetime.datetime(2013, 8, 9, 14, 35), 401515., 47027.),
    (datetime.datetime(2013, 8, 9, 14, 40), 732070., 98550.),
    (datetime.datetime(2013, 8, 9, 14, 45), 1670248., 149350.),
    (datetime.datetime(2013, 8, 9, 14, 50), 1508998., 123200.),
    (datetime.datetime(2013, 8, 9, 14, 55), 1466045., 31200.),
    (datetime.datetime(2013, 8, 9, 15, 0), 1665586., 31700.),
]

df2 = pd.DataFrame(val1, columns=['datetime', 'SH600000', 'SZ002222'])
# Explicit float64 cast: every partial sum here is an integer far below 2**53,
# so the float64 totals are exact.
df2['SZ002222'] = df2['SZ002222'].astype(np.float64)
df2['SH600000'] = df2['SH600000'].astype(np.float64)
# numeric_only=True skips the 'datetime' column; without it recent pandas
# versions raise TypeError because datetimes cannot be summed.
df2.sum(numeric_only=True)
Out[237]:
SH600000 55232485.0
SZ002222 6114618.0
df2['SH600000'].sum()
Out[239]: 55232485.0
请查看此链接,了解 numpy 数组在不同 dtype 下的不同行为: