df1 = pd.read_table(r'E:\빅데이터 캠퍼스\골목상권 프로파일링 - 서울 열린데이터 광 장\3.초기-16년5월분1\17.상권-추정매출\201301- 201605\tbsm_trdar_selng.txt\tbsm_trdar_selng_utf8.txt' ,
sep='|',
header=None ,
usecols=[0 ,1, 2, 3 ,4, 11,12 ,82],
names=['STDR_YM_CD', 'TRDAR_CD', 'TRDAR_CD_NM', 'SVC_INDUTY_CD', 'SVC_INDUTY_CD_NM', 'THSMON_SELNG_AMT', 'THSMON_SELNG_CO', 'STOR_CO'],
dtype = { '0' : int}
, encoding='utf-8')
df2 = df1[(df1['STDR_YM_CD'] >= 201301 ) & (df1['STDR_YM_CD'] < 201410 )] # 칼럼에 조건걸어 빼기
df3 = df1[(df1['SVC_INDUTY_CD'] == 'CS100001' ) | (df1['SVC_INDUTY_CD'] == 'CS100002' )
| (df1['SVC_INDUTY_CD'] == 'CS100003' ) | (df1['SVC_INDUTY_CD'] == 'CS100004' )
| (df1['SVC_INDUTY_CD'] == 'CS100005' ) | (df1['SVC_INDUTY_CD'] == 'CS100006' )
| (df1['SVC_INDUTY_CD'] == 'CS100007' ) | (df1['SVC_INDUTY_CD'] == 'CS100008' ) | (df1['SVC_INDUTY_CD'] == 'CS100009' )
| (df1['SVC_INDUTY_CD'] == 'CS100010' )]
df3_agg = df3.groupby(['STDR_YM_CD', 'TRDAR_CD' ]).sum() #합치기
df3 = df1[(df1['STDR_YM_CD'] >= 201410 ) & (df1['STDR_YM_CD'] < 201606 )]
df3_bc = df3[df3['SVC_INDUTY_CD'] == 'CS100000']
df3_bc_agg = df3_bc.groupby(['STDR_YM_CD', 'TRDAR_CD' ]).sum()
df4 = pd.concat([df3_agg, df3_bc_agg ]) #축으로 이어붙이기
print(df4.head())
df5 = df4.unstack('TRDAR_CD') #축간 변환
print(df5.head())
df4.to_csv(r'E:\빅데이터 캠퍼스\데이터 분석정리\restaurant business.csv')
当我使用print(df4.head())时,我有一个以下数据框
THSMON_SELNG_AMT THSMON_SELNG_CO STOR_CO
STDR_YM_CD TRDAR_CD
201301 11947 170760672 14533 10
11948 2293679459 50555 65
11949 501996924 21138 21
11950 314447675 23958 20
11953 327033726 11913 37
我尝试将此数据框拆分为下图
THSMON_SELNG_AMT
STDR_YM_CD 201301 201302 201303
TRDAR_CD
11947 170760672 blah blah blah
11948 2293679459 blah blah blah
11949 501996924 blah blah blah
11950 314447675 blah blah blah
11953 327033726 blah blah blah
是的,这意味着我不知道具体的价值观。
但是,我试过这个代码df5 = df4.unstack('TRDAR_CD')
我得到'ValueError:索引包含重复的条目,无法重塑'
我不知道为什么?
帮我PLZ ~~