data.table 提供了多种借助 by = 参数
制作数据透视表的方法,
但是如何把各个分组的细分汇总按目标形状合并到同一张表中?
样本数据
# Sample data: ten rows, each carrying a group label, a type code and a
# numeric amount.
DT <- data.table(
  GROUP = c(
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP",
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP"
  ),
  TYPE = c(
    "A", "B", "C", "D", "E",
    "B", "B", "A", "A", "E"
  ),
  AMOUNT = c(
    123, 1424, 1244, 2111, 44559,
    128, 1221, 12144, 11, 439
  )
)
下面得到的是各自独立的表格,而没有合并到同一个数据框中
# Totals per TYPE across every group. The sum is formatted once per group, so
# SUM is a character column that uses "," as the thousands separator.
ALL_G <- DT[
  ,
  .(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = TYPE
]

# Breakdown 1: totals per TYPE for A_GROUP only. The group is selected with an
# exact comparison rather than grepl(), so a label that merely contains
# "A_GROUP" as a substring is not pulled in by accident.
A_G <- DT[
  GROUP == "A_GROUP",
  .(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = TYPE
]

# Breakdown 2: totals per TYPE for B_GROUP only (same exact-match selection).
B_G <- DT[
  GROUP == "B_GROUP",
  .(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = TYPE
]
期望形状
# TARGET
TYPE ALL SUM A_GROUP_SUM B_GROUP_SUM
A 12,278 123 12,155
B 2,773 128 2,645
C 1,244 0 1,244
D 2,111 0 2,111
E 44,998 44,998 0
我该如何实现?
答案 0 :(得分:1)
library(data.table)

# Sample data: ten rows with a group label, a type code and a numeric amount.
DT <- data.table(
  GROUP = c(
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP",
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP"
  ),
  TYPE = c(
    "A", "B", "C", "D", "E",
    "B", "B", "A", "A", "E"
  ),
  AMOUNT = c(
    123, 1424, 1244, 2111, 44559,
    128, 1221, 12144, 11, 439
  )
)
# Total AMOUNT per TYPE across all groups. Rows come back in order of first
# appearance of each TYPE in DT, which is not necessarily sorted.
dt1 <- DT[, list(ALL_SUM = sum(AMOUNT)), by = "TYPE"]

# Total AMOUNT per TYPE within each GROUP (long format).
dt2 <- DT[, list(sum = sum(AMOUNT)), by = c("TYPE", "GROUP")]

# Rename the groups so they become the desired column names after casting.
dt2[, GROUP := paste0(GROUP, "_SUM")]

# Cast to wide format: one row per TYPE, one column per group. TYPE/GROUP
# combinations that never occur are filled with 0. The result of dcast() is
# sorted by the left-hand-side variable (TYPE).
dt2 <- dcast(dt2, TYPE ~ GROUP, value.var = "sum", fill = 0)

# Option 1: join on TYPE. Rows are matched by value, so row order does not
# matter. With dt1 on the left, the columns come out as TYPE, ALL_SUM and then
# the per-group sums, so no setcolorder() call is needed afterwards.
dt1[dt2, on = "TYPE"]

# Option 2: bind the columns side by side (dropping the first column, TYPE,
# of dt2). cbind() pairs rows purely by position, so dt1 is first put in the
# same TYPE order as the dcast() result; without this the totals could be
# attached to the wrong TYPE whenever DT is not already sorted by TYPE.
cbind(dt1[order(TYPE)], dt2[, -1])
#   TYPE ALL_SUM A_GROUP_SUM B_GROUP_SUM
# 1    A   12278         123       12155
# 2    B    2773         128        2645
# 3    C    1244           0        1244
# 4    D    2111           0        2111
# 5    E   44998       44998           0