我在data.table
中有以下数据:
> aru
tot wk l
1: 214.31 27 4
2: 337.10 27 2
3: 606.00 27 5
4: 146.80 27 1
5: 4486.59 27 4
---
579429: 404.00 9 6
579430: 404.00 9 4
579431: 199.90 9 3
579432: 150.00 9 2
579433: 273.00 9 3
其中:
"wk" - 一年中的第几周;"tot" - 客户群 "l" 在第 "wk" 周的销售额。
我想要计算的是:每个客户群每周的销售额占该周总销售额的百分比。
到目前为止,我已经能够提出:
版本1。
# Version 1: attach the weekly total to every row, then compute each customer
# group's share of that total inside .SD.
# R has no `\` line continuation, and a statement cannot start with `[`, so
# the chained call begins with `][` while the first bracket is still open.
aru[, .(l, tot, Sales_Total = sum(tot)), by = "wk"
    ][, .SD[, .(Pct = sum(tot) / unique(Sales_Total))], by = "wk,l"]
wk l Pct
1: 27 4 0.348065946
2: 27 2 0.232702135
3: 27 5 0.033854772
4: 27 1 0.249736754
5: 27 3 0.115843350
---
212: 9 2 0.396389787
213: 9 5 0.030511998
214: 9 1 0.260533590
215: 9 3 0.130408076
216: 9 4 0.157098738
版本2
# Version 2: attach the weekly total to every row, compute the share for each
# row of a (wk, l) group, then keep only the first row of every group.
# R has no `\` line continuation, and a statement cannot start with `[`, so
# the chained call begins with `][` while the first bracket is still open.
aru[, .(l, tot, Sales_Total = sum(tot)), by = "wk"
    ][, .(Pct = sum(tot) / Sales_Total), by = "wk,l"
    ][, .SD[1], by = "wk,l"]
wk l Pct
1: 27 4 0.348065946
2: 27 2 0.232702135
3: 27 5 0.033854772
4: 27 1 0.249736754
5: 27 3 0.115843350
---
212: 9 2 0.396389787
213: 9 5 0.030511998
214: 9 1 0.260533590
215: 9 3 0.130408076
216: 9 4 0.157098738
使用data.table()
是否有更优雅有效的方法来达到预期的结果?提前谢谢!
答案 0 :(得分:2)
使用 data.table 而不使用 .SD 的一种做法是:
# Join every row to its week's total, then take each (wk, l) group's share.
aru[
  aru[, list(Sales_Total = sum(tot)), by = "wk"],
  on = "wk"
][
  ,
  list(Pct = sum(tot) / unique(Sales_Total)),
  by = c("wk", "l")
]
或者使用 by = .EACHI:
# The inner table keeps one row per original row (wk, Sales_Total, l).
# by = .EACHI evaluates j once for each of those rows, so every (wk, l) pair
# is produced repeatedly; unique() collapses the duplicates.
unique(
  aru[
    aru[, list(Sales_Total = sum(tot), l), by = "wk"],
    list(Pct = sum(tot) / unique(Sales_Total)),
    on = c("wk", "l"),
    by = .EACHI
  ]
)
# Ten-row sample of the question's data: `tot` is the sales of customer
# group `l` in week `wk`.
aru <- data.frame(
  tot = c(214.31, 337.1, 606, 146.8, 4486.59, 404, 404, 199.9, 150, 273),
  wk = rep(c(27L, 9L), each = 5L),
  l = c(4L, 2L, 5L, 1L, 4L, 6L, 4L, 3L, 2L, 3L)
)
# Convert the data.frame to a data.table in place.
setDT(aru)
答案 1 :(得分:2)
这是一个标准的两步程序:
# Step 1 adds the column `weekly.total` to `aru` by reference; step 2 divides
# each (wk, l) group's sales by that week's total (the value is constant
# within a group, so its first element is enough).
aru[
  , weekly.total := sum(tot), by = "wk"
][
  , sum(tot) / weekly.total[1L], by = c("wk", "l")
]
答案 2 :(得分:1)
使用dplyr
:
# Install dplyr only when it is missing instead of reinstalling on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
aru <- group_by(aru, wk, l)
# Total sales per (wk, l). "drop_last" leaves the result grouped by `wk`, so
# the division below is carried out within each week.
aru_summary <- summarise(aru, pct = sum(tot), .groups = "drop_last")
# Share of the weekly total. percent_rank() is not used: it ranks the
# individual rows inside a group and does not give the group's share of sales.
aru_summary <- ungroup(mutate(aru_summary, pct = pct / sum(pct)))
如果有数据可用,我可以对其进行测试;但本质上,这会将您的数据先按周分组,再按 "l" 分组,并找出每个组中总销售额的百分比排名。
答案 3 :(得分:0)
对 6 种不同的解决方案进行基准测试,其中包括 @akrun 的 2 个:
# Share of weekly sales per customer group, via a join on the weekly totals.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# Returns one row per (wk, l) with the share in column `Pct`.
f1 <- function(aru) {
  weekly_totals <- aru[, list(Sales_Total = sum(tot)), by = "wk"]
  with_totals <- aru[weekly_totals, on = "wk"]
  with_totals[, list(Pct = sum(tot) / unique(Sales_Total)), by = c("wk", "l")]
}
# Share of weekly sales per customer group, via a by = .EACHI join.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# The lookup table has one row per row of `aru`, so j is evaluated once per
# original row and unique() is needed to collapse the repeated (wk, l) rows.
f2 <- function(aru) {
  lookup <- aru[, list(Sales_Total = sum(tot), l), by = "wk"]
  shares <- aru[
    lookup,
    list(Pct = sum(tot) / unique(Sales_Total)),
    on = c("wk", "l"),
    by = .EACHI
  ]
  unique(shares)
}
# Share of weekly sales per customer group, building an intermediate table
# that carries the weekly total on every row.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# Returns one row per (wk, l) with the share in column `Pct`.
f3 <- function(aru) {
  with_totals <- aru[, list(l, tot, Sales_Total = sum(tot)), by = "wk"]
  with_totals[, list(Pct = sum(tot) / unique(Sales_Total)), by = c("wk", "l")]
}
# Share of weekly sales per customer group, computed through .SD.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# Side effect: `:=` adds the column `Sales_Total` to the caller's table.
# Returns one row per (wk, l) with the share in column `Pct`.
f4 <- function(aru) {
  aru[, Sales_Total := sum(tot), by = "wk"]
  # The share is repeated for every row of the group; keep the first row only.
  aru[, .SD[, list(Pct = sum(tot) / Sales_Total)][1L], by = c("wk", "l")]
}
# Share of weekly sales per customer group, two-step version without .SD.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# Side effect: `:=` adds the column `Sales_Total` to the caller's table.
# Returns one row per (wk, l) with the share in column `Pct`.
f5 <- function(aru) {
  aru[, Sales_Total := sum(tot), by = "wk"]
  # Sales_Total is constant within a week, so its first element is enough.
  aru[, list(Pct = sum(tot) / Sales_Total[1L]), by = c("wk", "l")]
}
# Share of weekly sales per customer group, two-step version.
# `aru` is a data.table with columns `tot`, `wk` and `l`.
# Side effect: `:=` adds the column `weekly.total` to the caller's table.
# The share is left unnamed in j, so data.table assigns its default column
# name in the result.
f6 <- function(aru) {
  aru[, weekly.total := sum(tot), by = "wk"]
  # weekly.total is constant within a week, so its first element is enough.
  aru[, sum(tot) / weekly.total[1L], by = c("wk", "l")]
}
library(microbenchmark)

# f4, f5 and f6 add a column to their argument by reference, so every function
# apart from f1 is given its own copy of the data.
aru2 <- copy(aru)
aru3 <- copy(aru)
aru4 <- copy(aru)
aru5 <- copy(aru)
aru6 <- copy(aru)

# A single evaluation per expression.
microbenchmark(
  f5(aru5),
  f6(aru6),
  f3(aru3),
  f1(aru),
  f4(aru4),
  f2(aru2),
  times = 1
)
Unit: milliseconds
expr min lq mean median uq max neval
f5(aru5) 36.34306 36.34306 36.34306 36.34306 36.34306 36.34306 1
f6(aru6) 36.16731 36.16731 36.16731 36.16731 36.16731 36.16731 1
f3(aru3) 119.60882 119.60882 119.60882 119.60882 119.60882 119.60882 1
f1(aru) 120.61820 120.61820 120.61820 120.61820 120.61820 120.61820 1
f4(aru4) 165.02017 165.02017 165.02017 165.02017 165.02017 165.02017 1
f2(aru2) 8577.14333 8577.14333 8577.14333 8577.14333 8577.14333 8577.14333 1