我想要每组日期的组倍增。 格式1 - 预期输出应如下:
date Bucket D DP
1/31/2013 bkt 0 NA
1/31/2013 bkt 1(10-20) NA
1/31/2013 bkt 2(20-30) NA
1/31/2013 bkt 3(30-40) NA
1/31/2013 bkt 4(40+) NA
2/28/2013 bkt 0 NA
2/28/2013 bkt 1(10-20) 3.00
2/28/2013 bkt 2(20-30) 3.63
2/28/2013 bkt 3(30-40) 101
2/28/2013 bkt 4(40+) 102
3/30/2013 bkt 0 NA
3/30/2013 bkt 1(10-20) 0.55
3/30/2013 bkt 2(20-30) 0.40
3/30/2013 bkt 3(30-40) 103
3/30/2013 bkt 4(40+) 104
4/31/2013 bkt 0 NA
4/31/2013 bkt 1(10-20) 4.25
4/31/2013 bkt 2(20-30) 3.65
4/31/2013 bkt 3(30-40) 105
4/31/2013 bkt 4(40+) 106
5/30/2013 bkt 0 NA
5/30/2013 bkt 1(10-20) 2.34 13608 (108 * 105 * 0.40 * 3.00)
5/30/2013 bkt 2(20-30) 4.10 4536 (108 * 105 * 0.40)
5/30/2013 bkt 3(30-40) 107 11340 (108 * 105)
5/30/2013 bkt 4(40+) 108 108 (108)
格式2 为了更多的理解 - 我已按以下方式安排数据:
1/31/2013 2/28/2013 3/30/2013 4/31/2013 5/30/2013 DP
NA NA NA NA NA NA
NA 3.00 0.55 4.25 2.34 13608 (108 * 105 * 0.40 * 3.00)
NA 3.63 0.40 3.65 4.10 4536 (108 * 105 * 0.40)
NA 101 103 105 107 11340 (108 * 105)
NA 102 104 106 108 108 (108)
但我的CSV文件格式为1: 请帮助,在此先感谢
答案 0 :(得分:0)
我制作了一些与你的格式相同的数据:
set.seed(1)
DF <- data.frame( date = rep(c(ymd("2017-01-01", "2016-01-01", "2015-01-01", "2014-01-01", "2013-01-01")), each=5),
group = rep(1:5, each=5),
value = rnorm(25))
head(DF)
date group value
1 2017-01-01 1 -0.05612874
2 2017-01-01 1 -0.15579551
3 2017-01-01 1 -1.47075238
4 2017-01-01 1 -0.47815006
我是这样做的:
M <- matrix(DF$value, ncol=5)
diag.M <- diag(M)
diag.product <- sapply(5:1, function(x) prod(diag.M[1:x]))
cbind(M, diag.product) %>%
as.data.frame()
我将DF
转换为matrix
。我取矩阵的diag
(对角线值列表)。我使用diag
计算sapply
的产品,从5到1,4对1,3到1等。我将diag.product的cbind
列绑定到矩阵并转换为data.frame
。
答案 1 :(得分:0)
对于 date 和 Bucket 因子级别的每个组合,使用prod
考虑滑动条件by
乘法。为了验证,下面打印出每个乘法中的因子,但不需要print
行内部函数:
数据强>
df <- read.table(text='date Bucket D
"1/31/2013" "bkt 0" NA
"1/31/2013" "bkt 1(10-20)" NA
"1/31/2013" "bkt 2(20-30)" NA
"1/31/2013" "bkt 3(30-40)" NA
"1/31/2013" "bkt 4(40+)" NA
"2/28/2013" "bkt 0" NA
"2/28/2013" "bkt 1(10-20)" 3.00
"2/28/2013" "bkt 2(20-30)" 3.63
"2/28/2013" "bkt 3(30-40)" 101
"2/28/2013" "bkt 4(40+)" 102
"3/30/2013" "bkt 0" NA
"3/30/2013" "bkt 1(10-20)" 0.55
"3/30/2013" "bkt 2(20-30)" 0.40
"3/30/2013" "bkt 3(30-40)" 103
"3/30/2013" "bkt 4(40+)" 104
"4/31/2013" "bkt 0" NA
"4/31/2013" "bkt 1(10-20)" 4.25
"4/31/2013" "bkt 2(20-30)" 3.65
"4/31/2013" "bkt 3(30-40)" 105
"4/31/2013" "bkt 4(40+)" 106
"5/30/2013" "bkt 0" NA
"5/30/2013" "bkt 1(10-20)" 2.34
"5/30/2013" "bkt 2(20-30)" 4.10
"5/30/2013" "bkt 3(30-40)" 107
"5/30/2013" "bkt 4(40+)" 108
"6/30/2013" "bkt 0" NA
"6/30/2013" "bkt 1(10-20)" 4.00
"6/30/2013" "bkt 2(20-30)" 5.00
"6/30/2013" "bkt 3(30-40)" 109.00
"6/30/2013" "bkt 4(40+)" 110.00
"7/30/2013" "bkt 0" NA
"7/30/2013" "bkt 1(10-20)" 8.00
"7/30/2013" "bkt 2(20-30)" 7.00
"7/30/2013" "bkt 3(30-40)" 111.00
"7/30/2013" "bkt 4(40+)" 112.00', header=TRUE)
<强>代码强>
# ADD LEVEL COLUMNS
df$datelvl <- as.integer(as.factor(df$date))
df$bucketlvl <- as.integer(as.factor(df$Bucket))
# RUN CONDITIONAL prod BY EACH LEVEL WITH by
df$DP <- as.numeric(by(df, df[,c("bucketlvl", "datelvl")], FUN=function(i) {
if(i$datelvl[1] >= 5) { # CONDITION FOR NUMBER OF BUCKET GROUP
tmp <- as.numeric(df[(df$datelvl==i$datelvl[1] & df$bucketlvl==5) |
(df$datelvl==i$datelvl[1]-1 & df$bucketlvl==4) |
(df$datelvl==i$datelvl[1]-2 & df$bucketlvl==3) |
(df$datelvl==i$datelvl[1]-3 & df$bucketlvl==2) |
(df$datelvl==i$datelvl[1]-4 & df$bucketlvl==1) ,"D"])
} else {
tmp <- NA
}
print(tmp[i$bucketlvl[1]:length(tmp)])
prod(tmp[i$bucketlvl[1]:length(tmp)])
}))
打印输出
# [1] NA
# [1] NA NA
# [1] NA NA NA
# [1] NA NA NA NA
# [1] NA NA NA NA NA
# [1] NA
# [1] NA NA
# [1] NA NA NA
# [1] NA NA NA NA
# [1] NA NA NA NA NA
# [1] NA
# [1] NA NA
# [1] NA NA NA
# [1] NA NA NA NA
# [1] NA NA NA NA NA
# [1] NA 3.63 103.00 106.00
# [1] 3.63 103.00 106.00
# [1] 103 106
# [1] 106
# [1] NA 106
# [1] NA 3.0 0.4 105.0 108.0
# [1] 3.0 0.4 105.0 108.0
# [1] 0.4 105.0 108.0
# [1] 105 108
# [1] 108
# [1] NA 0.55 3.65 107.00 110.00
# [1] 0.55 3.65 107.00 110.00
# [1] 3.65 107.00 110.00
# [1] 107 110
# [1] 110
# [1] NA 4.25 4.10 109.00 112.00
# [1] 4.25 4.10 109.00 112.00
# [1] 4.1 109.0 112.0
# [1] 109 112
# [1] 112
数据框输出
# date Bucket D datelvl bucketlvl DP
# 1 1/31/2013 bkt 0 NA 1 1 NA
# 2 1/31/2013 bkt 1(10-20) NA 1 2 NA
# 3 1/31/2013 bkt 2(20-30) NA 1 3 NA
# 4 1/31/2013 bkt 3(30-40) NA 1 4 NA
# 5 1/31/2013 bkt 4(40+) NA 1 5 NA
# 6 2/28/2013 bkt 0 NA 2 1 NA
# 7 2/28/2013 bkt 1(10-20) 3.00 2 2 NA
# 8 2/28/2013 bkt 2(20-30) 3.63 2 3 NA
# 9 2/28/2013 bkt 3(30-40) 101.00 2 4 NA
# 10 2/28/2013 bkt 4(40+) 102.00 2 5 NA
# 11 3/30/2013 bkt 0 NA 3 1 NA
# 12 3/30/2013 bkt 1(10-20) 0.55 3 2 NA
# 13 3/30/2013 bkt 2(20-30) 0.40 3 3 NA
# 14 3/30/2013 bkt 3(30-40) 103.00 3 4 NA
# 15 3/30/2013 bkt 4(40+) 104.00 3 5 NA
# 16 4/31/2013 bkt 0 NA 4 1 NA
# 17 4/31/2013 bkt 1(10-20) 4.25 4 2 NA
# 18 4/31/2013 bkt 2(20-30) 3.65 4 3 NA
# 19 4/31/2013 bkt 3(30-40) 105.00 4 4 NA
# 20 4/31/2013 bkt 4(40+) 106.00 4 5 NA
# 21 5/30/2013 bkt 0 NA 5 1 NA
# 22 5/30/2013 bkt 1(10-20) 2.34 5 2 13608.00
# 23 5/30/2013 bkt 2(20-30) 4.10 5 3 4536.00
# 24 5/30/2013 bkt 3(30-40) 107.00 5 4 11340.00
# 25 5/30/2013 bkt 4(40+) 108.00 5 5 108.00
# 26 6/30/2013 bkt 0 NA 6 1 NA
# 27 6/30/2013 bkt 1(10-20) 4.00 6 2 23628.28
# 28 6/30/2013 bkt 2(20-30) 5.00 6 3 42960.50
# 29 6/30/2013 bkt 3(30-40) 109.00 6 4 11770.00
# 30 6/30/2013 bkt 4(40+) 110.00 6 5 110.00
# 31 7/30/2013 bkt 0 NA 7 1 NA
# 32 7/30/2013 bkt 1(10-20) 8.00 7 2 212724.40
# 33 7/30/2013 bkt 2(20-30) 7.00 7 3 50052.80
# 34 7/30/2013 bkt 3(30-40) 111.00 7 4 12208.00
# 35 7/30/2013 bkt 4(40+) 112.00 7 5 112.00
答案 2 :(得分:0)
我提出的使用基础R的方法(数据与@Parfait相同):
# Split the group
l1 <- tapply(df$D, df$date, identity)
# Get the corresponding elements in the group and remove the first one (NA)
es <- mapply(`[`, l1, seq_along(l1))[-1]
# Then get cummulative product and put them into df
DP <- c(rep(NA, nrow(df) - length(es)),
Reduce(`*`, es, right=TRUE, accumulate=TRUE))
df$DP <- DP
df
# date Bucket D DP
# 1 1/31/2013 bkt 0 NA NA
# 2 1/31/2013 bkt 1(10-20) NA NA
# 3 1/31/2013 bkt 2(20-30) NA NA
# 4 1/31/2013 bkt 3(30-40) NA NA
# 5 1/31/2013 bkt 4(40+) NA NA
# 6 2/28/2013 bkt 0 NA NA
# 7 2/28/2013 bkt 1(10-20) 3.00 NA
# 8 2/28/2013 bkt 2(20-30) 3.63 NA
# 9 2/28/2013 bkt 3(30-40) 101.00 NA
# 10 2/28/2013 bkt 4(40+) 102.00 NA
# 11 3/30/2013 bkt 0 NA NA
# 12 3/30/2013 bkt 1(10-20) 0.55 NA
# 13 3/30/2013 bkt 2(20-30) 0.40 NA
# 14 3/30/2013 bkt 3(30-40) 103.00 NA
# 15 3/30/2013 bkt 4(40+) 104.00 NA
# 16 4/31/2013 bkt 0 NA NA
# 17 4/31/2013 bkt 1(10-20) 4.25 NA
# 18 4/31/2013 bkt 2(20-30) 3.65 NA
# 19 4/31/2013 bkt 3(30-40) 105.00 NA
# 20 4/31/2013 bkt 4(40+) 106.00 NA
# 21 5/30/2013 bkt 0 NA NA
# 22 5/30/2013 bkt 1(10-20) 2.34 13608
# 23 5/30/2013 bkt 2(20-30) 4.10 4536
# 24 5/30/2013 bkt 3(30-40) 107.00 11340
# 25 5/30/2013 bkt 4(40+) 108.00 108