我需要做一些计算,其中每一行的计算结果是下一行的输入。
我目前使用的 for 循环非常慢,有没有办法用 dplyr 完成这类计算?示例如下:
# Sample data: only the first row carries a non-zero opening balance.
df <- data.frame(
  beginning_on_hand = c(10, rep(0, 11)),
  sales = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1),
  ship = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1)
)

# receipts is ship shifted down by two rows; the two leading NAs produced by
# the shift are replaced with 0. ending_on_hand is a zero placeholder that is
# filled in afterwards.
dfb <- df %>%
  mutate(
    receipts = coalesce(lag(ship, 2), 0),
    ending_on_hand = 0
  )
> dfb
beginning_on_hand sales ship receipts ending_on_hand
10 10 10 0 0
0 9 9 0 0
0 4 4 10 0
0 7 7 9 0
0 3 3 4 0
0 7 7 7 0
0 2 2 3 0
0 6 6 7 0
0 1 1 2 0
0 5 5 6 0
0 7 7 1 0
0 1 1 5 0
# Roll the balance forward one row at a time: a row's ending balance is
# beginning + receipts - sales, and it becomes the next row's beginning
# balance.
# The loop stops two rows before the end, so the last two rows keep their
# placeholder ending_on_hand of 0.
# NOTE(review): confirm that skipping the last two rows is intended.
# seq_len() is used instead of 1:n so that a table with two rows or fewer
# yields an empty loop rather than counting backwards (1, 0, ...).
for (i in seq_len(max(nrow(dfb) - 2, 0))) {
  dfb$ending_on_hand[i] <-
    dfb$beginning_on_hand[i] + dfb$receipts[i] - dfb$sales[i]
  dfb$beginning_on_hand[i + 1] <- dfb$ending_on_hand[i]
}
> dfb
beginning_on_hand sales ship receipts ending_on_hand
1 10 10 10 0 0
2 0 9 9 0 -9
3 -9 4 4 10 -3
4 -3 7 7 9 -1
5 -1 3 3 4 0
6 0 7 7 7 0
7 0 2 2 3 1
8 1 6 6 7 2
9 2 1 1 2 3
10 3 5 5 6 4
11 4 7 7 1 0
12 0 1 1 5 0
答案 0 :(得分:0)
我没有 dplyr 解决方案,但这里有一个 data.table 解决方案。
# Sample data: only the first row carries a non-zero opening balance.
df <- data.frame(
  beginning_on_hand = c(10, rep(0, 11)),
  sales = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1),
  ship = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1)
)

# ending_on_hand is a zero placeholder (it is computed afterwards); receipts
# is ship shifted down by two rows, with the two leading NAs replaced by 0.
dfb <- df %>%
  mutate(
    ending_on_hand = 0,
    receipts = coalesce(lag(ship, 2), 0)
  )
# Convert to a data.table so columns can be updated by reference with `:=`.
dfb <- data.table(dfb)

# Step 1: the running total of each row's net change
# (beginning + receipts - sales) yields every ending balance in one
# vectorised pass, replacing the row-by-row loop.
# Step 2: each row's beginning balance is its own starting value plus the
# previous row's ending balance (0 for the first row).
# shift(x, fill = 0) is data.table's equivalent of dplyr::lag(x, default = 0);
# using it avoids relying on dplyr::lag() masking stats::lag().
# `:=` updates dfb in place, so df.end refers to the same updated table.
df.end <- dfb[, ending_on_hand := cumsum(beginning_on_hand + receipts - sales)][
  , beginning_on_hand := beginning_on_hand + shift(ending_on_hand, fill = 0)
]
>df.end
beginning_on_hand sales ship ending_on_hand receipts
1: 10 10 10 0 0
2: 0 9 9 -9 0
3: -9 4 4 -3 10
4: -3 7 7 -1 9
5: -1 3 3 0 4
6: 0 7 7 0 7
7: 0 2 2 1 3
8: 1 6 6 2 7
9: 2 1 1 3 2
10: 3 5 5 4 6
11: 4 7 7 -2 1
12: -2 1 1 2 5
作为解释:data.table 本质上以"列的列表"形式存储数据,并以常见的表格形式显示。它使用类似 SQL 的语法来组织和处理数据。此处值得注意的函数是 cumsum 和 lag(data.table 中与 lag 等价的函数是 shift)。cumsum 计算截至并包含给定索引的所有值的累计和,lag 则取位于给定索引之前(即上方某一行)的值。