Question

我需要做一些计算，其中每一行的结果是下一行的输入。

我目前使用的 for 循环非常慢，有没有办法用 dplyr 完成这类计算？示例如下：

# Attach dplyr for %>%, mutate() and lag() so the snippet runs as pasted.
library(dplyr)

# Example inventory data: only the first row starts with stock on hand, and
# `ship` carries the same values as `sales`.
df <- data.frame(
  beginning_on_hand = c(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  sales = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1),
  ship = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1)
)

# receipts: the quantity shipped two rows earlier. The first two rows have no
#   such shipment, so lag()'s `default` fills them with 0 directly (no
#   separate NA-replacement pass is needed).
# ending_on_hand: placeholder column of zeros.
# receipts is created before ending_on_hand to keep the column order.
dfb <- df %>%
  mutate(
    receipts = lag(ship, 2, default = 0),
    ending_on_hand = 0
  )

> dfb
   beginning_on_hand sales ship receipts ending_on_hand
                 10    10   10        0              0
                  0     9    9        0              0
                  0     4    4       10              0
                  0     7    7        9              0
                  0     3    3        4              0
                  0     7    7        7              0
                  0     2    2        3              0
                  0     6    6        7              0
                  0     1    1        2              0
                  0     5    5        6              0
                  0     7    7        1              0
                  0     1    1        5              0

# Roll the stock forward one row at a time: a row's ending stock is its
# beginning stock plus receipts minus sales, and that value becomes the next
# row's beginning stock.
# The loop stops two rows before the end, so the last two rows keep whatever
# ending_on_hand they already had.
# seq_len(max(..., 0)) gives an empty sequence for frames with two rows or
# fewer; 1:(nrow(dfb) - 2) would count down (1, 0, ...) and fail on row 0.
for (i in seq_len(max(nrow(dfb) - 2, 0))) {
  dfb$ending_on_hand[i] <-
    dfb$beginning_on_hand[i] + dfb$receipts[i] - dfb$sales[i]
  dfb$beginning_on_hand[i + 1] <- dfb$ending_on_hand[i]
}

> dfb
   beginning_on_hand sales ship receipts ending_on_hand
1                 10    10   10        0              0
2                  0     9    9        0             -9
3                 -9     4    4       10             -3
4                 -3     7    7        9             -1
5                 -1     3    3        4              0
6                  0     7    7        7              0
7                  0     2    2        3              1
8                  1     6    6        7              2
9                  2     1    1        2              3
10                 3     5    5        6              4
11                 4     7    7        1              0
12                 0     1    1        5              0

Answer 1

我没有dplyr解决方案，但这是一个data.table解决方案。

# Attach dplyr (%>%, mutate(), lag()) and data.table (data.table(), `:=`) so
# the snippet runs as pasted.
library(dplyr)
library(data.table)

# Same example inventory data as in the question.
df <- data.frame(
  beginning_on_hand = c(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  sales = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1),
  ship = c(10, 9, 4, 7, 3, 7, 2, 6, 1, 5, 7, 1)
)

# ending_on_hand: placeholder of zeros, overwritten below; it is created
#   first so it stays ahead of receipts in the column order.
# receipts: the quantity shipped two rows earlier, with 0 for the first two
#   rows (supplied by lag()'s `default`).
dfb <- df %>%
  mutate(
    ending_on_hand = 0,
    receipts = lag(ship, 2, default = 0)
  )

dfb <- data.table(dfb)

# Ending stock is the running total of (beginning + receipts - sales).
dfb[, ending_on_hand := cumsum(beginning_on_hand + receipts - sales)]

# Beginning stock is the original value plus the previous row's ending stock
# (0 for the first row). dplyr::lag is spelled out because a bare lag() would
# resolve to stats::lag, which does not shift values, whenever dplyr is not
# attached or is masked.
dfb[, beginning_on_hand := beginning_on_hand +
        dplyr::lag(ending_on_hand, default = 0)]

# `:=` updates dfb by reference, so df.end refers to the same table.
df.end <- dfb

>df.end
    beginning_on_hand sales ship ending_on_hand receipts
 1:                10    10   10              0        0
 2:                 0     9    9             -9        0
 3:                -9     4    4             -3       10
 4:                -3     7    7             -1        9
 5:                -1     3    3              0        4
 6:                 0     7    7              0        7
 7:                 0     2    2              1        3
 8:                 1     6    6              2        7
 9:                 2     1    1              3        2
10:                 3     5    5              4        6
11:                 4     7    7             -2        1
12:                -2     1    1              2        5

解释一下：data.table 本质上用列表来存储数据，并以常见的表格形式显示；它使用类似 SQL 的语法来组织和处理数据。这里值得注意的函数是 cumsum 和 lag：cumsum 计算到当前索引为止所有值的累计和，lag 取当前索引之前（上方）某一行的值。注意：问题中的循环只计算到倒数第三行，而这里对所有行都做了计算，因此最后两行的结果与问题中的输出不同。

如何在 dplyr 中根据前一行进行计算

1 个答案: