按顺序计算总和序列

时间:2017-04-16 07:22:13

标签: r data.table

假设我有一个data.table

data.table(A=c(1,2,3,4,5,6,4,2))

如何计算n元素序列的总和?

假设n=3,A的序列和的结果应该是seq_sum列,

data.table(A=c(1,2,3,4,5,6,4,2),seq_sum=c(1+2+3,2+3+4,3+4+5,4+5+6,5+6+4,6+4+2,4+2,2))

如何有效地做到这一点?

6 个答案:

答案 0 :(得分:7)

另一种选择是使用Reduceshift

dt[, seq_sum := Reduce(`+`, shift(A, 0:2, 0, 'lead'))]

给出:

> dt
   A seq_sum
1: 1       6
2: 2       9
3: 3      12
4: 4      15
5: 5      15
6: 6      12
7: 4       6
8: 2       2

带参数名称的完整符号:

dt[, seq_sum := Reduce(`+`, shift(A, n = 0:2, fill = 0, type = 'lead'))]

答案 1 :(得分:3)

根据评论更新:

您还可以使用zoo包中的library(data.table) library(zoo) dt <- data.table(A=c(1,2,3,4,5,6,4,2)) dt[, seq_sum := rollapply(A, width = 3, FUN = "sum", align = "left", partial = TRUE)] # > dt # A seq_sum # 1: 1 6 # 2: 2 9 # 3: 3 12 # 4: 4 15 # 5: 5 15 # 6: 6 12 # 7: 4 6 # 8: 2 2

Caused by: java.lang.NullPointerException: Attempt to invoke virtual method 'android.content.res.Configuration android.content.res.Resources.getConfiguration()' on a null object reference

答案 2 :(得分:3)

为避免重复总结,可以存储累积总和:

n = 3
A2 = c(A, rep_len(0, n - 1))

cs = cumsum(A2)

并减去各自的差异:

cs[-seq_len(n - 1)] - c(0, cs[seq_len(length(A2) - n)])
#[1]  6  9 12 15 15 12  6  2

或等同地:

diff(c(0, cs), n)
#[1]  6  9 12 15 15 12  6  2

答案 3 :(得分:2)

library(data.table)
dt <- data.table(A=c(1,2,3,4,5,6,4,2))
n = 3
sapply(1:(length(dt$A)), function(i) {sum(dt$A[i:(min(i+n-1,length(dt$A)))])})      

    # [1]  6  9 12 15 15 12  6  2

答案 4 :(得分:2)

library(zoo)

dtab <- data.table(A=c(1,2,3,4,5,6,4,2))
dtab[, seq_sum := rollapplyr(A, 3, sum, partial = TRUE, align = "left")]

答案 5 :(得分:1)

以下是另一种使用RcppRoll:suml的方法和一些时间供您参考。使用data.table内置函数的@ Jaap解决方案是最快的。

library(data.table)
library(microbenchmark)

N <- 1e5
set.seed(0L)
dt <- data.table(A=rnorm(N))
n <- 3

dt_cumsum <- copy(dt)
fun_cumsum <- function() {
    dt_cumsum[, seq_sum := {
        cs <- cumsum(c(A, rep_len(0, n - 1)))
        diff(c(0, cs), n)
    }]
}

dt_Reduce <- copy(dt)
fun_Reduce <- function() {
    dt_Reduce[, seq_sum := Reduce(`+`, shift(A, n = seq_len(n) - 1, fill = 0, type = 'lead'))]
}

library(zoo)
dt_zoo <- copy(dt)
fun_zoo <- function() {
    dt_zoo[, seq_sum := rollapply(A, width = n, FUN = "sum", align = "left", partial = TRUE)]
}

fun_base <- function() {
    sapply(1:(length(dt$A)), function(i) {sum(dt$A[i:(min(i+n-1,length(dt$A)))])}) 
}

library(RcppRoll)
dt_RcppRoll <- copy(dt)
fun_RcppRoll <- function() {
    dt_RcppRoll[, seq_sum:=head(roll_suml(c(A, rep_len(0, n - 1)), n), -(n-1))]
}

ans <- capture.output(microbenchmark(
    fun_cumsum(),
    fun_Reduce(),
    fun_zoo(),
    fun_base(),
    fun_RcppRoll(),
    times=5L))
writeLines(paste("#", ans))

# Unit: milliseconds
#            expr       min        lq       mean    median        uq       max neval
#    fun_cumsum()    2.5983    2.6427    2.67526    2.6462    2.7311    2.7580     5
#    fun_Reduce()    1.3903    1.4274    2.84070    1.6620    1.7047    8.0191     5
#       fun_zoo() 1225.1620 1242.9112 1289.76416 1258.1143 1355.1070 1367.5263     5
#      fun_base() 2731.6609 2849.1003 2909.27308 2922.9430 2971.9956 3070.6656     5
#  fun_RcppRoll()    1.7890    1.8430    3.49892    1.9663    2.0774    9.8189     5
相关问题