使用 Rcpp 加速将命名向量元素赋值到矩阵的匹配列

时间:2017-10-17 11:11:17

标签: r performance matrix rcpp

假设需要把存储在列表中的命名向量的各个元素,按名称赋值到矩阵中对应的列(参见下面的示例)。

library(microbenchmark)
# Reproducible example: a list of named numeric vectors and a matrix whose
# columns are filled by matching those names.
set.seed(123)
n <- 10000

# Preallocate the list instead of growing it one element at a time.
myList <- vector("list", n)
for (i in seq_len(n)) {
  # Each entry wraps one named vector: a random subset of five normal draws
  # labelled with a shuffled A-E. The nested call is kept intact so the
  # random-number stream (and therefore the data) is unchanged.
  myList[[i]] <- list(
    sample(setNames(rnorm(5), sample(LETTERS[1:5])), ceiling(runif(1, 1, 4)))
  )
}

# NA_real_ makes the matrix numeric from the start; a plain NA would create a
# logical matrix that is coerced on the first assignment.
myMatrix <- matrix(NA_real_, nrow = n, ncol = 5,
                   dimnames = list(NULL, LETTERS[1:5]))

# Row-by-row fill: each named vector goes into the columns matching its names.
for (i in seq_len(n)) {
  values <- myList[[i]][[1]]
  myMatrix[i, match(names(values), colnames(myMatrix))] <- values
}

# Inspect one list entry and the corresponding matrix row.
myList[[6]][[1]]
myMatrix[6, ]

# Time ten repetitions of the row-by-row fill. The expression is labelled so
# the result table shows a short name rather than the deparsed loop, and the
# row count is read from the matrix instead of being repeated as a literal.
microbenchmark(
  match = for (i in seq_len(nrow(myMatrix))) {
    myMatrix[i, match(names(myList[[i]][[1]]), colnames(myMatrix))] <-
      myList[[i]][[1]]
  },
  times = 10
)

在此示例中,10,000个向量的元素被分配给矩阵的匹配列。

问题

赋值过程很慢(约 3.5 秒)!

问题

如何在R或Rcpp中加快此过程?

1 个答案:

答案 0 :(得分:2)

使用 data.table 包中的 rbindlist。它可以按列名匹配来合并各行。

library(microbenchmark)
# Rebuild the example data: n single-element lists, each holding a named
# numeric vector, plus the empty target matrix.
n <- 10000
set.seed(123)

# Preallocate rather than growing the list inside the loop; seq_len() also
# iterates zero times when n is 0, unlike 1:n.
myList <- vector("list", n)
for (i in seq_len(n)) {
  # The nested call is left as-is so the random-number stream is unchanged.
  myList[[i]] <- list(
    sample(setNames(rnorm(5), sample(LETTERS[1:5])), ceiling(runif(1, 1, 4)))
  )
}

# Numeric NA so the matrix already has the storage type of the values it will
# receive; columns are named A-E.
myMatrix <- matrix(NA_real_, nrow = n, ncol = 5,
                   dimnames = list(NULL, LETTERS[1:5]))

library(data.table)
# Compare the row-by-row fill with a single rbindlist() call.
microbenchmark(
  # Baseline: assign each named vector into the columns matching its names.
  match = for (i in seq_len(n)) {
    myMatrix[i, match(names(myList[[i]][[1]]), colnames(myMatrix))] <-
      myList[[i]][[1]]
  },
  rbindlist = {
    # Turn every entry into a named list (one row) and let rbindlist() line
    # the rows up by name, padding absent columns with NA.
    myMatrix1 <- as.matrix(rbindlist(
      lapply(myList, function(x) as.list(unlist(x))),
      use.names = TRUE, fill = TRUE
    ))
    # Put the columns in the target matrix's own order instead of relying on
    # alphabetical sorting; drop = FALSE keeps a one-row result as a matrix.
    # A target column that never occurs in the data raises an error here
    # rather than silently yielding a narrower matrix.
    myMatrix1 <- myMatrix1[, colnames(myMatrix), drop = FALSE]
  },
  times = 10
)
#Unit: milliseconds
#     expr        min         lq       mean     median         uq        max neval cld
#    match 1392.52949 1496.40382 1599.63584 1605.39080 1690.98410 1761.67322    10   b
#rbindlist   48.76146   50.29176   51.66355   51.10672   53.75465   54.93798    10  a

# Confirm that the loop-filled matrix and the rbindlist result agree.
all.equal(target = myMatrix, current = myMatrix1)
# TRUE