有没有快速实现这种矩阵转换的方法?

时间:2017-11-12 16:18:59

标签: r matrix

考虑以下代码制作矩阵x

# Build the 6 x 3 example matrix, filled row by row with the integers 1..18.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
x <- matrix(1:18, nrow = 6, byrow = TRUE)
x
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6
[3,]    7    8    9
[4,]   10   11   12
[5,]   13   14   15
[6,]   16   17   18

现在我想将x转换为一个新的矩阵x1:把x的每两行取出作为一个子矩阵并转置,最后按行合并(rbind)所有转置后的子矩阵。这可以通过以下代码完成:

# Baseline solution: walk over x two rows at a time, transpose each 2-row
# slice, and stack the transposed pieces with rbind().
# R is case-sensitive, so the loop bound must refer to `x` (not `X`).
# seq_len() keeps the loop from running when x has fewer than two rows.
x1 <- c()
for (i in seq_len(nrow(x) %/% 2)) {
  # Rows (2i - 1) and 2i form the i-th slice.
  x1 <- rbind(x1, t(x[((i - 1) * 2 + 1):(i * 2), ]))
}
x1
      [,1] [,2]
 [1,]    1    4
 [2,]    2    5
 [3,]    3    6
 [4,]    7   10
 [5,]    8   11
 [6,]    9   12
 [7,]   13   16
 [8,]   14   17
 [9,]   15   18

我想知道这是否可以用apply系列函数完成,因为在我的实际情况中x非常大,使用for循环需要很长时间才能执行。我也想知道这种转换是否还有其他更快的写法。

修改:我的x包含1770000行和12列。行数是590的倍数,因此生成的矩阵x1应该包含36000行和590列。我尝试将@akrun的代码调整为:

# Adaptation of the split/rbind approach to groups of 590 rows instead of 2.
# NOTE(review): the "data length [12]" warning and the "Numeric,590" cells
# reported after this snippet suggest x is a data.frame (a list of 12 columns)
# rather than a matrix here - confirm; presumably it needs as.matrix(x) first.
x1 = do.call(rbind, lapply(split(x, (seq_len(nrow(x))-1) %/% 590),
     matrix, ncol=590, byrow = TRUE))

但我收到了警告:

In FUN(X[[i]], ...) :
data length [12] is not a sub-multiple or multiple of the number of columns [590]

此外x1类似于:

     [,1]        [,2]        [,3]        [,4]        [,5]        [,6]       
[1,] Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590
[2,] Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590
[3,] Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590 Numeric,590

3 个答案:

答案 0 :(得分:3)

我们可以使用更快的 split + rbind 方法:

# Label the rows in consecutive pairs (0, 0, 1, 1, ...), split the values of x
# by that label, rebuild every piece as a 2-column matrix filled by row, and
# stack the pieces on top of each other.
do.call(
  rbind,
  lapply(
    split(x, (seq_len(nrow(x)) - 1) %/% 2),
    function(piece) matrix(piece, ncol = 2, byrow = TRUE)
  )
)

基准

# Benchmark input: 60000 x 3 integer matrix filled row by row.
x <- matrix(1:180000, nrow = 60000, byrow = TRUE)

# Timing of the original loop, which grows x1 with rbind() on every iteration.
system.time({
x1 = c()
for (i in 1:(nrow(x)/2)) x1 = rbind(x1, t(x[((i-1)*2+1):(i*2),]))
})
# user  system elapsed 
#   6.78    0.56    7.39 


# Timing of the split/rbind version: one split() of the values by row pair,
# one matrix() call per pair, and a single rbind() over all pieces.
system.time({
  x2 <- do.call(rbind, lapply(split(x, (seq_len(nrow(x))-1) %/% 2),
       matrix, ncol=2, byrow = TRUE))
   })
# user  system elapsed 
#   0.24    0.00    0.23 

# Check that both approaches produce exactly the same matrix.
identical(x1, x2)
#[1] TRUE

答案 1 :(得分:1)

您可以通过选择每隔一行来解决它,展平结果并通过cbind进行组合:

# v1 holds the even-numbered rows (2, 4, 6), v2 the odd-numbered rows (1, 3, 5).
v1 <- x[(1:3) * 2, ]
v2 <- x[1 + (0:2) * 2, ]
# as.vector(t(.)) flattens each selection row by row. The odd rows (v2) must
# form the first column and the even rows (v1) the second; the other order
# yields the two columns swapped relative to the requested x1.
cbind(as.vector(t(v2)), as.vector(t(v1)))

或 - 更一般地说:

# l is the number of rows of x (assumed to be even).
l <- nrow(x)
# v1: even-numbered rows (2, 4, ..., l); v2: odd-numbered rows (1, 3, ..., l - 1).
v1 <- x[(1:(l / 2)) * 2, ]
v2 <- x[1 + (0:(l / 2 - 1)) * 2, ]
# as.vector(t(.)) flattens each selection row by row. The odd rows (v2) must
# form the first column and the even rows (v1) the second; the other order
# yields the two columns swapped relative to the requested x1.
cbind(as.vector(t(v2)), as.vector(t(v1)))

性能基准:

# Timing of the indexing approach. The construction of the 60000 x 3 input
# matrix is inside the timed expression, so it is included in the measurement.
system.time({
   x <- matrix(1:180000, nrow = 60000, byrow = TRUE)     
   l <- length(x[,1])
   # v1: even-numbered rows; v2: odd-numbered rows.
   v1 <- x[(1:(l/2))*2,]    
   v2 <- x[1+(0:(l/2 - 1))*2,]
   # NOTE(review): v1 (even rows) is bound first, so the first column of the
   # result holds the even rows - the opposite column order to the loop version.
   cbind(as.vector(t(v1)), as.vector(t(v2)))    
})
 user  system elapsed 
  0.02    0.00    0.02 

与初始解决方案相比:

# Same 60000 x 3 input, built outside the timed expression this time.
x <- matrix(1:180000, nrow = 60000, byrow = TRUE)

# Timing of the original loop, which grows x1 with rbind() on every iteration.
system.time({
   x1 = c()
   for (i in 1:(nrow(x)/2)) x1 = rbind(x1, t(x[((i-1)*2+1):(i*2),]))
   })
user  system elapsed 
5.83    0.00    5.85 

答案 2 :(得分:1)

使用数组维度和aperm

# 6 x 3 example matrix, filled row by row with the integers 1..18.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
x <- matrix(1:18, nrow = 6, byrow = TRUE)

# Transpose every consecutive group of `n` rows of a matrix and stack the
# transposed groups on top of each other.
#
# x: a matrix; its number of rows is assumed to be a multiple of n.
# n: number of rows per group.
# Returns a matrix with nrow(x) * ncol(x) / n rows and n columns.
fun <- function(x, n) {
  n_row <- nrow(x)
  n_col <- ncol(x)
  # Re-dimension as a 3-d array indexed by (row within group, group, column).
  dim(x) <- c(n, n_row / n, n_col)
  # Reverse the axis order to (column, group, row within group).
  swapped <- aperm(x, 3:1)
  # Collapse (column, group) into the row index; row-within-group becomes
  # the column index.
  dim(swapped) <- c(n_row * n_col / n, n)
  swapped
}

fun(x, 2)
#     [,1] [,2]
#[1,]    1    4
#[2,]    2    5
#[3,]    3    6
#[4,]    7   10
#[5,]    8   11
#[6,]    9   12
#[7,]   13   16
#[8,]   14   17
#[9,]   15   18

基准:

library(microbenchmark)
# Benchmark input: 60000 x 3 integer matrix filled row by row.
x <- matrix(1:180000, nrow = 60000, byrow = TRUE) 
# Compare the four approaches, 10 evaluations each.
microbenchmark(
  # Original loop: grows x1 with rbind() on every iteration.
  "for" = {
    x1 = c()
    for (i in 1:(nrow(x)/2)) x1 = rbind(x1, t(x[((i-1)*2+1):(i*2),]))
    x1
  },
  # Array re-dimensioning + aperm(), via fun() with groups of 2 rows.
  array = fun(x, 2),
  # split() the values by row pair, rebuild each pair, rbind() once.
  split = do.call(rbind, lapply(split(x, (seq_len(nrow(x))-1) %/% 2),
                               matrix, ncol=2, byrow = TRUE)),
  # Select even rows (v1) and odd rows (v2), flatten each, cbind().
  # NOTE(review): v1 (even rows) is bound first, so this expression returns
  # its two columns in the opposite order to the other three.
  indexing = {
    l <- length(x[,1])
    v1 <- x[(1:(l/2))*2,]    
    v2 <- x[1+(0:(l/2 - 1))*2,]
    cbind(as.vector(t(v1)), as.vector(t(v2)))  
  },
  times = 10
)
#Unit: microseconds
#    expr         min          lq         mean      median          uq         max neval cld
#     for 4313487.101 4333270.083 4557851.0186 4614434.395 4746682.820 4792838.589    10   c
#   array     655.037     690.119     730.7068     735.828     754.394     805.096    10 a  
#   split  133945.299  137704.718  148001.8562  146424.032  154185.373  175473.046    10  b 
#indexing    1369.482    1393.553    1803.0670    1424.027    1951.795    2984.285    10 a