在 R 中不使用 apply 堆叠矩阵的每 n 列

时间:2015-09-26 03:26:49

标签: r matrix dataframe

我有一个矩阵Vmat:

    # Eight length-5 building blocks; each row of Vmat is two of them joined.
    v1 <- c(4, 8, 3, 5, 9)
    v2 <- c(5, 6, 6, 11, 6)
    v3 <- c(5, 6, 6, 11, 6)
    v4 <- c(8, 6, 4, 4, 3)
    v5 <- c(4, 8, 3, 5, 9)
    v6 <- c(8, 6, 4, 4, 3)
    v7 <- c(3, 2, 7, 7, 4)
    v8 <- c(3, 2, 7, 7, 4)

    # Lay two blocks end to end to form each 10-value row.
    row1 <- c(v1, v2)
    row2 <- c(v3, v4)
    row3 <- c(v5, v6)
    row4 <- c(v7, v8)

    # Bind the rows into a 4 x 10 matrix; rbind() takes the row names
    # from the variable names passed to it.
    Vmat <- rbind(row1, row2, row3, row4)


 Vmat
     [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
row1    4    8    3    5    9    5    6    6   11     6
row2    5    6    6   11    6    8    6    4    4     3
row3    4    8    3    5    9    8    6    4    4     3
row4    3    2    7    7    4    3    2    7    7     4

我想把矩阵按列平均分成两部分,再把它们上下堆叠起来(n = ncol(Vmat) / 2,即每 5 列为一组)。

所以输出是:

 [,1] [,2] [,3] [,4] [,5] 
    4    8    3    5    9   
    5    6    6   11    6  
    4    8    3    5    9    
    3    2    7    7    4   
    5    6    6   11     6
    8    6    4    4     3
    8    6    4    4     3
    3    2    7    7     4

2 个答案:

答案 0 :(得分:2)

这是一个可行的 data.table 解决方案:

# install.packages("data.table", type="source")   # requires 1.9.6+
library(data.table)
# The split pairs column i with column i + vm, so the column count must be
# even; otherwise the fractional vm would silently produce a wrong pairing.
stopifnot(ncol(Vmat) %% 2 == 0)
vm  <- ncol(Vmat) / 2
# One list element per output column: the left-half column and its
# right-half partner. seq_len() stays empty for a zero-column matrix,
# whereas 1:vm would yield c(1, 0).
lst <- lapply(seq_len(vm), function(i) c(i, i + vm))
# Passing a list as measure.vars makes melt() stack each pair into its own
# value column; the `variable` indicator column it adds is then dropped.
result <- melt(as.data.table(Vmat), measure.vars = lst)[, variable := NULL]
result
#    value1 value2 value3 value4 value5
# 1:      4      8      3      5      9
# 2:      5      6      6     11      6
# 3:      4      8      3      5      9
# 4:      3      2      7      7      4
# 5:      5      6      6     11      6
# 6:      8      6      4      4      3
# 7:      8      6      4      4      3
# 8:      3      2      7      7      4

使用更现实的例子:

# Reproducible 16,000 x 1000 matrix of random digits 0-9.
set.seed(1)
Vmat <- matrix(sample(0:9, 16e3 * 1000, replace = TRUE), nrow = 16e3)
library(data.table)
# Time the melt-based stacking of the left and right column halves.
system.time({
  vm  <- ncol(Vmat) / 2
  # Pair each left-half column with its right-half partner.
  lst <- lapply(seq_len(vm), function(i) c(i, i + vm))
  # Full argument name (measure.vars) instead of relying on partial matching.
  result <- melt(as.data.table(Vmat), measure.vars = lst)[, variable := NULL]
})
#    user  system elapsed 
#     0.3     0.0     0.3 

因此,16,000 行 × 1000 列的矩阵大约需要 0.3 秒。请注意,虽然这里“用到了 lapply(...)”,但它只是用来为 melt(...) 构造 measure.vars 列表,真正的工作全部由 melt(...) 完成。

@Akrun的解决方案(同一系统):

# Time the array/aperm approach, using half the columns as the chunk width.
system.time({
  n <- ncol(Vmat) / 2
  # Reinterpret the matrix as rows x n x chunks; the data is not reordered.
  ar1 <- array(Vmat, dim = c(nrow(Vmat), n, ncol(Vmat) / n))
  # Move the chunk dimension next to the row dimension.
  ar2 <- aperm(ar1, perm = c(1, 3, 2))
  # Collapse rows x chunks into one row dimension, leaving n columns.
  dim(ar2) <- c(prod(dim(ar1)[-2]), n)
})
#    user  system elapsed 
#    0.38    0.00    0.37 

# Compare values only; the two results differ in dimnames.
# FALSE is spelled out because F is an ordinary, reassignable variable.
all.equal(as.matrix(result), ar2, check.attributes = FALSE)
# [1] TRUE

答案 1 :(得分:1)

我们可以先把 matrix 转换为三维 array,然后用 aperm 交换数组的维度,最后重新设置它的维度(dim)。

# Width (in columns) of each chunk to be stacked.
n <- 5
# Reinterpret the matrix as rows x n x chunks: R stores matrices
# column-major, so each run of n columns becomes one slice along dim 3.
ar1 <- array(Vmat, dim = c(nrow(Vmat), n, ncol(Vmat) / n))
# Swap the last two dimensions so chunks sit next to rows ...
ar2 <- aperm(ar1, perm = c(1, 3, 2))
# ... then fold rows x chunks into a single row dimension with n columns.
dim(ar2) <- c(prod(dim(ar1)[-2]), n)
ar2
#      [,1] [,2] [,3] [,4] [,5]
#[1,]    4    8    3    5    9
#[2,]    5    6    6   11    6
#[3,]    4    8    3    5    9
#[4,]    3    2    7    7    4
#[5,]    5    6    6   11    6
#[6,]    8    6    4    4    3
#[7,]    8    6    4    4    3
#[8,]    3    2    7    7    4

使用 @jlhoward 的数据:

# Time the array/aperm reshape with a fixed chunk width of 5 columns.
system.time({
  n <- 5
  # Reinterpret the matrix as rows x n x chunks; the data is not reordered.
  ar1 <- array(Vmat, dim = c(nrow(Vmat), n, ncol(Vmat) / n))
  # Move the chunk dimension next to the row dimension.
  ar2 <- aperm(ar1, perm = c(1, 3, 2))
  # Collapse rows x chunks into one row dimension, leaving n columns.
  dim(ar2) <- c(prod(dim(ar1)[-2]), n)
})
# user  system elapsed 
# 0.265   0.015   0.279