两个 data.table 的分组矩阵乘积

时间:2017-05-02 19:45:15

标签: r data.table

我有两个 data.table,希望按组(id)对它们做乘积运算:DT 中每一行的各列分别乘以 DT2 中相同 id 那一行的对应列。

library(data.table)                                                                                                                                                                                                
# Fix the RNG seed so the tables printed below are reproducible.
set.seed(1)

# DT: 4 x 4 table of standard-normal draws, two rows per id.
DT <- as.data.table(matrix(rnorm(16), ncol = 4))
DT[, id := rep(c(1, 2), each = 2)]

# DT2: 2 x 4 table of standard-normal draws, one row per id.
DT2 <- as.data.table(matrix(rnorm(8), ncol = 4))
DT2[, id := c(1, 2)]

#DT
#           V1         V2         V3          V4 id
#1: -0.6264538  0.3295078  0.5757814 -0.62124058  1
#2:  0.1836433 -0.8204684 -0.3053884 -2.21469989  1
#3: -0.8356286  0.4874291  1.5117812  1.12493092  2
#4:  1.5952808  0.7383247  0.3898432 -0.04493361  2

#DT2
#            V1        V2        V3          V4 id
#1: -0.01619026 0.8212212 0.9189774  0.07456498  1
#2:  0.94383621 0.5939013 0.7821363 -1.98935170  2

# Value columns to scale: every DT2 column whose name matches the pattern
# (V1..V4 in this example; `id` does not match).
cols <- grep("V.*", colnames(DT2), value = TRUE)
ids <- DT2[, unique(id)]

# For each id in DT2, scale DT's matching rows column by column:
#   DT[i, j] <- DT[i, j] * DT2[j]   for every row i carrying that id.
for (id_i in ids) {
  # One multiplier per value column, taken from the first DT2 row of this id.
  multipliers <- unlist(DT2[id == id_i, cols, with = FALSE][1L])
  # Scale each column directly instead of computing `l %*% diag(r)`:
  # diag() of a length-one vector builds an identity matrix of that size
  # rather than a 1 x 1 matrix, and a full matrix product multiplies NA/Inf
  # entries by the off-diagonal zeros, contaminating the whole row.
  DT[id == id_i, (cols) := Map(`*`, .SD, multipliers), .SDcols = cols]
}


    #DT(i,j) = DT(i,j)*DT2(j) with id matching
             V1         V2         V3          V4 id
1:  0.010142452  0.2705988  0.5291300 -0.04632279  1
2: -0.002973234 -0.6737860 -0.2806450 -0.16513906  1
3: -0.788696543  0.2894848  1.1824189 -2.23788323  2
4:  1.505683787  0.4384920  0.3049105  0.08938875  2

应该有办法使用 by = .EACHI 高效地完成这个操作,但我一直没能想出解决方案。

2 个答案:

答案 0 :(得分:1)

我不确定这相比您现有的做法是否有显著改进,但它确实用到了连接(join)和 by = .EACHI……

# Join DT to DT2 on id; with by = .EACHI, j is evaluated once per DT2 row.
DT[DT2, on = "id", by = .EACHI, {
  # DT's value columns (x.*) for the rows matched by the current DT2 row,
  # laid out column-major as an n x 4 matrix.
  group_values <- matrix(c(x.V1, x.V2, x.V3, x.V4), ncol = 4)
  # The current DT2 row's values (i.*) placed on a 4 x 4 diagonal.
  multipliers <- diag(c(i.V1, i.V2, i.V3, i.V4))
  # group_values %*% t(multipliers): scales column j by the j-th multiplier.
  as.data.table(tcrossprod(group_values, multipliers))
}]
   id           V1         V2         V3          V4
1:  1  0.010142452  0.2705988  0.5291300 -0.04632279
2:  1 -0.002973234 -0.6737860 -0.2806450 -0.16513906
3:  2 -0.788696543  0.2894848  1.1824189 -2.23788323
4:  2  1.505683787  0.4384920  0.3049105  0.08938875

我试图自动构建x.V1 ...和i.V1变量名,但是没有成功。

答案 1 :(得分:0)

以下是使用sapply的解决方案。

# Only ids present in both tables can be multiplied; an id found in just one
# of them would leave one operand empty and the product non-conformable.
ids <- intersect(DT$id, DT2$id)
# Select the value columns by name rather than by dropping the last column,
# so the result does not depend on where `id` sits in either table.
value_cols <- setdiff(names(DT2), "id")

# Multiply the rows of DT belonging to id `k` by the matching DT2 row.
#
# k: a single id value present in both DT and DT2.
# Returns a numeric matrix with one row per DT row of that id and one column
# per value column, where column j is DT's column j times DT2's value j.
# Reads DT, DT2 and value_cols from the enclosing environment.
myprod <- function(k) {
  mtx1 <- as.matrix(DT[id == k, value_cols, with = FALSE])
  # First DT2 row for this id, flattened to one multiplier per column.
  multipliers <- unlist(DT2[id == k, value_cols, with = FALSE][1L])
  # Column-wise scaling; keeps mtx1's dimensions and column names.
  sweep(mtx1, 2L, multipliers, `*`)
}
# simplify = FALSE makes sapply return a plain list (one matrix per id),
# which is then stacked row-wise.
do.call(rbind, sapply(ids, myprod, simplify = FALSE))

# Results
               V1         V2         V3          V4
[1,]  0.010142452  0.2705988  0.5291300 -0.04632279
[2,] -0.002973234 -0.6737860 -0.2806450 -0.16513906
[3,] -0.788696543  0.2894848  1.1824189 -2.23788323
[4,]  1.505683787  0.4384920  0.3049105  0.08938875