修改大矩阵的形状

时间:2016-08-09 18:56:25

标签: r matrix

我正在处理一个大矩阵(187,682,789 x 5)

假设它的构建方式如下:

# Build a small two-day example with the same layout as the real data:
# one row per observation, columns Day, Lat, Lon, Var.
# The day column is explicitly named "Day" in every per-day matrix so the
# stacked result has one consistently named day column.
Day1 <- rep(1, 10)
Lat <- sample(30:33, 10, replace = TRUE)
Lon <- sample(-30:-33, 10, replace = TRUE)
Var <- runif(10, 1, 100)
Mat1 <- cbind(Day = Day1, Lat, Lon, Var)


Day2 <- rep(2, 10)
Lat <- sample(30:33, 10, replace = TRUE)
Lon <- sample(-30:-33, 10, replace = TRUE)
Var <- runif(10, 1, 100)
Mat2 <- cbind(Day = Day2, Lat, Lon, Var)

#... And so on, but let's stick to 2 days for the example

# Stack the per-day matrices; rbind() takes its column names from Mat1.
Mat <- rbind(Mat1, Mat2)

当然,这里的Lat/Lon组合存在冗余:唯一组合的数量少于总行数。

# Pair up columns 2 and 3 of Mat (Lat and Lon) into a two-column matrix.
position <- cbind(Mat[, 2], Mat[, 3])
# TRUE when at least one Lat/Lon pair occurs more than once.
nrow(position) > nrow(unique(position))

我想获得一个矩阵,显示所有唯一的Lat Lon组合,然后是每天所有相应的变量。

例如:

> Mat
          Day Lat Lon       Var
     [1,]   1  36 -36 51.086210
     [2,]   1  37 -37 48.486008
     [3,]   1  38 -38 39.482635
     [4,]   1  39 -39 97.848232
     [5,]   1  40 -40 71.076543
     [6,]   2  31 -31  5.641855
     [7,]   2  32 -32 62.124584
     [8,]   2  33 -33 39.524119
     [9,]   2  34 -34  7.214646
    [10,]   2  35 -35 94.254170
    [11,]   2  36 -36 40.615783
    [12,]   2  37 -37 71.319719
    [13,]   2  38 -38 81.775119
    [14,]   2  39 -39 49.224411
    [15,]   2  40 -40 80.813237

会变成:

  >Resulting.Mat.Var
    Unique.Lat Unique.Lon  Day1         Day2
    [1,]  36    -36      51.08621    40.615783
    [2,]  37    -37      48.48601    71.319719
    [3,]  38    -38      39.48264    81.775119
    [4,]  39    -39      97.84823    49.224411
    [5,]  40    -40      71.07654    80.813237
    [6,]  31    -31            NA    5.641855
    [7,]  32    -32            NA    62.124584
    [8,]  33    -33            NA    39.524119
    [9,]  34    -34            NA    7.214646
    [10,] 35    -35            NA    94.254170

我尝试先创建一个全为NA的矩阵,再用两层循环来填充它,但这实在太耗时了!

非常感谢!

编辑: 这与我在SO上发现的有些不同,因为它确实需要效率,所有都是数字格式,并且有2列构成位置......

J

3 个答案:

答案 0 :(得分:2)

这是典型的“长到宽”转换问题。获得所需形式的一种可能做法是使用reshape2包中的dcast()函数:

library(reshape2)
# Long-to-wide: one row per Lat/Lon pair, one column per Day, cells hold Var.
# dcast() is called on a data frame built from the matrix Mat, and the
# result is converted back to a matrix.
as.matrix(
  dcast(
    data = as.data.frame(Mat),
    formula = Lat + Lon ~ Day,
    value.var = "Var"
  )
)
#      Lat Lon        1         2
# [1,]  31 -31       NA  5.641855
# [2,]  32 -32       NA 62.124584
# [3,]  33 -33       NA 39.524119
# [4,]  34 -34       NA  7.214646
# [5,]  35 -35       NA 94.254170
# [6,]  36 -36 51.08621 40.615783
# [7,]  37 -37 48.48601 71.319719
# [8,]  38 -38 39.48264 81.775119
# [9,]  39 -39 97.84823 49.224411
#[10,]  40 -40 71.07654 80.813237

之前在SO上已经回答了很多类似的问题,所以这可能是重复的。但是,大多数问题都是指data.frame结构,而不是矩阵。

数据:

# Example data as a 15 x 4 numeric matrix, written column by column:
# 5 observations for day 1 followed by 10 observations for day 2.
Mat <- cbind(
  Day = c(rep(1, 5), rep(2, 10)),
  Lat = c(36, 37, 38, 39, 40, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40),
  Lon = c(-36, -37, -38, -39, -40, -31, -32, -33, -34, -35, -36, -37, -38,
          -39, -40),
  Var = c(51.08621, 48.486008, 39.482635, 97.848232, 71.076543, 5.641855,
          62.124584, 39.524119, 7.214646, 94.25417, 40.615783, 71.319719,
          81.775119, 49.224411, 80.813237)
)

答案 1 :(得分:1)

使用dplyr的另一种方法是:

library(dplyr)
# Mat is a matrix, and dplyr verbs operate on data frames, so convert first.
# match() returns NA when a day is missing for a Lat/Lon pair, so every
# summary value has length one and no location is dropped. If a pair
# occurs several times on the same day, the first occurrence is used.
Resulting.Mat.Var <- as.matrix(
  as.data.frame(Mat) %>%
    group_by(Unique.Lat = Lat, Unique.Lon = Lon) %>%
    summarise(Day1 = Var[match(1, Day)], Day2 = Var[match(2, Day)]) %>%
    ungroup())

print(Resulting.Mat.Var)
##      Unique.Lat Unique.Lon     Day1      Day2
## [1,]         31        -31       NA  5.641855
## [2,]         32        -32       NA 62.124584
## [3,]         33        -33       NA 39.524119
## [4,]         34        -34       NA  7.214646
## [5,]         35        -35       NA 94.254170
## [6,]         36        -36 51.08621 40.615783
## [7,]         37        -37 48.48601 71.319719
## [8,]         38        -38 39.48264 81.775119
## [9,]         39        -39 97.84823 49.224411
##[10,]         40        -40 71.07654 80.813237

答案 2 :(得分:1)

在我看来,这像是一个合并(merge)问题:

> merge( Mat[Mat[,'Day']==1 , -1, drop=FALSE], Mat[ Mat[,'Day']==2, -1, drop=FALSE], by=c(1,2) , all=TRUE)  # drop=FALSE keeps a one-row day as a matrix
   Lat Lon    Var.x     Var.y
1   31 -31       NA  5.641855
2   32 -32       NA 62.124584
3   33 -33       NA 39.524119
4   34 -34       NA  7.214646
5   35 -35       NA 94.254170
6   36 -36 51.08621 40.615783
7   37 -37 48.48601 71.319719
8   38 -38 39.48264 81.775119
9   39 -39 97.84823 49.224411
10  40 -40 71.07654 80.813237

如果需要,可以强制转换为矩阵,因为该结果是data.frame