根据特定列的值将数据帧转换为矩阵

时间:2018-07-31 07:43:18

标签: r dataframe matrix tidyr reshape2

我有一个如下数据框:

# Example input for the question: a 16-row data frame with a factor column
# `links` (labels "a,a" ... "d,d") and a numeric column `value`.
# NOTE(review): each label appears to encode "row,col" of the wanted matrix,
# judging by the expected output shown further down — confirm.
mat <- structure(list(links = structure(c(1L, 5L, 9L, 13L, 2L, 6L, 10L, 
14L, 3L, 7L, 11L, 15L, 4L, 8L, 12L, 16L), .Label = c("a,a", "a,b", 
"a,c", "a,d", "b,a", "b,b", "b,c", "b,d", "c,a", "c,b", "c,c", 
"c,d", "d,a", "d,b", "d,c", "d,d"), class = "factor"), value = c(0, 
3.716741, 0, 0, 3.716741, 0, 3.788542, 0, 0, 3.788542, 0, 3.791121, 
0, 0, 3.791121, 0)), class = "data.frame", row.names = c(NA, 
-16L))

如何将其转换为这样的矩阵:

df

        a   b   c   d
    a   0   3.716741    0   0
    b   3.716741    0   3.788542    0
    c   0   3.788542    0   3.791121
    d   0   0   3.791121    0

我已经能用下面的代码完成相反方向的转换(把矩阵转换成上面那种数据框),但不知道如何把数据框再转换回矩阵:

# Reverse direction: start from the wide matrix and build the long
# (links, value) table.
mat <- as.matrix(mat)

# NOTE(review): melt() is not base R; the post's tags suggest reshape2 —
# confirm which package is attached.
df <- melt(mat)

# paste() joins with a single space, so the labels come out padded: "a , b".
df[["links"]] <- paste(df$Var1, ",", df$Var2, sep = " ")

# Keep only the 4th and 3rd columns, in that order.
df <- df[, c(4L, 3L)]

3 个答案:

答案 0 :(得分:1)

尝试一下:

library(tidyverse)
# Split each "row,col" label into two key columns, then spread the second key
# across columns so that every distinct `links1` becomes one row.
# `sep` is a regular expression; allowing optional whitespace around the comma
# makes this work for "a,b" as well as for padded labels such as "a , b"
# (a plain "," would leave the padding in the keys, e.g. "a " and " b").
# The result is a data frame whose first column `links1` holds the row keys.
mat <- mat %>%
  separate(links, into = c("links1", "links2"), sep = "\\s*,\\s*") %>%
  spread(key = links2, value = value)

输出是

  links1        a        b        c        d
1      a 0.000000 3.716741 0.000000 0.000000
2      b 3.716741 0.000000 3.788542 0.000000
3      c 0.000000 3.788542 0.000000 3.791121
4      d 0.000000 0.000000 3.791121 0.000000

答案 1 :(得分:0)

使用基础 R(base R):

# `dat` is expected to be a long data frame with columns `links` and `value`.
# NOTE(review): `dat` is not defined in this snippet; presumably it is the
# question's data frame — confirm.

# Split every "row,col" label at the comma. Whitespace around the comma is
# optional, so both "a,b" and "a , b" are handled.
s <- transform(
  dat,
  i = trimws(sub("\\s*,.*$", "", links)),
  j = trimws(sub("^.*,\\s*", "", links))
)

# Take the names from the values themselves rather than from levels():
# depending on the R version transform() yields factors or plain character
# vectors, and levels() of a character vector is NULL.
row_names <- sort(unique(as.character(s$i)))
col_names <- sort(unique(as.character(s$j)))

mat <- matrix(
  0,
  nrow = length(row_names),
  ncol = length(col_names),
  dimnames = list(row_names, col_names)
)

# A two-column character matrix used as an index addresses cells by
# (row name, column name), so every value lands in its named cell.
mat[cbind(as.character(s$i), as.character(s$j))] <- s$value
mat
         a        b        c        d
a 0.000000 3.716741 0.000000 0.000000
b 3.716741 0.000000 3.788542 0.000000
c 0.000000 3.788542 0.000000 3.791121
d 0.000000 0.000000 3.791121 0.000000

另一种方法可能是:

# Parse each label in `dat$links` as one comma-separated record (read.csv
# names the fields V1 and V2), attach the values as `val`, then reshape to
# wide format: one row per V1 and one `val.<V2>` column per distinct V2.
# NOTE(review): `dat` is not defined in this snippet; presumably it is the
# question's data frame — confirm.
reshape(
  cbind(
    val = dat$value,
    read.csv(text = as.character(dat$links), header = FALSE)
  ),
  idvar = "V1",
  timevar = "V2",
  direction = "wide"
)
  V1   val. a   val. b   val. c   val. d
1 a  0.000000 3.716741 0.000000 0.000000
2 b  3.716741 0.000000 3.788542 0.000000
3 c  0.000000 3.788542 0.000000 3.791121
4 d  0.000000 0.000000 3.791121 0.000000

答案 2 :(得分:0)

尝试一下:

library(tidyverse)


# Build a wide table from the long data frame `df` (columns `links`, `value`).
# The label is split at the comma and trimmed instead of being cut at fixed
# character positions, so names longer than one character work and padded
# labels such as "a , b" no longer leave a trailing space in the row key.
# The result is a data frame with the row keys as row names; wrap it in
# as.matrix() if a true matrix is required.
dfmat <- df %>%
  mutate(
    var1 = trimws(sub(",.*$", "", links)),
    var2 = trimws(sub("^.*,", "", links))
  ) %>%
  select(value, var1, var2) %>%
  spread(key = var2, value = value) %>%
  column_to_rownames(var = "var1")