如何将二元格式化数据集转换为邻接矩阵?

时间:2014-08-25 13:23:12

标签: r adjacency-matrix

我一直在努力将以下dyadic df转换为邻接矩阵,使用几种不同的方法(重塑,dcast,...)但是到目前为止还没有得到我想要的东西(这是一个非常长的df,这就是我在这里只添加一些示例性线条的原因):

cntry1   cntry2    var1
usa      canada      70
usa      bahamas     29
usa      cuba        39 
canada   bahamas     15
canada   cuba        35
cuba     bahamas     5 

我希望以下格式提供上述df:

            usa  canada bahamas  cuba     
usa         0    70        29      39
canada     70     0        15      35
bahamas    29    15        0        5
cuba       39    35        5        0

如果我理解了各种软件包(我对R来说很新),我需要将它从long转换为宽,但是 - 通常只能使用一个id变量的edgelists。

在我的情况下使用它,我收到错误消息“聚合函数缺失:默认为长度”,这告诉我似乎有非唯一值? - 在将其减少到唯一值之后,邻接矩阵对于行或列是完整的而不是两者都是完整的。

你建议我使用完全不同的方法吗?

非常感谢你的帮助!

3 个答案:

答案 0 :(得分:2)

尝试:

 lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
  dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
  r1 <- xtabs(var1~cntry2+cntry1, dat)
  r1[lower.tri(r1) & !r1] <- r1[upper.tri(r1) & !!r1]
  r1[upper.tri(r1) & !!r1] <- 0

 as.matrix(as.dist(r1)) #idea contributed by @alexis_laz
 #         usa canada bahamas cuba
 #usa       0     70      29   39
 #canada   70      0      15   35
 #bahamas  29     15       0    5
 #cuba     39     35       5    0

或者

 library(igraph)
 res <-  get.adjacency(graph.edgelist(as.matrix(dat[,1:2]),directed=FALSE)) #using the original dataset
 res[lower.tri(res)] <- dat$var1
 res[upper.tri(res)] <- t(res)[upper.tri(res)]
 res
 #4 x 4 sparse Matrix of class "dgCMatrix"
 #        usa canada bahamas cuba
 #usa       .     70      29   39
 #canada   70      .      15   35
 #bahamas  29     15       .    5
 #cuba     39     35       5    .

更新

假设您有一个这样的数据集(无向):

 dat <- structure(list(cntry1 = c("usa", "usa", "usa", "canada", "canada", 
 "cuba", "canada"), cntry2 = c("canada", "bahamas", "cuba", "bahamas", 
 "cuba", "bahamas", "usa"), var1 = c(70L, 29L, 39L, 15L, 35L, 
 5L, 40L)), .Names = c("cntry1", "cntry2", "var1"), class = "data.frame", row.names = c(NA, 
-7L))

 lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
 dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
 r1 <- xtabs(var1~cntry2+cntry1, dat)
 r2 <- t(r1)
 indx <- intersect(which(lower.tri(r2) & !!r2), which(lower.tri(r1) & !r1))
 r1[lower.tri(r1) & !r1] <- r2[indx]

 indx1 <- upper.tri(r1) & !r1
 r1[indx1] <- r2[indx1]
 r1
 #              cntry1
 #cntry2    usa canada bahamas cuba
 #usa       0     40      29   39
 #canada   70      0      15   35
 #bahamas  29     15       0    5
 #cuba     39     35       5    0

更新号码:2

 dat <- structure(list(cntry1 = c("usa", "usa", "usa", "canada", "canada", "cuba",  "canada"),
   cntry2 = c("canada", "bahamas", "cuba", "bahamas", "cuba", "bahamas", "usa"), 
  var1 = c(4.5L, 3L, 0.5L, 2L, 0L, 2L, 5.5L)), 
  .Names = c("cntry1", "cntry2", "var1"), class = "data.frame", row.names = c(NA, -7L))

0列中var1的值更改为不在数据集中的任何其他值

 dat$var1[!dat$var1] <- 0.01
 lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
 dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
 r1 <- xtabs(var1~cntry2+cntry1, dat)
 r2 <- t(r1)
 indx <- intersect(which(lower.tri(r2) & !!r2), which(lower.tri(r1) & !r1))
 r1[lower.tri(r1) & !r1] <- r2[indx]

 indx1 <- upper.tri(r1) & !r1
 r1[indx1] <- r2[indx1]
 r1[r1==0.01] <- 0
  r1
  #         cntry1
 #cntry2   usa canada bahamas cuba
 # usa     0.0    5.5     3.0  0.5
 # canada  4.5    0.0     2.0  0.0
 # bahamas 3.0    2.0     0.0  2.0
 # cuba    0.5    0.0     2.0  0.0

答案 1 :(得分:0)

library(reshape2)
dat = rbind(dat, data.frame(cntry1=dat$cntry2, cntry2=dat$cntry1, var1=dat$var1))
casted = dcast(dat, cntry1 ~ cntry2, fill=0)

我得到了

  cntry1 bahamas canada cuba usa
1  canada      15      0   35  70
2    cuba       5     35    0  39
3     usa      29     70   39   0
4 bahamas       0     15    5  29

这似乎是你想要的,虽然没有重新排序。

答案 2 :(得分:0)

使用稀疏矩阵可能是有效的(尽管您给出的示例并不特别稀疏):

获取数据:

dd <- read.table(text="
    cntry1    cntry2         var1
    usa      canada            70
    usa      bahamas         29
    usa      cuba                39 
    canada    bahamas      15
    canada    cuba             35
    cuba       bahamas         5 ",
   header=TRUE,stringsAsFactors=FALSE)

可能有一种更为流畅的方法:

library(Matrix)
r <- sort(unique(c(dd$cntry1,dd$cntry2)))
f1 <- as.numeric(factor(dd$cntry1,levels=r))
f2 <- as.numeric(factor(dd$cntry2,levels=r))
m <- Matrix(0,nrow=length(r),ncol=length(r),dimnames=list(r,r))
m[cbind(f1,f2)] <- dd$var1
forceSymmetric(m,uplo="L")