我一直在努力将以下dyadic df转换为邻接矩阵,使用几种不同的方法(重塑,dcast,...)但是到目前为止还没有得到我想要的东西(这是一个非常长的df,这就是我在这里只添加一些示例性线条的原因):
cntry1 cntry2 var1
usa canada 70
usa bahamas 29
usa cuba 39
canada bahamas 15
canada cuba 35
cuba bahamas 5
我希望以下格式提供上述df:
usa canada bahamas cuba
usa 0 70 29 39
canada 70 0 15 35
bahamas 29 15 0 5
cuba 39 35 5 0
如果我理解了各种软件包(我对R来说很新),我需要将它从long转换为宽,但是 - 通常只能使用一个id变量的edgelists。
在我的情况下使用它,我收到错误消息“聚合函数缺失:默认为长度”,这告诉我似乎有非唯一值? - 在将其减少到唯一值之后,邻接矩阵对于行或列是完整的而不是两者都是完整的。
你建议我使用完全不同的方法吗?
非常感谢你的帮助!
答案 0 :(得分:2)
尝试:
lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
r1 <- xtabs(var1~cntry2+cntry1, dat)
r1[lower.tri(r1) & !r1] <- r1[upper.tri(r1) & !!r1]
r1[upper.tri(r1) & !!r1] <- 0
as.matrix(as.dist(r1)) #idea contributed by @alexis_laz
# usa canada bahamas cuba
#usa 0 70 29 39
#canada 70 0 15 35
#bahamas 29 15 0 5
#cuba 39 35 5 0
或者
library(igraph)
res <- get.adjacency(graph.edgelist(as.matrix(dat[,1:2]),directed=FALSE)) #using the original dataset
res[lower.tri(res)] <- dat$var1
res[upper.tri(res)] <- t(res)[upper.tri(res)]
res
#4 x 4 sparse Matrix of class "dgCMatrix"
# usa canada bahamas cuba
#usa . 70 29 39
#canada 70 . 15 35
#bahamas 29 15 . 5
#cuba 39 35 5 .
假设您有一个这样的数据集(无向):
dat <- structure(list(cntry1 = c("usa", "usa", "usa", "canada", "canada",
"cuba", "canada"), cntry2 = c("canada", "bahamas", "cuba", "bahamas",
"cuba", "bahamas", "usa"), var1 = c(70L, 29L, 39L, 15L, 35L,
5L, 40L)), .Names = c("cntry1", "cntry2", "var1"), class = "data.frame", row.names = c(NA,
-7L))
lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
r1 <- xtabs(var1~cntry2+cntry1, dat)
r2 <- t(r1)
indx <- intersect(which(lower.tri(r2) & !!r2), which(lower.tri(r1) & !r1))
r1[lower.tri(r1) & !r1] <- r2[indx]
indx1 <- upper.tri(r1) & !r1
r1[indx1] <- r2[indx1]
r1
# cntry1
#cntry2 usa canada bahamas cuba
#usa 0 40 29 39
#canada 70 0 15 35
#bahamas 29 15 0 5
#cuba 39 35 5 0
dat <- structure(list(cntry1 = c("usa", "usa", "usa", "canada", "canada", "cuba", "canada"),
cntry2 = c("canada", "bahamas", "cuba", "bahamas", "cuba", "bahamas", "usa"),
var1 = c(4.5L, 3L, 0.5L, 2L, 0L, 2L, 5.5L)),
.Names = c("cntry1", "cntry2", "var1"), class = "data.frame", row.names = c(NA, -7L))
将0
列中var1
的值更改为不在数据集中的任何其他值
dat$var1[!dat$var1] <- 0.01
lvls <- unique(unlist(dat[,1:2]))[c(1,2,4,3)]
dat[,1:2] <- lapply(dat[,1:2], function(x) factor(x, levels=lvls))
r1 <- xtabs(var1~cntry2+cntry1, dat)
r2 <- t(r1)
indx <- intersect(which(lower.tri(r2) & !!r2), which(lower.tri(r1) & !r1))
r1[lower.tri(r1) & !r1] <- r2[indx]
indx1 <- upper.tri(r1) & !r1
r1[indx1] <- r2[indx1]
r1[r1==0.01] <- 0
r1
# cntry1
#cntry2 usa canada bahamas cuba
# usa 0.0 5.5 3.0 0.5
# canada 4.5 0.0 2.0 0.0
# bahamas 3.0 2.0 0.0 2.0
# cuba 0.5 0.0 2.0 0.0
答案 1 :(得分:0)
用
library(reshape2)
dat = rbind(dat, data.frame(cntry1=dat$cntry2, cntry2=dat$cntry1, var1=dat$var1))
casted = dcast(dat, cntry1 ~ cntry2, fill=0)
我得到了
cntry1 bahamas canada cuba usa
1 canada 15 0 35 70
2 cuba 5 35 0 39
3 usa 29 70 39 0
4 bahamas 0 15 5 29
这似乎是你想要的,虽然没有重新排序。
答案 2 :(得分:0)
使用稀疏矩阵可能是有效的(尽管您给出的示例并不特别稀疏):
获取数据:
dd <- read.table(text="
cntry1 cntry2 var1
usa canada 70
usa bahamas 29
usa cuba 39
canada bahamas 15
canada cuba 35
cuba bahamas 5 ",
header=TRUE,stringsAsFactors=FALSE)
可能有一种更为流畅的方法:
library(Matrix)
r <- sort(unique(c(dd$cntry1,dd$cntry2)))
f1 <- as.numeric(factor(dd$cntry1,levels=r))
f2 <- as.numeric(factor(dd$cntry2,levels=r))
m <- Matrix(0,nrow=length(r),ncol=length(r),dimnames=list(r,r))
m[cbind(f1,f2)] <- dd$var1
forceSymmetric(m,uplo="L")