Question

我的数据集看起来像这样，但更大

x.col<-c(1,1,1,1,2,2,2,3,3,4)
y.col<-c(2,3,4,5,3,4,5,4,5,5)
response<-c(1,0,1,1,1,1,0,0,0,0)
ds<-data.frame(cbind(x.col,y.col,response))

根据这些数据，我想创建一个矩阵，其中行和列是相同的，单元格中的值表示x和y之间的响应。输出看起来像这样：

one<-c(NA,1,0,1,1)
two<-c(1,NA,1,1,0)
three<-c(0,1,NA,0,0)
four<-c(1,1,0,NA,0)
five<-c(1,0,0,0,NA)
mx<-cbind(one,two,three,four,five)
row.names(mx)<-c(1,2,3,4,5)
colnames(mx)<-c(1,2,3,4,5)

注意诊断是“NAs”，因为它们指的是x和y值相同的单元格

Answer 1

你可以尝试

Un <-  unique(unlist(ds[1:2]))
m1 <- matrix(0, length(Un),length(Un), dimnames=list(Un, Un))
m1[as.matrix(ds[1:2])] <- ds[,3]
m1 <- m1+t(m1)
diag(m1) <- NA
identical(m1, mx)
#[1] TRUE

更新

基于新数据集，这可能有效

ds1 <- read.csv('lulc.mean21apr2015.csv')
library(data.table)#v1.9.5+
Un1 <- unique(unlist(ds1[2:3]))
res <- dcast(setDT(ds1), factor(id.origin, levels=Un1)~factor(id.dest, 
                levels=Un1), value.var='lulc')
for(j in 1:ncol(res)){
  set(res, i=which(is.na(res[[j]])), j=j, value=0)
 }

res1 <- as.matrix(res[,-1, with=FALSE])
row.names(res1) <- res[[1]]
res1[1:3,1:3]
#      9606 25216 12865
#9606     0     1     0
#25216    1     0     1
#12865    0     1     0

或修改上一个解决方案

m1 <- matrix(0, length(Un1), length(Un1), dimnames=list(Un1, Un1))
indx <- do.call(cbind,lapply(ds1[2:3],
          function(x) as.numeric(factor(x, levels=Un1))))
m1[indx] <- ds1[,4]
all.equal(m1, res1)
#[1] TRUE

Answer 2

这是使用xtabs的另一种方式（虽然我更喜欢@akruns）

levs <- unique(unlist(ds[1:2])) # Find all the unique levels
Res <- as.data.frame.matrix(xtabs(response ~ factor(x.col, levs) + factor(y.col, levs), ds))
Res[lower.tri(Res)] <- t(Res)[lower.tri(Res)]
diag(Res) <- NA
all.equal(as.matrix(Res), mx)
## [1] TRUE

Answer 3

使用outer的一种方法：

f = function(u,v) 
{
    res = subset(ds, x.col==u & y.col==v)$response
    ifelse(length(res)==0, 0, res)
}

m = outer(1:5, 1:5, Vectorize(f))
mx = m + t(m)
diag(mx) = NA
rownames(mx)=colnames(mx)=1:5

#   1  2  3  4  5
#1 NA  1  0  1  1
#2  1 NA  1  1  0
#3  0  1 NA  0  0
#4  1  1  0 NA  0
#5  1  0  0  0 NA

为列和行创建一个匹配x和y标签的矩阵

3 个答案:

更新