这是原始数据框:
# Fix the RNG seed so the toy data set is reproducible.
set.seed(100)
# Three columns of 50 integers, each drawn with replacement from 1..50.
# The columns are sampled in the order A, B, C.
toydata <- data.frame(
  A = sample(1:50, 50, replace = TRUE),
  B = sample(1:50, 50, replace = TRUE),
  C = sample(1:50, 50, replace = TRUE)
)
下面是可以交换值的函数:
# Randomly permute `x` so that at most `max_fixed` elements stay in their
# original position.
#
# Arguments:
#   x         Vector to shuffle.
#   max_fixed Largest number of positions allowed to keep their original
#             value (default 2, i.e. fewer than 3 unchanged positions).
#
# Returns a shuffled copy of `x`, or a single NA when one value makes up more
# than half of `x`. An empty `x` is returned unchanged.
derangement <- function(x, max_fixed = 2L) {
  # Nothing to shuffle; returning early also avoids the warning that
  # max(table(x)) raises on empty input.
  if (length(x) == 0L) {
    return(x)
  }
  # Bail out when one value occupies more than half of the positions: it
  # could not be moved off all of them.
  if (max(table(x)) > length(x) / 2) {
    return(NA)
  }
  # Rejection sampling: reshuffle until few enough elements are unchanged.
  repeat {
    y <- sample(x)
    if (sum(y == x) <= max_fixed) {
      return(y)
    }
  }
}
# Shuffle the entries of `x` that are strictly below `n` among themselves,
# leaving every other entry where it is.
#
# Arguments:
#   x  Vector whose small values should be swapped.
#   n  Threshold; only entries with x < n take part (default 10).
#
# Returns the modified vector, or a single NA when derangement() hands back a
# length-one result (its failure signal).
swapFun <- function(x, n = 10) {
  below <- which(x < n)
  shuffled <- derangement(x[below])
  if (length(shuffled) == 1) {
    return(NA)
  }
  # Write the shuffled values back into the positions they came from.
  replace(x, below, shuffled)
}
`toy` 是交换后的新数据框:
# Start from a copy so that `toydata` itself is left untouched.
toy <- toydata
# Assigning into `toy[]` replaces the column contents while keeping the
# data frame structure; each column is passed through swapFun() in turn.
toy[] <- lapply(toy, swapFun)
我想通过列联表之间绝对差的总和来比较这两个数据框,也就是:
# For every pair of columns, build the contingency table before swapping
# (`toydata`) and after swapping (`toy`), then sum the absolute differences
# of the cell counts. The subtraction needs both tables of a pair to have
# the same dimensions.

# Columns A and B
table1 <- table(toydata$A, toydata$B)
table2 <- table(toy$A, toy$B)
SUM1 <- sum(abs(table1 - table2))

# Columns A and C
table3 <- table(toydata$A, toydata$C)
table4 <- table(toy$A, toy$C)
SUM2 <- sum(abs(table3 - table4))

# Columns B and C (the swapped table must use B and C as well, not C twice)
table5 <- table(toydata$B, toydata$C)
table6 <- table(toy$B, toy$C)
SUM3 <- sum(abs(table5 - table6))
我想要的是 SUM1 + SUM2 + SUM3。有没有更方便的方法来得到它?因为有时数据框有很多列。
如何解决?谢谢。
答案 0 :(得分:1)
library(dplyr)
# Compare the contingency tables of two columns before and after swapping.
#
# Arguments:
#   x, y    Column names (character); vectors of names are processed pairwise.
#   before  Data frame holding the original values (defaults to `toydata`).
#   after   Data frame holding the swapped values (defaults to `toy`).
#
# Returns the sum of absolute cell-count differences between
# table(before[[x]], before[[y]]) and table(after[[x]], after[[y]]).
f <- function(x, y, before = toydata, after = toy) {
  table1 <- table(before[[x]], before[[y]])
  table2 <- table(after[[x]], after[[y]])
  sum(abs(table1 - table2))
}
# Vectorise over the column names only, so the data frames are passed through
# whole instead of being iterated column by column.
f <- Vectorize(f, vectorize.args = c("x", "y"))
# Build one row per unordered pair of column names and score each pair with f().
combn(names(toydata), 2) %>% # all 2-element combinations of the column names
  t() %>% # transpose: one pair per row
  data.frame(., stringsAsFactors = FALSE) %>% # data frame with columns X1, X2
  filter(X1 != X2) %>% # safeguard: exclude pairs of the same column
  mutate(SumAbs = f(X1, X2)) # apply the comparison to every pair
# X1 X2 SumAbs
# 1 A B 14
# 2 A C 26
# 3 B C 22