通过R中的所有行组合运行函数

时间:2016-02-26 11:29:41

标签: r

对于示例数据框:

# Example data: 26 weighted observations of a binary outcome (`result`)
# spread over four areas (a1-a4). Values are grouped by area below.
df <- data.frame(
  area = factor(
    rep(c("a1", "a2", "a3", "a4"), times = c(7L, 9L, 5L, 5L)),
    levels = c("a1", "a2", "a3", "a4")
  ),
  result = c(
    0L, 1L, 0L, 1L, 1L, 0L, 0L,          # a1
    1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L,  # a2
    1L, 1L, 1L, 0L, 1L,                  # a3
    0L, 1L, 0L, 0L, 1L                   # a4
  ),
  weight = c(
    0.5, 0.8, 1, 3, 3.4, 1.6, 4,             # a1
    1.6, 2.3, 2.1, 2, 1, 0.1, 6, 2.3, 1.6,   # a2
    1.4, 1.2, 1.5, 2, 0.6,                   # a3
    0.4, 0.3, 0.6, 1.6, 1.8                  # a4
  )
)

我希望计算所有区域两两组合(例如a1和a2、a1和a3、a2和a3等)之间的风险差异。最好能以矩阵形式给出结果。

到目前为止,我只计算了结果比例最高和最低的两个区域之间的风险差异(RD):

# Keep only the two areas with the lowest and the highest outcome percentage.
# NOTE(review): df.summary is not defined in this snippet; it presumably holds
# one row per area with `area` and `result` columns - confirm before running.
# data.table is referenced by namespace so no prior library() call is needed.
df.summary <- data.table::data.table(df.summary)
incl <- df.summary[c(which.min(result), which.max(result)), area]
df.new <- df[df$area %in% incl, ]
df.new$area <- factor(df.new$area)

# Weighted contingency table with one ROW per area and one COLUMN per outcome
# (0 / 1). Areas must be the rows: prop.test() below needs, for each area, the
# weighted "events" and the area's total weight.
df.xtabs <- xtabs(weight ~ area + result, data = df.new)
df.xtabs

# Risk difference between the two areas: x is the weight with result == 1
# (second column), n is each area's total weight (row sums).
RD.result <- prop.test(x = df.xtabs[, 2], n = rowSums(df.xtabs),
                       correct = FALSE)
# diff() gives (second area - first area); negate to get first minus second.
RD <- round(-diff(RD.result$estimate), 3)

...但是我如何更改它以确保代码遍历所有区域组合而无需依次指定每个区域? (我最多可能有19个区域。)

1 个答案:

答案 0 :(得分:1)

您可以使用combn函数来实现。例如:

# Risk difference (RD) for every pair of areas in `df`.
# Output: `resultDF`, one row per pair, with columns area_1, area_2 and
# RD = risk(area_1) - risk(area_2), where risk is the weighted proportion of
# observations with result == 1. RD is rounded to 3 decimals.

# All unordered pairs of areas, one pair per column.
uniqueCombinations <- combn(unique(as.character(df$area)), 2)

# Preallocate the result with its final column types.
resultDF <- data.frame(
    area_1 = uniqueCombinations[1, ],
    area_2 = uniqueCombinations[2, ],
    RD = NA_real_,
    stringsAsFactors = FALSE
)

for (i in seq_len(ncol(uniqueCombinations))) {
    # The two areas compared in this iteration.
    incl <- uniqueCombinations[, i]

    df.new <- df[df$area %in% incl, ]
    # Pin the level order to `incl` so the sign of RD always matches the
    # area_1 / area_2 labels, whatever the original factor level order was.
    df.new$area <- factor(df.new$area, levels = incl)

    # Rows = areas, columns = outcome (0 / 1): the second column holds the
    # weighted "events" and the row sums are the per-area denominators.
    df.xtabs <- xtabs(weight ~ area + result, data = df.new)

    RD.result <- prop.test(x = df.xtabs[, 2], n = rowSums(df.xtabs),
                           correct = FALSE)
    # diff() gives (area_2 - area_1); negate to report area_1 - area_2.
    RD <- round(-diff(RD.result$estimate), 3)

    resultDF$RD[i] <- RD
}
resultDF

更新:代码已更新,新增了resultDF,用于保存循环中每一对区域的计算结果。