如何在R中计算Bonferroni置信区间的下限和上限?

时间:2019-07-16 17:14:26

标签: r

利用以下数据,我试图计算卡方值以及Bonferroni置信区间的上限和下限。“Data_No”列用于标识数据集(因为需要对每个数据集分别进行计算)。

Data_No    Area    Observed
   1        3353    31
   1        2297    2
   1        1590    15
   1        1087    16
   1        817     2
   1        847     10
   1        1014    28
   1        872     29
   1        1026    29
   1        1215    21
   2        3353    31
   2        2297    2
   2        1590    15
   3        1087    16
   3        817     2

我使用的代码是

        # Question code, reproduced as posted. For each Data_No it derives
        # area-proportional expected counts, per-row chi-square terms
        # ((O - E)^2 / E), observed proportions (APU) and lower/upper confidence
        # limits, then writes all datasets to a single CSV file.
        library(dplyr) 
        setwd("F:/GIS/July 2019/") 
        total_data <- read.csv("test.csv") 
        result_data <- NULL 
        for(i in unique(total_data$Data_No)){ 
        # NOTE(review): two statements share the next line; R needs a newline or
        # `;` between them -- presumably a line break was lost when pasting.
        data <- total_data[which(total_data$Data_No == i),] data <- data %>%
        # NOTE(review): count() is a dplyr verb that takes a data frame, so
        # count(Data_No) inside mutate() receives a plain column; this looks like
        # the source of the reported "summarise_" error. Also, 0.05/2*x parses as
        # (0.05/2)*x, not the intended 0.05/(2*x).
        mutate(RelativeArea = Area/sum(Area), Expected = RelativeArea*sum(Observed), OminusE = Observed-Expected, O2 = OminusE^2, O2divE = O2/Expected, APU = Observed/sum(Observed), Alpha = 0.05/2*count(Data_No), 
Zvalue = qnorm(Alpha,lower.tail=FALSE), lower = APU-Zvalue*sqrt(APU*(1-APU)/sum(Observed)), upper = APU+Zvalue*sqrt(APU*(1-APU)/sum(Observed)))
# Append this dataset's rows to the accumulated result.
result_data <- rbind(result_data,data) }
write.csv(result_data,file='final_result.csv')

我得到的错误消息是:

  

UseMethod("summarise_") 中的错误:没有适用于 "summarise_" 的方法可应用于类 "c('integer', 'numeric')" 的对象

我称为“Alpha”的列是 Bonferroni 校正后的 alpha 值,即 0.05 / (2k),其中 k 是类别数(即该数据集的行数)。在我的示例中,第一个数据集(“Data_No”列为 1)有 10 个类别,因此“Alpha”必须为 0.05 / 20 = 0.0025,其对应的 Z 值为 2.807。第二个数据集(“Data_No”列为 2)有 3 个类别(因此为 0.05 / 6),第三个数据集有 2 个类别(0.05 / 4)。然后用新计算的“Alpha”列中的值计算“Zvalue”列(Zvalue = qnorm(Alpha,lower.tail=FALSE)),再用它来计算置信区间的上下限。

2 个答案:

答案 0 :(得分:0)

# You need to check the closing bracket for lower c.f. sqrt value. Following code should work.

# Single-dataset version: read the table and append the chi-square and
# confidence-limit columns in one mutate() call (requires dplyr to be loaded).
data <- read.csv("test.csv")
data <- mutate(
  data,
  # Share of the total area covered by each category.
  RelativeArea = Area / sum(Area),
  # Count expected if observations were proportional to area.
  Expected = RelativeArea * sum(Observed),
  OminusE = Observed - Expected,
  O2 = OminusE^2,
  # Per-category chi-square contribution, (O - E)^2 / E.
  O2divE = O2 / Expected,
  # Observed proportion of use for each category.
  APU = Observed / sum(Observed),
  # Limits are APU -/+ z * sqrt(APU * (1 - APU) / N).
  # NOTE(review): 2.394 is approximately qnorm(0.05 / 6, lower.tail = FALSE),
  # i.e. the Bonferroni critical value for three categories -- confirm it
  # matches the number of categories in the data.
  lower = APU - 2.394 * sqrt(APU * (1 - APU) / sum(Observed)),
  upper = APU + 2.394 * sqrt(APU * (1 - APU) / sum(Observed))
)



#Answer to follow-up question.
#Sample Data
Data_No   Area   Observed
1         3353    31
1         2297    2
2         1590    15
2         1087    16

#Code to run
# Per-dataset version: apply the same calculation separately to every
# Data_No, stack the pieces, and write them to one CSV file
# (requires dplyr to be loaded).
total_data <- read.csv("test.csv")

pieces <- list()
for (dataset_id in unique(total_data$Data_No)) {
  # Rows belonging to the current dataset only, so every sum() below is
  # a within-dataset total.
  rows <- which(total_data$Data_No == dataset_id)
  pieces[[length(pieces) + 1L]] <- mutate(
    total_data[rows, ],
    RelativeArea = Area / sum(Area),
    Expected = RelativeArea * sum(Observed),
    OminusE = Observed - Expected,
    O2 = OminusE^2,
    O2divE = O2 / Expected,
    APU = Observed / sum(Observed),
    # NOTE(review): the critical value 2.394 is the same for every dataset;
    # it is approximately qnorm(0.05 / 6, lower.tail = FALSE), the
    # Bonferroni value for three categories -- confirm this is intended
    # when datasets have different numbers of categories.
    lower = APU - 2.394 * sqrt(APU * (1 - APU) / sum(Observed)),
    upper = APU + 2.394 * sqrt(APU * (1 - APU) / sum(Observed))
  )
}

# Bind all per-dataset results in the order the datasets were first seen.
result_data <- do.call(rbind, pieces)

write.csv(result_data, file = 'final_result.csv')

答案 1 :(得分:0)

       # The issue was in calculating Alpha; the code below has been updated.
       #
       # For each dataset (Data_No) this script computes area-proportional
       # expected counts, per-category chi-square terms, observed proportions
       # (APU) and Bonferroni confidence limits, then writes every dataset to
       # final_result.csv.
       library(dplyr)
       # Input and output paths are relative to this directory.
       setwd("F:/GIS/July 2019/")
       total_data <- read.csv("test.csv")
       result_data <- NULL
       for (i in unique(total_data$Data_No)) {
         # Restrict to the current dataset so that sum() and n() below are
         # within-dataset values.
         data <- total_data[which(total_data$Data_No == i), ]
         data <- data %>%
           mutate(
             RelativeArea = Area / sum(Area),
             Expected = RelativeArea * sum(Observed),
             OminusE = Observed - Expected,
             O2 = OminusE^2,
             # Per-category chi-square contribution, (O - E)^2 / E.
             O2divE = O2 / Expected,
             APU = Observed / sum(Observed),
             # Bonferroni-corrected alpha: 0.05 / (2 * k), where k = n() is the
             # number of categories (rows) in this dataset. The parentheses
             # matter: 0.05 / 2 * k would evaluate as (0.05 / 2) * k.
             Alpha = 0.05 / (2 * n()),
             # Upper-tail critical value, e.g. 2.807 for k = 10.
             Zvalue = qnorm(Alpha, lower.tail = FALSE),
             lower = APU - Zvalue * sqrt(APU * (1 - APU) / sum(Observed)),
             upper = APU + Zvalue * sqrt(APU * (1 - APU) / sum(Observed))
           )
         result_data <- rbind(result_data, data)
       }
       write.csv(result_data, file = 'final_result.csv')