我的脚本旨在运行每列中组(行)之间的anova比较。然后它会导致一个数据帧迭代anova和post-hoc结果以及列的名称。但是,它会不断迭代相同的anova和Post-hoc结果。我该如何解决? Find a sample sheet here
#Save your Datasheet into variable X
x <- read.csv("T0_B_Class_Anova_test.csv")
x[is.na(x)] <- 0
DF.Anova <- data.frame()
DF.Tukey <- data.frame()
#Counts through the columns
for(i in 2:(ncol(x)-2)){
columns <- names(x[i])
##Runs an ANOVA - Group being a grouping factor
anovaresult <- anova(aov(x[,2]~Group,data=x))
DF.Anova <- rbind(DF.Anova, anovaresult)
##fix anova into data frame
Famall = colnames(x)
Famall = as.data.frame(Famall)
Famall = Famall[2:52,]
Famall = as.data.frame(Famall)
DFanovanames = rep(Famall, each = 2)
DFanovanames = as.data.frame(DFanovanames)
#install.packages("tidyr")
library(tidyr)
anovanames = data.frame(Names=unlist(DFanovanames, use.names = FALSE))
o.anovanames = dplyr::arrange(anovanames, Names)
finalanova_BFT0 = cbind(rn = rownames(DF.Anova), DF.Anova, o.anovanames)
##Runs Tukeys Post-hoc test on Anova
posthocresult <- TukeyHSD(aov(x[,2]~Group,data=x))
DF.Tukey <- rbind(DF.Tukey, posthocresult$Group)
##fix tukey into data frame
Famname = colnames(x)
Famname = as.data.frame(Famname)
Famname = Famname[2:52,]
Famname = as.data.frame(Famname)
DFposthocnames = rep(Famname, each = 3)
DFposthocnames = data.frame(DFposthocnames)
install.packages("tidyr")
library(tidyr)
posthocnames = data.frame(Names=unlist(DFposthocnames, use.names = FALSE))
o.posthocnames = dplyr::arrange(posthocnames, Names)
finalposthoc_BFT0 = cbind(rn = rownames(DF.Tukey), DF.Tukey, o.posthocnames)
##Prints posthoc results into txt file
print(columns)
print(anovaresult)
print(posthocresult)
}
write.csv(finalanova_BFT0, file="testfinalanova_BCT0.csv")
write.csv(finalposthoc_BFT0, file="finalposthoc_BCT0.csv")
答案 0 :(得分:1)
首先,正如Circle 2: Growing Objects of the R Inferno中所建议的那样,避免在循环中扩展复杂(更高维度)的对象,如数据帧。这是内存效率低下导致RAM中的过多复制。
至于重复结果,目前您的anova
和TukeyHSD
公式在每次迭代中都不会更改。具体来说,每次迭代都会保留x[,2]
:
anovaresult <- anova(aov(x[,2]~Group,data=x))
posthocresult <- TukeyHSD(aov(x[,2]~Group,data=x))
因此,请考虑以下调整,使用基础R构建具有lapply
调用的数据帧列表,并直接在数据帧中绑定测试结果。在循环之外,然后为一个最终的单一数据帧附加所有数据帧。
下面演示了使用可重现的随机化示例,因为我的终端(安全性)无法访问GDrive链接,并且可能无法供将来的读者使用。只需要进行调整就是要传递到lapply
的列号序列。
数据 (播种可重复性)
set.seed(061818)
x <- data.frame(
Group = replicate(500, sample(c("julia", "r", "pandas", "stata", "sas", "spss"),
1, replace=TRUE)),
NUM1 = rnorm(500) * 100,
NUM2 = rnorm(500),
NUM3 = rnorm(500) / 100
)
Dataframe Build
# CREATE LIST OF ANOVA RESULTS DATAFRAME
anova_df_list <- lapply(2:(ncol(x)), function(i){
##Runs an ANOVA - Group being a grouping factor
anovaresult <- anova(aov(x[,i] ~ Group, data=x))
data.frame(var = names(x[i]),
type = row.names(anovaresult),
anovaresult,
row.names = NULL)
})
# CREATE LIST OF TUKEY HSD RESULTS DATAFRAME
tukey_df_list <- lapply(2:(ncol(x)), function(i){
##Runs an ANOVA - Group being a grouping factor
posthocresult <- TukeyHSD(aov(x[,i] ~ Group, data=x))
data.frame(var = names(x[i]),
type = row.names(posthocresult$Group),
posthocresult$Group,
row.names = NULL)
})
# APPEND ALL DFs FOR SINGUL DATAFRAME OUTPUT
finalanova_BFT0 <- do.call(rbind, anova_df_list)
finalposthoc_BFT0 <- do.call(rbind, tukey_df_list)
输出
finalanova_BFT0
# var type Df Sum.Sq Mean.Sq F.value Pr..F.
# 1 NUM1 Group 5 3.294895e+04 6.589791e+03 0.5689185 0.7238625
# 2 NUM1 Residuals 494 5.722009e+06 1.158301e+04 NA NA
# 3 NUM2 Group 5 4.555384e+00 9.110768e-01 1.0519364 0.3864008
# 4 NUM2 Residuals 494 4.278509e+02 8.660949e-01 NA NA
# 5 NUM3 Group 5 7.930182e-04 1.586036e-04 1.4649269 0.1997531
# 6 NUM3 Residuals 494 5.348403e-02 1.082673e-04 NA NA
finalposthoc_BFT0
# var type diff lwr upr p.adj
# 1 NUM1 pandas-julia 1.588690e+01 -30.303690269 6.207749e+01 0.9229562
# 2 NUM1 r-julia 6.224191e+00 -39.105049169 5.155343e+01 0.9987890
# 3 NUM1 sas-julia -2.558098e+00 -48.597653093 4.348146e+01 0.9999859
# 4 NUM1 spss-julia -5.533965e-01 -46.743985299 4.563719e+01 1.0000000
# 5 NUM1 stata-julia 1.920475e+01 -29.494409788 6.790390e+01 0.8695383
# 6 NUM1 r-pandas -9.662708e+00 -56.922953435 3.759754e+01 0.9920073
# 7 NUM1 sas-pandas -1.844500e+01 -66.386955838 2.949696e+01 0.8810193
# 8 NUM1 spss-pandas -1.644030e+01 -64.527313252 3.164672e+01 0.9247795
# 9 NUM1 stata-pandas 3.317847e+00 -47.183623854 5.381932e+01 0.9999676
# 10 NUM1 sas-r -8.782289e+00 -55.894929992 3.833035e+01 0.9948026
# 11 NUM1 spss-r -6.777587e+00 -54.037832960 4.048266e+01 0.9985067
# 12 NUM1 stata-r 1.298055e+01 -36.734312824 6.269542e+01 0.9758552
# 13 NUM1 spss-sas 2.004702e+00 -45.937257220 4.994666e+01 0.9999966
# 14 NUM1 stata-sas 2.176284e+01 -28.600522533 7.212621e+01 0.8188459
# 15 NUM1 stata-spss 1.975814e+01 -30.743328824 7.025961e+01 0.8733230
# 16 NUM2 pandas-julia 4.328917e-02 -0.356126828 4.427052e-01 0.9996168
# 17 NUM2 r-julia 7.351585e-02 -0.318451955 4.654837e-01 0.9946513
# 18 NUM2 sas-julia 1.260665e-01 -0.272043455 5.241765e-01 0.9449289
# 19 NUM2 spss-julia 2.112904e-01 -0.188125601 6.107064e-01 0.6557799
# 20 NUM2 stata-julia 2.834413e-01 -0.137666545 7.045492e-01 0.3876287
# 21 NUM2 r-pandas 3.022668e-02 -0.378438781 4.388921e-01 0.9999416
# 22 NUM2 sas-pandas 8.277736e-02 -0.331782960 4.973377e-01 0.9928376
# 23 NUM2 spss-pandas 1.680012e-01 -0.247813441 5.838159e-01 0.8573913
# 24 NUM2 stata-pandas 2.401522e-01 -0.196540570 6.768449e-01 0.6165912
# 25 NUM2 sas-r 5.255068e-02 -0.354838417 4.599398e-01 0.9991049
# 26 NUM2 spss-r 1.377745e-01 -0.270890910 5.464400e-01 0.9288895
# 27 NUM2 stata-r 2.099255e-01 -0.219965388 6.398164e-01 0.7288549
# 28 NUM2 spss-sas 8.522386e-02 -0.329336457 4.997842e-01 0.9918030
# 29 NUM2 stata-sas 1.573748e-01 -0.278123725 5.928734e-01 0.9063874
# 30 NUM2 stata-spss 7.215095e-02 -0.364541797 5.088437e-01 0.9970600
# 31 NUM3 pandas-julia 2.540117e-03 -0.001925601 7.005835e-03 0.5807914
# 32 NUM3 r-julia -1.418724e-03 -0.005801167 2.963718e-03 0.9396929
# 33 NUM3 sas-julia -9.892923e-04 -0.005440408 3.461824e-03 0.9882722
# 34 NUM3 spss-julia 2.222945e-04 -0.004243423 4.688012e-03 0.9999918
# 35 NUM3 stata-julia 2.480217e-04 -0.004460225 4.956269e-03 0.9999892
# 36 NUM3 r-pandas -3.958842e-03 -0.008527974 6.102912e-04 0.1323856
# 37 NUM3 sas-pandas -3.529410e-03 -0.008164451 1.105631e-03 0.2496801
# 38 NUM3 spss-pandas -2.317823e-03 -0.006966888 2.331242e-03 0.7109641
# 39 NUM3 stata-pandas -2.292096e-03 -0.007174591 2.590399e-03 0.7607515
# 40 NUM3 sas-r 4.294318e-04 -0.004125430 4.984294e-03 0.9998066
# 41 NUM3 spss-r 1.641019e-03 -0.002928114 6.210151e-03 0.9086036
# 42 NUM3 stata-r 1.666746e-03 -0.003139700 6.473192e-03 0.9204005
# 43 NUM3 spss-sas 1.211587e-03 -0.003423454 5.846628e-03 0.9757341
# 44 NUM3 stata-sas 1.237314e-03 -0.003631829 6.106457e-03 0.9785797
# 45 NUM3 stata-spss 2.572720e-05 -0.004856768 4.908222e-03 1.0000000