循环遍历R数据帧中的列

时间:2015-11-18 10:55:33

标签: r

我在R中编写一个代码,允许我绘制直方图以及中位数和四分位数,但我在数据帧列中循环时遇到问题。

下面是我的数据框的前几行(`head` 的输出)以及我的代码。

最后,会生成直方图,但中位数和四分位数与实际分布无关。

  ROI    DOY_119    DOY_127   DOY_143    DOY_151   DOY_175    DOY_191    DOY_215    DOY_239    DOY_263
1   4 -11.592668  -9.457701 -12.57275 -11.073490 -8.999743  -9.132843  -9.995659  -9.511699  -9.393022
2   4 -11.518109 -10.231917 -11.96543 -10.757207 -9.558524  -8.529423  -9.562449  -9.511699  -9.578184
3   4  -9.633711  -9.488475 -12.09012 -10.357404 -8.535619  -8.294449  -9.179331  -7.652297  -6.952941
4   4  -7.752080  -9.578184 -11.30182 -11.073490 -8.992849  -6.197888  -6.556077  -5.883803  -6.324577
5   4 -12.533658  -9.347939 -12.74088 -10.506100 -8.958544 -10.486625 -10.809219 -10.550241  -9.307020
6   4 -13.036436  -8.054857 -13.45823  -9.122186 -7.654827 -10.159230 -10.423927 -11.319436 -10.736576

# For every measurement column of `fileIn` (all columns after the first one,
# `ROI`): draw a histogram with a density overlay, faceted by ROI, mark the
# per-ROI median and 5/25/75/95 % quantiles with vertical lines, show the
# figure in an X11 window and copy it to a JPEG file.
# Relies on `fileIn`, `directory`, plyr and ggplot2 being available already.
for (i in seq_len(ncol(fileIn))[-1]) {

  col_name <- names(fileIn)[i]
  myHist <- paste0(directory, i - 1)
  x11(width = 50, height = 50)

  # Two-column working copy: the grouping variable plus COLUMN i as `value`.
  # (The previous code indexed fileIn[i, ], i.e. ROW i, for the x aesthetic.)
  plot_data <- data.frame(
    ROI = fileIn[["ROI"]],
    value = as.numeric(as.matrix(fileIn[i]))
  )

  # Summary statistics per ROI. `value` is resolved inside each ROI piece, so
  # every facet gets its own lines, and `probs` is passed to quantile() itself
  # (previously the probability ended up outside the quantile() call).
  roi_stats <- ddply(
    plot_data, "ROI", summarise,
    grp.medi = median(value),
    grp.q05 = quantile(value, probs = 0.05, names = FALSE),
    grp.q25 = quantile(value, probs = 0.25, names = FALSE),
    grp.q75 = quantile(value, probs = 0.75, names = FALSE),
    grp.q95 = quantile(value, probs = 0.95, names = FALSE)
  )

  plotHist <-
    ggplot(plot_data, aes(x = value)) +
    geom_histogram(alpha = 0.5, binwidth = 0.5, color = "grey", fill = "yellow") +
    geom_density(color = "green", fill = "green", alpha = 0.5) +
    geom_vline(data = roi_stats, aes(xintercept = grp.medi), color = "red",   size = 0.7) +
    geom_vline(data = roi_stats, aes(xintercept = grp.q05),  color = "black", size = 0.3) +
    geom_vline(data = roi_stats, aes(xintercept = grp.q25),  color = "blue",  size = 0.5) +
    geom_vline(data = roi_stats, aes(xintercept = grp.q75),  color = "blue",  size = 0.5) +
    geom_vline(data = roi_stats, aes(xintercept = grp.q95),  color = "black", size = 0.3) +
    xlab(col_name) +
    theme(axis.text.x = element_text(colour = "black"),
          axis.text.y = element_text(colour = "black")) +
    facet_wrap(~ ROI, scales = "free")

  print(plotHist)

  #------------------------------------------------------------------------------------------------------
  # Save the contents of the X11 window: copy it to a JPEG device, then close
  # that JPEG device so the file is written.

  dev.copy(jpeg, myHist, width = 2000, height = 1000, res = 100)
  dev.off()
}

1 个答案:

答案 0 :(得分:0)

这是一个起点。这类问题有很多种解法,没有哪一种始终优于其他方法。在你的代码中,有些做法效率很低(例如逐个计算分位数、逐条创建 vline)。在 ggplot 中,如果你发现自己在重复编写非常相似的代码行(比如调用 5 次 `geom_vline`),通常说明存在更好的写法。我把这部分替换为:先计算 `vline_data`,再把它传给 `geom_vline`,并配合一些手动标度(manual scales)。

# Demo only: clone the data under a second ROI id (5) and append the clone,
# so that the faceted plots have more than one panel to show.
fileIn2 <- replace(fileIn, "ROI", 5)
fileIn <- rbind(fileIn, fileIn2)


# Build one ggplot object per measurement column of `fileIn` (every column
# except the first, `ROI`): a histogram with a density overlay, faceted by ROI,
# with vertical lines at the per-ROI 5/25/50/75/95 % quantiles.
# Returns a list of plots in column order; nothing is drawn or saved here.
myplots <- lapply(colnames(fileIn)[-1], function(col_of_interest) {
  myprobs <- c(0.05, 0.25, 0.5, 0.75, 0.95)
  prob_labels <- as.character(myprobs)

  # Styling keyed by probability label, so each quantile keeps its own colour
  # and width regardless of level order: outer quantiles thin black, quartiles
  # blue, median thick red. (The colours for 0.75 and 0.95 used to be swapped.)
  line_colours <- setNames(c("black", "blue", "red", "blue", "black"), prob_labels)
  line_sizes <- setNames(c(0.3, 0.5, 0.7, 0.5, 0.3), prob_labels)

  # Long-format summary: one row per (ROI, probability) pair.
  vline_data <- ddply(fileIn, .(ROI), function(x) {
    data.frame(
      prob = prob_labels,
      value = quantile(x[[col_of_interest]], probs = myprobs)
    )
  })

  # The column name arrives as a string, hence aes_string() rather than aes().
  plotHist <-
    ggplot(fileIn, aes_string(x = col_of_interest)) +
    geom_histogram(alpha = 0.5, binwidth = 0.5, color = "grey", fill = "yellow") +
    geom_density(color = "green", fill = "green", alpha = 0.5) +
    geom_vline(data = vline_data, aes(xintercept = value, size = prob, color = prob)) +
    scale_color_manual(values = line_colours, breaks = prob_labels) +
    scale_size_manual(values = line_sizes, breaks = prob_labels) +
    facet_wrap(~ ROI, scales = "free")

  # Optional: save each plot here with `ggsave`.
  # ggsave(filename = paste0(directory, col_of_interest, ".png"), plot = plotHist)
  plotHist
})

enter image description here