每组分位数关系的最大点

时间:2019-11-21 17:51:18

标签: r loops quantile gam

简单地说;我需要知道,对于x和y之间的关系,沿x轴最大的y轴值适用于许多物种。

我可以使用以下代码对每个物种进行手动操作。这给了我一个最大值沿着x的图。最大值,以数据帧格式表示。以及模型的摘要输出。

我需要四舍五入x轴值,以便能够获得每个x轴值的有意义的分位数。

我想针对每种物种自动执行此分析,并将每种物种模型的摘要输出一起保存在一个csv中,并将所有物种的峰信息(以下代码中的最大当量)一起保存在一个csv中,并绘制图保存到特定文件夹。

我尝试使用for循环无效。

我以鸢尾花数据集为例,因为它提供了足够相似的变量。请注意,我的数据有数百种。

#packages
library(plyr)
library(mgcv)
library(rhub)
library(MASS)
library(qgam)


#DATA
data(iris)
df<-iris
df$x<-df$Sepal.Length
df$y<-df$Petal.Length
df$z<-df$Species



#definitions
quantileXX=0.90
binwidth=1
clustername=  "setosa"
#####clustername=  "versicolor"
#####clustername=  "virginica"



#alter df and vars
df<-subset(df, x>0)
df<-subset(df, df$z == clustername)
df<-df[!is.na(df$z),]
df$x<-round(df$x, binwidth)
df <- aggregate(y ~x, data=df, FUN = quantile, probs = quantileXX)
df<-df[!duplicated(df$x), ]
minxvar=min(df$x)
maxxvar=max(df$x)
minyvar=min(df$y)
maxyvar=max(df$y)

#model to get peaks from:


b <- gam(y~s(x, k=8, bs="ad"), data = df, err = 0.05)


eq<-seq(minxvar,maxxvar,length=1000) 
##making eq dataframe
pd <- data.frame(x=eq)
##use the formula to predict y along the pred.var sequence eq
fv <- predict(b,pd)
##the peak value
eq[fv==max(fv)]

## If we simulate replicate coefficient vectors from the posterior, then the peak location can be obtained for each.

##Xp is the matrix mapping the model coefficients to the model predictions at the equi values supplied in pd.

Xp <- predict(b,pd,type="lpmatrix")
#simulate 1000 coefficient vectors from the posterior using mvrnorm from the MASS library.
br <- mvrnorm(1000,coef(b),vcov(b))
br
#generate peaks of the posterieors
max.eq <- rep(NA,1000)
for (i in 1:1000)
{ fv <- Xp%*%br[i,]
max.eq[i] <- eq[fv==max(fv)]
}

#gives CIs for the simulated peaks
ciquant <- quantile(max.eq,c(.025,.975))
cimean <-mean(max.eq)
plot(b,shade=TRUE)
Fstcipeak <- ciquant[[1]][[1]]
Thrdcipeak<-ciquant[[2]][[1]]
meanpeak<-cimean[[1]]
meanpeak
abline(v=Fstcipeak, col=1, lty=5 , lwd=1)
abline(v=meanpeak, col=2, lty=5 , lwd=3)
abline(v=Thrdcipeak, col=1, lty=5 , lwd=1)
# Plot the fit and peaks as lines::::
fit <- gam(y~s(x, k=8, bs="ad"), data = df, err = 0.05)
summary(fit)

xSeq <- data.frame(cbind("y" = rep(0, 10), "x" = seq(minxvar, maxxvar, length.out = 1e3)))
pred <- predict(fit, newdata = xSeq, se=TRUE)
plot(df$x, df$y, xlab = "MAP (mm)", ylab = "y (m)", ylim = c(minyvar, maxyvar), xlim=c(minxvar,maxxvar), main=clustername)
lines(xSeq$x, pred$fit, lwd = 1)
lines(xSeq$x, pred$fit + 2*pred$se.fit, lwd = 1, col = 2)
lines(xSeq$x, pred$fit - 2*pred$se.fit, lwd = 1, col = 2)
abline(v=Fstcipeak, col=1, lty=5 , lwd=1)
abline(v=meanpeak, col=2, lty=5 , lwd=3)
abline(v=Thrdcipeak, col=1, lty=5 , lwd=1)
summary(fit)





# want to store the summary of curve for each species in one single csv file (i.e. one column for species and then many columns for different coefficietns, r2 etc...
write.csv(max.eq, "FILE")

# want to store  peak data for all species in one csv file

# want to save the plot that is made in one folder with species name, bin width and quantile in title. 

#######################################################################################################

非常感谢您能提供的任何帮助。 如果不清楚,请告诉我。

0 个答案:

没有答案