R中回归之间特定X值的置信区间之间的重叠百分比

时间:2018-07-27 23:11:19

标签: r

我的问题可能有点难以理解,但我现在将尝试进一步解释。但是首先,我们以数据集mtcars为例。我们现在ggplot:

ggplot(mtcars) + 
  geom_jitter(aes(disp,mpg), colour="blue") + geom_smooth(aes(disp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
  geom_jitter(aes(hp,mpg), colour="green") + geom_smooth(aes(hp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
  labs(x = "Percentage cover (%)", y = "Number of individuals (N)")

我们得到这个情节:

enter image description here

如您所见,两组的置信度限制的某些部分重叠。但是,我对重叠区域不感兴趣

对一行中的重叠百分比非常感兴趣。在这种情况下,该行是X的特定值。

让我们来x = 250。在下一张图片中,您看到两个距离(红色和粉红色),每个距离对应于1组的CL。红色和粉红色之间有重叠,我想计算出这两个组之间的百分比。如何计算两个百分比?

enter image description here

2 个答案:

答案 0 :(得分:0)

如果没有一些数据来举例说明,这有点棘手,因此,如果我正确理解,对于x轴上的给定值,在拟合模型后的某个位置,间隔的下限值和上限值都应该同时存在。那么,如果a是较高的线而b是较低的线,则对于每个x点,您都可以具有(La,Ua)和(Lb,Ub),即两个置信区间的下限值和上限值,然后是相对于曲线a的重叠为100 *(Ub-La)/(Ua-La)。以类似的方式,相对于b的重叠比例将为100 *(Ua-Lb)/(Ub-Lb)。在两种情况下,“ /”都是除号。

答案 1 :(得分:0)

函数给出了回归之间特定X值的置信区间之间的重叠百分比,重叠面积,对应数据and最终图解:

data("mtcars")
model1=(lm(formula= mpg~poly(disp,3),data =mtcars))
model2=(lm(formula= mpg~poly(hp,3),data = mtcars))
overlapping<-function(data="mtcars",model1,model2, se=T, level=0.95,x.var1="disp",x.var2="hp",n_x=101,given_point=300){
  require(ggplot2)
  data1<-eval(parse(text=data))
  range_x1<-range(eval(parse(text=paste0(data,"$",x.var1))))
  range_x2<-range(eval(parse(text=paste0(data,"$",x.var2))))
  x_range<-c(pmax(range_x1,range_x2)[1],pmin(range_x1,range_x2)[2])

  predictdf.loess <- function(model,  se=T, level=level,x.var="",n_x=101,x_range) {
    xseq<-unique(seq(x_range[1],x_range[2],length.out =  n_x)) 
    newdata<-data.frame(assign (x.var , xseq))
    names(newdata)<-x.var
    pred <- stats::predict(model, newdata , se = se)

    if (se) {
      y = pred$fit
      ci <- pred$se.fit * stats::qt(level / 2 + .5, pred$df)
      ymin = y - ci
      ymax = y + ci
      data.frame(x = xseq, y, ymin, ymax, se = pred$se.fit)
    } else {
      data.frame(x = xseq, y = as.vector(pred))
    }

  }
  smooth1<-predictdf.loess(model=model1,  se=T, level,x.var=x.var1,n_x=101,x_range)
  smooth2<-predictdf.loess(model=model2,  se=T, level,x.var=x.var2,n_x=101,x_range)

  perdict_givenpoint_model1<-predictdf.loess(model=model1,  se=T, level,x.var=x.var1,n_x=101,x_range=c(given_point,given_point))[1,]
  perdict_givenpoint_model2<-predictdf.loess(model=model2,  se=T, level,x.var=x.var2,n_x=101,x_range=c(given_point,given_point))[1,]
  y_givenpoint<-c(pmin(perdict_givenpoint_model1$ymin,perdict_givenpoint_model2$ymin),max_gp<- pmax(perdict_givenpoint_model1$ymax,perdict_givenpoint_model2$ymax))


 overlap_<-function(x=perdict_givenpoint_model1,y=perdict_givenpoint_model2){
   if(x$ymin<=y$ymax & x$ymax>= y$ymin){
   if(x$ymax>y$ymax){ymax=y$ymax}else{ymax=x$ymax}
   if(x$ymin >y$ymin){ymin=x$ymin}else{ymin=y$ymin}
     return(cbind.data.frame(x=x$x,ymin=ymin,ymax=ymax))
   } else{
       return(cbind.data.frame(x=x$x,ymin=0,ymax=0))
  }
}

overlap.givenpoint<-overlap_()
percent_gp1<-abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(diff(y_givenpoint))*100
  percent_gp<-round(percent_gp1,2)

  condit<-smooth1$ymin<=smooth2$ymax & smooth1$ymax>= smooth2$ymin
  data<-list()
  j<-0
  for (i in which(condit)) {
    if(smooth1$ymax[i]>smooth2$ymax[i]){ymax=smooth2$ymax[i]}else{ymax=smooth1$ymax[i]}
    if(smooth1$ymin[i] >smooth2$ymin[i]){ymin=smooth1$ymin[i]}else{ymin=smooth2$ymin[i]}
    j<-j+1
    data[[j]]<-cbind.data.frame(x=smooth1$x[i],ymin=ymin,ymax=ymax)
  }

  df <- data.frame(matrix(unlist(data), nrow=length(data), byrow=T),stringsAsFactors=FALSE)
  require(bivrp)
  Polygon <- data.frame(x=c(df$X1,rev(df$X1)), y=c(df$X3, rev(df$X2)))
  area<-polygon.area(Polygon)$area
  p1<-ggplot(mtcars) + 
    geom_point(aes(disp,mpg), colour="blue") + geom_smooth(aes(disp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
geom_point(aes(hp,mpg), colour="green") + geom_smooth(aes(hp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
labs(x =paste("Percentage cover",percent_gp, "%")  , y = "Number of individuals (N)")+geom_ribbon(data=df,aes(x=X1,ymin=X3,ymax=X2),fill="gray40")+geom_segment (data=overlap.givenpoint,aes(x=x, xend=x,y=overlap.givenpoint$ymin, yend=overlap.givenpoint$ymax ),colour="red", size=3)
 out<-list(y_givenpoint,overlap.givenpoint,df,area,percent_gp,p1)
  names(out)<-c("Maximum of Y in CI for Given Point","Y in Overlap Area","Overlap Dataset","Ovelap Area","Vline Percentage in Overlap Area by given point","Plot")
  return(out)

}
overlapping(model1=model1,model2=model2)

为简单起见(只需计算R中回归之间特定X值的置信区间之间的重叠百分比)

回归模型:
model1=(lm(formula= mpg~poly(disp,3),data =mtcars))
model2=(lm(formula= mpg~poly(hp,3),data = mtcars))
ggplot2使用此函数( if n smaller than 1000)来估计CI和平滑线:
predictdf.loess <- function(model,  se=T, level=level,x.var="",given.point=300) {
  newdata<-data.frame(assign (x.var , given.point))
  names(newdata)<-x.var
  pred <- stats::predict(model, newdata , se = se)

  if (se) {
    y = pred$fit
    ci <- pred$se.fit * stats::qt(level / 2 + .5, pred$df)
    ymin = y - ci
    ymax = y + ci
    data.frame(x = given.point, y, ymin, ymax, se = pred$se.fit)
  } else {
    data.frame(x = given.point, y = as.vector(pred))
  }

}
Y为0.95 CI对于给定点的两个模型:
perdict_givenpoint_model1<-predictdf.loess(model=model1,  se=T, level=0.95,x.var="disp",given.poin=300)
perdict_givenpoint_model2<-predictdf.loess(model=model2,  se=T, level=0.95,x.var="hp",given.poin=300)
两种模型在给定点的重叠区域中Y的最大值和最小值:
y_givenpoint<- c(pmin(perdict_givenpoint_model1$ymin,perdict_givenpoint_model2$ymin),max_gp<- pmax(perdict_givenpoint_model1$ymax,perdict_givenpoint_model2$ymax))
在重叠区域中查找Y的功能:
overlap_<-function(x=perdict_givenpoint_model1,y=perdict_givenpoint_model2){
   if(x$ymin<=y$ymax & x$ymax>= y$ymin){
   if(x$ymax>y$ymax){ymax=y$ymax}else{ymax=x$ymax}
   if(x$ymin >y$ymin){ymin=x$ymin}else{ymin=y$ymin}
     return(cbind.data.frame(x=x$x,ymin=ymin,ymax=ymax))
   } else{
       return(cbind.data.frame(x=x$x,ymin=0,ymax=0))
   }
}
重叠区域的Y:
overlap.givenpoint<-overlap_()
给定点在重叠区域中的Vline百分比:
percent_gp1<-abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(diff(y_givenpoint))*100
percent_gp<-round(percent_gp1,2)
对于上回归
abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(perdict_givenpoint_model1$ymax-perdict_givenpoint_model1$ymin)*100