我的问题可能有点难以理解,但我现在将尝试进一步解释。但是首先,我们以数据集mtcars
为例。我们现在ggplot:
ggplot(mtcars) +
geom_jitter(aes(disp,mpg), colour="blue") + geom_smooth(aes(disp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
geom_jitter(aes(hp,mpg), colour="green") + geom_smooth(aes(hp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
labs(x = "Percentage cover (%)", y = "Number of individuals (N)")
我们得到这个情节:
如您所见,两组的置信度限制的某些部分重叠。但是,我对重叠区域不感兴趣。
我对一行中的重叠百分比非常感兴趣。在这种情况下,该行是X的特定值。
让我们来x = 250
。在下一张图片中,您看到两个距离(红色和粉红色),每个距离对应于1组的CL。红色和粉红色之间有重叠,我想计算出这两个组之间的百分比。如何计算两个百分比?
答案 0 :(得分:0)
如果没有一些数据来举例说明,这有点棘手,因此,如果我正确理解,对于x轴上的给定值,在拟合模型后的某个位置,间隔的下限值和上限值都应该同时存在。那么,如果a是较高的线而b是较低的线,则对于每个x点,您都可以具有(La,Ua)和(Lb,Ub),即两个置信区间的下限值和上限值,然后是相对于曲线a的重叠为100 *(Ub-La)/(Ua-La)。以类似的方式,相对于b的重叠比例将为100 *(Ua-Lb)/(Ub-Lb)。在两种情况下,“ /”都是除号。
答案 1 :(得分:0)
,
重叠面积,
对应数据and
最终图解:data("mtcars")
model1=(lm(formula= mpg~poly(disp,3),data =mtcars))
model2=(lm(formula= mpg~poly(hp,3),data = mtcars))
overlapping<-function(data="mtcars",model1,model2, se=T, level=0.95,x.var1="disp",x.var2="hp",n_x=101,given_point=300){
require(ggplot2)
data1<-eval(parse(text=data))
range_x1<-range(eval(parse(text=paste0(data,"$",x.var1))))
range_x2<-range(eval(parse(text=paste0(data,"$",x.var2))))
x_range<-c(pmax(range_x1,range_x2)[1],pmin(range_x1,range_x2)[2])
predictdf.loess <- function(model, se=T, level=level,x.var="",n_x=101,x_range) {
xseq<-unique(seq(x_range[1],x_range[2],length.out = n_x))
newdata<-data.frame(assign (x.var , xseq))
names(newdata)<-x.var
pred <- stats::predict(model, newdata , se = se)
if (se) {
y = pred$fit
ci <- pred$se.fit * stats::qt(level / 2 + .5, pred$df)
ymin = y - ci
ymax = y + ci
data.frame(x = xseq, y, ymin, ymax, se = pred$se.fit)
} else {
data.frame(x = xseq, y = as.vector(pred))
}
}
smooth1<-predictdf.loess(model=model1, se=T, level,x.var=x.var1,n_x=101,x_range)
smooth2<-predictdf.loess(model=model2, se=T, level,x.var=x.var2,n_x=101,x_range)
perdict_givenpoint_model1<-predictdf.loess(model=model1, se=T, level,x.var=x.var1,n_x=101,x_range=c(given_point,given_point))[1,]
perdict_givenpoint_model2<-predictdf.loess(model=model2, se=T, level,x.var=x.var2,n_x=101,x_range=c(given_point,given_point))[1,]
y_givenpoint<-c(pmin(perdict_givenpoint_model1$ymin,perdict_givenpoint_model2$ymin),max_gp<- pmax(perdict_givenpoint_model1$ymax,perdict_givenpoint_model2$ymax))
overlap_<-function(x=perdict_givenpoint_model1,y=perdict_givenpoint_model2){
if(x$ymin<=y$ymax & x$ymax>= y$ymin){
if(x$ymax>y$ymax){ymax=y$ymax}else{ymax=x$ymax}
if(x$ymin >y$ymin){ymin=x$ymin}else{ymin=y$ymin}
return(cbind.data.frame(x=x$x,ymin=ymin,ymax=ymax))
} else{
return(cbind.data.frame(x=x$x,ymin=0,ymax=0))
}
}
overlap.givenpoint<-overlap_()
percent_gp1<-abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(diff(y_givenpoint))*100
percent_gp<-round(percent_gp1,2)
condit<-smooth1$ymin<=smooth2$ymax & smooth1$ymax>= smooth2$ymin
data<-list()
j<-0
for (i in which(condit)) {
if(smooth1$ymax[i]>smooth2$ymax[i]){ymax=smooth2$ymax[i]}else{ymax=smooth1$ymax[i]}
if(smooth1$ymin[i] >smooth2$ymin[i]){ymin=smooth1$ymin[i]}else{ymin=smooth2$ymin[i]}
j<-j+1
data[[j]]<-cbind.data.frame(x=smooth1$x[i],ymin=ymin,ymax=ymax)
}
df <- data.frame(matrix(unlist(data), nrow=length(data), byrow=T),stringsAsFactors=FALSE)
require(bivrp)
Polygon <- data.frame(x=c(df$X1,rev(df$X1)), y=c(df$X3, rev(df$X2)))
area<-polygon.area(Polygon)$area
p1<-ggplot(mtcars) +
geom_point(aes(disp,mpg), colour="blue") + geom_smooth(aes(disp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
geom_point(aes(hp,mpg), colour="green") + geom_smooth(aes(hp,mpg), method=lm, formula= y~poly(x,3), level=0.95, alpha=0.2) +
labs(x =paste("Percentage cover",percent_gp, "%") , y = "Number of individuals (N)")+geom_ribbon(data=df,aes(x=X1,ymin=X3,ymax=X2),fill="gray40")+geom_segment (data=overlap.givenpoint,aes(x=x, xend=x,y=overlap.givenpoint$ymin, yend=overlap.givenpoint$ymax ),colour="red", size=3)
out<-list(y_givenpoint,overlap.givenpoint,df,area,percent_gp,p1)
names(out)<-c("Maximum of Y in CI for Given Point","Y in Overlap Area","Overlap Dataset","Ovelap Area","Vline Percentage in Overlap Area by given point","Plot")
return(out)
}
overlapping(model1=model1,model2=model2)
model1=(lm(formula= mpg~poly(disp,3),data =mtcars))
model2=(lm(formula= mpg~poly(hp,3),data = mtcars))
ggplot2使用此函数( if n smaller than 1000)
来估计CI和平滑线:
predictdf.loess <- function(model, se=T, level=level,x.var="",given.point=300) {
newdata<-data.frame(assign (x.var , given.point))
names(newdata)<-x.var
pred <- stats::predict(model, newdata , se = se)
if (se) {
y = pred$fit
ci <- pred$se.fit * stats::qt(level / 2 + .5, pred$df)
ymin = y - ci
ymax = y + ci
data.frame(x = given.point, y, ymin, ymax, se = pred$se.fit)
} else {
data.frame(x = given.point, y = as.vector(pred))
}
}
Y为0.95 CI对于给定点的两个模型:
perdict_givenpoint_model1<-predictdf.loess(model=model1, se=T, level=0.95,x.var="disp",given.poin=300)
perdict_givenpoint_model2<-predictdf.loess(model=model2, se=T, level=0.95,x.var="hp",given.poin=300)
两种模型在给定点的重叠区域中Y的最大值和最小值:
y_givenpoint<- c(pmin(perdict_givenpoint_model1$ymin,perdict_givenpoint_model2$ymin),max_gp<- pmax(perdict_givenpoint_model1$ymax,perdict_givenpoint_model2$ymax))
在重叠区域中查找Y的功能:
overlap_<-function(x=perdict_givenpoint_model1,y=perdict_givenpoint_model2){
if(x$ymin<=y$ymax & x$ymax>= y$ymin){
if(x$ymax>y$ymax){ymax=y$ymax}else{ymax=x$ymax}
if(x$ymin >y$ymin){ymin=x$ymin}else{ymin=y$ymin}
return(cbind.data.frame(x=x$x,ymin=ymin,ymax=ymax))
} else{
return(cbind.data.frame(x=x$x,ymin=0,ymax=0))
}
}
重叠区域的Y:
overlap.givenpoint<-overlap_()
给定点在重叠区域中的Vline百分比:
percent_gp1<-abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(diff(y_givenpoint))*100
percent_gp<-round(percent_gp1,2)
对于上回归
abs(overlap.givenpoint$ymax-overlap.givenpoint$ymin)/abs(perdict_givenpoint_model1$ymax-perdict_givenpoint_model1$ymin)*100