这是我的数据和我目前的绘图代码:
require(ggplot2)
# Example data: 10 values of `a`, each crossed with the same 7 values of `b`.
a <- rep(c(2, 5, 10, 15, 20, 30, 40, 50, 75, 100), each = 7)
b <- rep(c(0.001, 0.005, 0.01, 0.05, 0.5, 5, 50), times = 10)
# Region flag for every (a, b) pair; one row below per value of `a`,
# ordered by increasing `b`.
c <- c(
  TRUE,  TRUE,  TRUE,  TRUE,  TRUE,  TRUE, TRUE,  # a = 2
  FALSE, TRUE,  TRUE,  TRUE,  TRUE,  TRUE, TRUE,  # a = 5
  FALSE, FALSE, FALSE, TRUE,  TRUE,  TRUE, TRUE,  # a = 10
  FALSE, FALSE, FALSE, TRUE,  TRUE,  TRUE, TRUE,  # a = 15
  FALSE, FALSE, FALSE, TRUE,  TRUE,  TRUE, TRUE,  # a = 20
  FALSE, FALSE, FALSE, FALSE, TRUE,  TRUE, TRUE,  # a = 30
  FALSE, FALSE, FALSE, FALSE, TRUE,  TRUE, TRUE,  # a = 40
  FALSE, FALSE, FALSE, FALSE, TRUE,  TRUE, TRUE,  # a = 50
  FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE,  # a = 75
  FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE   # a = 100
)
dt <- data.frame(a = a, b = b, c = c)
# Scatter plot of the raw data: b (log10 y axis) against a, coloured by c.
ggplot(data = dt, mapping = aes(x = a, y = b, color = c)) +
  geom_point() +
  scale_y_log10()
我希望用蓝色和橙色作为背景区域的填充色,而不是像上图那样用蓝色和橙色的点来表示。边界可以是直线,也可以是 LOESS 平滑曲线,或者任何更容易实现的方式(我觉得平滑的曲线会更美观)!这听起来像是个难题。只要效果好看,我也欢迎对我所设想方案的各种变通做法!
你能帮帮我吗?谢谢。答案 0(得分:2):
你可以试试下面的做法:思路是在每个 a 分组中找到两个区域交界处的两个点,取这两个点的中点(几何平均),再对这些中点拟合一条 LOESS 平滑曲线作为边界:
library(dplyr)
# Make column c numeric (FALSE/TRUE become 0/1).
dt$c <- as.numeric(dt$c)
# Order the data frame by a, then c, so that within each value of a the
# rows with c == 0 come before the rows with c == 1. The sort keys are
# taken from dt's own columns rather than from the free-standing vectors
# `a` and `c`, so the result does not depend on those globals still
# existing and still being aligned with dt's rows.
dt <- dt[order(dt$a, dt$c), ]
# Get the rows where the change of "region" happens within one group.
#
# `x` is the data for a single value of `a`, already ordered so that rows
# with c == 0 come before rows with c == 1. The region switch is therefore
# at the first row with c == 1; that row and the one before it are returned.
#
# Returns:
#   - two rows (last 0, first 1) in the usual case;
#   - one row when the very first row already has c == 1 (there is no row
#     before it);
#   - zero rows when the group contains no c == 1 at all, instead of the
#     NA-filled rows (and warning) that min() on an empty index would give.
get_group_change <- function(x) {
  ones <- which(x[["c"]] == 1)
  if (length(ones) == 0L) {
    return(x[0L, , drop = FALSE])
  }
  idx <- ones[1L]
  # An index of 0 (when idx == 1) is silently dropped by `[`.
  x[c(idx - 1L, idx), , drop = FALSE]
}
# For every value of a, keep only the rows on either side of the region
# switch (as selected by get_group_change).
boundary_points <- do(group_by(dt, a), get_group_change(.))
# Midpoint of the boundary points on a log scale: the exponential of the
# mean of the logs, i.e. the geometric mean of `x`.
get_middle <- function(x) {
  log_values <- log(x)
  exp(mean(log_values))
}
# Collapse the boundary rows of each `a` group into a single middle point:
# b becomes get_middle() of the group's b values. `a` is the grouping key,
# so summarise() carries it through unchanged. Plain summarise() replaces
# the deprecated summarise_each()/funs() pair, which also tried to
# summarise the grouping column itself.
middle_points <- boundary_points %>%
  group_by(a) %>%
  summarise(b = get_middle(b))
# Give the middle points their own class value (2) so they can be told
# apart from the original 0/1 rows when the data are combined for plotting.
middle_points$c <- 2
# Boundary data frame: fit a LOESS curve to log(b) as a function of a over
# the middle points, predict it at every integer a from 2 to 100, and
# transform the prediction back from the log scale.
boundary <- data.frame(
  a = 2:100,
  b = exp(predict(loess(log(b) ~ a, data = middle_points), newdata = 2:100)),
  c = 2
)
# Plot the regions. The middle points (c == 2) are plotted together with the
# original data, and the area on each side of the LOESS boundary is shaded.
ggplot(
  data = rbind(dt, middle_points),
  mapping = aes(x = a, y = b, color = as.factor(c))
) +
  geom_point() +
  scale_y_log10() +
  # Below the boundary: from the smallest observed b up to the curve.
  geom_ribbon(
    data = boundary,
    mapping = aes(ymin = min(dt$b), ymax = b),
    alpha = 0.1, fill = "red", colour = NA
  ) +
  # Above the boundary: from the curve up to the largest observed b.
  geom_ribbon(
    data = boundary,
    mapping = aes(ymin = b, ymax = max(dt$b)),
    alpha = 0.1, fill = "green", colour = NA
  )
我得到了这样的结果:
或者用直线表示边界:
# Same plot, but the ribbons follow the middle points directly, so the
# boundary is made of straight segments between them instead of a LOESS curve.
ggplot(
  data = rbind(dt, middle_points),
  mapping = aes(x = a, y = b, color = as.factor(c))
) +
  geom_point() +
  scale_y_log10() +
  # Below the boundary: from the smallest observed b up to the middle points.
  geom_ribbon(
    data = middle_points,
    mapping = aes(ymin = min(dt$b), ymax = b),
    alpha = 0.1, fill = "red", colour = NA
  ) +
  # Above the boundary: from the middle points up to the largest observed b.
  geom_ribbon(
    data = middle_points,
    mapping = aes(ymin = b, ymax = max(dt$b)),
    alpha = 0.1, fill = "green", colour = NA
  )
如果这些点没有离散的b
...