GGPLOT构面中的条形图和散点图

时间:2020-03-27 04:05:59

标签: r ggplot2 facet

我有一些数据显示对治疗的反应(分类变量)。每个重复项还具有一个基于相对于个体的处理量(连续变量)分配的已知值。我想做的是使用ggplot2在一个图中同时显示对治疗效果的响应(条形图)和对连续变量的响应(散点图/回归),其方式类似于使用facet_grid或facet_wrap命令可以实现的效果。基本上,我想使用ggplot2重新创建下面的图。

enter image description here

这是我用来生成样本数据和创建图的代码

# Simulates four treatments (A-D), then draws two base-graphics panels side by
# side: a bar chart of mean response per treatment (with standard-error bars)
# and a scatterplot of response against the continuous variable with a linear
# fit and its 95% confidence band.

## GENERATE CONTINUOUS VARIABLES FOR EACH TREATMENT (A - D)
# abs() keeps the simulated values non-negative.
A <- abs(rnorm(10, 1, 1))
B <- abs(rnorm(10, 3, 1))
C <- abs(rnorm(10, 5, 1))
D <- abs(rnorm(10, 7, 1))

## GENERATE RESPONSE TO TREATMENTS
res_A <- rnorm(10, 28, 3)
res_B <- rnorm(10, 22, 3)
res_C <- rnorm(10, 18, 3)
res_D <- rnorm(10, 12, 3)

## ESTABLISH DATA FRAMES FOR TREATMENTS AND RESPONSE
treatments <- data.frame(A, B, C, D)
response <- data.frame(res_A, res_B, res_C, res_D)

## CONVERT EACH DATA FRAME TO LONG FORM
# Base stack() yields columns `values` and `ind` (a factor of the source
# column names), so no reshaping package has to be attached.
treatments <- stack(treatments)
response <- stack(response)

## CREATE FINAL DATA FRAME WITH REQUIRED DATA
# Both frames are stacked column by column, so row i of each refers to the
# same replicate.
data <- data.frame(
  Treatment = treatments$ind,
  X = treatments$values,
  Response = response$values
)

## ESTABLISH MEANS AND STANDARD ERROR FOR TREATMENT EFFECTS
means <- tapply(data$Response, data$Treatment, mean, na.rm = TRUE)
ER <- tapply(data$Response, data$Treatment, sd, na.rm = TRUE) /
  sqrt(tapply(data$Response, data$Treatment, length))

## SET AESTHETICS AND LABEL VALUES
cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")
trt_labels <- c("A", "B", "C", "D")

## GENERATE PLOT CANVAS
# Keep the previous graphical parameters so they can be restored at the end.
old_par <- par(mfrow = c(1, 2), mar = c(3.5, 3, 2, 1))

## GENERATE BAR PLOT
# barplot() returns the x positions of the bar midpoints; they anchor the
# error bars, the axis labels and the significance letters.
bar_mid <- barplot(
  means,
  col = cols, las = 1, xaxt = "n", yaxt = "n",
  xlab = NA, ylab = NA, font.lab = 2, font.axis = 2,
  cex = 0.6, cex.lab = 0.6, cex.axis = 0.6,
  ylim = c(0, 40), main = "TREATMENT EFFECTS"
)
box()
arrows(
  x0 = bar_mid, y0 = means - ER, x1 = bar_mid, y1 = means + ER,
  code = 3, angle = 90, length = 0.02, lwd = 1
)
axis(
  side = 1, line = 0, at = bar_mid, labels = trt_labels,
  cex.axis = 0.8, mgp = c(0, 0.5, 0), tck = -0.02, font.axis = 1
)
axis(
  side = 2, line = 0, at = seq(0, 40, 10), las = 1, cex.axis = 0.8,
  labels = seq(0, 40, 10), cex = 0.6, mgp = c(0, 0.6, 0)
)
mtext("Treatment", side = 1, cex = 1.2, line = 2)
mtext("Response", side = 2, cex = 1.2, line = 1.75)
# Significance letters sit 2 units above the top of each error bar.
text(bar_mid, means + ER + 2, c("a", "b", "c", "d"), font = 1, cex = 1.2)

## GENERATE SCATTERPLOT
par(mar = c(3.5, 2, 2, 2))
# type = "n" sets up the axes only; points are added last so they sit on top.
plot(
  data$X, data$Response,
  type = "n", ylim = c(0, 40), xlim = c(0, 9),
  pch = 21, col = "black", cex = 1.5,
  xaxt = "n", yaxt = "n", xlab = NA, ylab = NA,
  main = "CONTINUOUS RESPONSE"
)
axis(
  side = 1, line = 0, at = seq(0, 9, 3), labels = TRUE,
  cex.axis = 0.8, mgp = c(0, 0.5, 0), tck = -0.02, font.axis = 1
)
# Unlabelled y ticks pointing both inwards and outwards.
axis(side = 2, line = 0, at = seq(0, 40, 10), labels = FALSE, tck = 0.01)
axis(side = 2, line = 0, at = seq(0, 40, 10), labels = FALSE, tck = -0.01)
mtext("Continuous variable", side = 1, cex = 1.2, line = 2)

## PERFORM REGRESSION
model <- lm(Response ~ X, data = data)

## ADD IN CONFIDENCE INTERVAL, THEN THE REGRESSION LINE
# predict() returns columns "fit", "lwr" and "upr".
newx <- seq(0, 9, length.out = 1000)
preds <- predict(
  model,
  newdata = data.frame(X = newx),
  interval = "confidence"
)
# The opaque band is drawn first; otherwise it would hide the fitted line.
polygon(
  c(rev(newx), newx), c(rev(preds[, "upr"]), preds[, "lwr"]),
  col = "grey80", border = NA
)
lines(newx, preds[, "upr"], lty = "dashed", col = "grey36", lwd = 1)
lines(newx, preds[, "lwr"], lty = "dashed", col = "grey36", lwd = 1)
abline(model, lwd = 2)

## ADD IN POINTS ON TOP OF CI POLYGON
points(
  data$X, data$Response,
  bg = cols[match(data$Treatment, trt_labels)],
  pch = 21, col = "black", cex = 1.5
)

## ADD THE REGRESSION EQ
# The data are random, so the annotation is built from the fitted model
# instead of being typed in by hand.
coefs <- coef(model)
fit <- summary(model)
p_value <- fit$coefficients["X", "Pr(>|t|)"]
stars <- if (p_value < 0.001) {
  "***"
} else if (p_value < 0.01) {
  "**"
} else if (p_value < 0.05) {
  "*"
} else {
  "ns"
}
eq_text <- sprintf(
  "y = %.2f %s %.2fx",
  coefs[[1]], if (coefs[[2]] < 0) "-" else "+", abs(coefs[[2]])
)
rsq_text <- sprintf("= %.2f %s", fit$r.squared, stars)
eq <- bquote(italic(.(eq_text)))
rsq <- bquote(italic("R"^2 ~ .(rsq_text)))
text(5, 35, eq, cex = 1.2)
text(4.5, 33, rsq, cex = 1.2)

# Restore the graphical parameters that were in effect before this script.
par(old_par)

是否甚至可以使用ggplot2来做到这一点?

3 个答案:

答案 0 :(得分:2)

当然可以:您可以使用ggplot2,并借助ggpubr和ggpmisc来实现

library(dplyr)
library(ggplot2)
library(ggpubr)
library(ggpmisc)

# Bar chart of the mean response per treatment with +/- 1 standard-error bars.
# Reads `data` (columns Treatment, Response) and the colour vector `cols`.
# `se` is computed inside summarise() and before `Response` is overwritten
# with the group mean, so it is the per-treatment standard error rather than
# the spread of the four group means.
a <- data %>%
  group_by(Treatment) %>%
  summarise(
    se = sd(Response) / sqrt(n()),
    Response = mean(Response)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = Treatment, y = Response, fill = Treatment)) +
  geom_col(show.legend = FALSE) +
  geom_errorbar(
    aes(ymin = Response - se, ymax = Response + se),
    width = 0.1
  ) +
  # Lower-case treatment letter placed just above the top of each error bar.
  geom_text(
    aes(y = Response + se, label = tolower(Treatment)),
    vjust = -0.5
  ) +
  scale_fill_manual(values = cols) +
  labs(title = "TREATMENT EFFECTS")

# Scatterplot of Response against the continuous variable X with a linear fit
# and the fitted equation plus R^2 printed in the panel.
# Reads `data` (columns X, Response, Treatment) and the colour vector `cols`.
b <- data %>%
  ggplot(aes(x = X, y = Response)) +
  # Same formula as stat_poly_eq() below so the line and the label agree.
  geom_smooth(method = "lm", formula = y ~ x, show.legend = FALSE) +
  geom_point(
    aes(fill = factor(Treatment)),
    shape = 21, size = 3, show.legend = FALSE
  ) +
  scale_fill_manual(values = cols) +
  # The axis title is set through the `x` label; `xlab` is not a label name.
  labs(title = "CONTINUOUS RESPONSE", x = "Continuous Variable") +
  stat_poly_eq(
    mapping = aes(
      label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~")
    ),
    formula = y ~ x, label.x = 0.9, label.y = 0.95, parse = TRUE
  )

# Place the bar chart and the scatterplot side by side in a single row.
ggarrange(plotlist = list(a, b), nrow = 1)

Plot 数据:

# Example data set as a literal data frame with 40 rows:
#   Treatment - factor with levels "A", "B", "C", "D" (10 rows per level)
#   X         - numeric continuous variable
#   Response  - numeric response
data <- structure(list(Treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L), .Label = c("A", "B", "C", "D"), class = "factor"), X = c(0.0267649727104236, 
1.39488715616046, 0.21355823940511, 0.49907410504292, 0.375279051787701, 
0.681959942595334, 2.05952354292797, 2.26083984353423, 1.11354591314711, 
0.34506137947979, 2.07866454079728, 3.20194490569932, 3.26342299162599, 
2.06754732525705, 4.02484423090347, 3.21831253488128, 3.56925840330762, 
3.92631915144912, 2.55130407898901, 4.88369094725247, 4.85805706436391, 
6.06714331089959, 5.05696298595936, 4.28599088092722, 2.64907718621996, 
5.50017966947343, 5.27853136585637, 5.8694723514342, 4.57774253201089, 
4.28459862391154, 6.6919479712577, 6.89039252602714, 7.36883429701188, 
5.01895090471179, 7.66623439220746, 7.27620218490248, 6.44135570941742, 
7.66409390386461, 8.09858213415943, 4.89114777053612), Response = c(28.647362805403, 
30.5878855986189, 36.3739824861786, 33.5874379487616, 23.8060926287858, 
30.8520531077353, 26.5940268747477, 28.8356526462252, 30.4727218173035, 
26.8151163416507, 17.6391456006427, 19.0921380684935, 21.6950437768534, 
23.9017396212974, 27.1407090174467, 15.4322366130883, 26.9809942596379, 
22.7341801522041, 23.6518581209459, 21.8377270248132, 13.2905142368901, 
19.8951142352182, 17.1400860924093, 16.847732448511, 15.6213812276033, 
18.3368951001566, 18.7411799795391, 17.5514579276854, 14.2841781950673, 
21.6044042356051, 11.0037691942103, 13.0260853225773, 10.6862778263241, 
9.4482751070798, 11.9896873712498, 10.0798146375625, 12.6332310111476, 
14.4806588768585, 6.89810707498932, 7.55062781781536)), class = "data.frame", row.names = c(NA, 
-40L))

# Fill colours, one per treatment level in the order A, B, C, D.
cols <- c("darkcyan", "olivedrab3", "palevioletred3", "brown3")

答案 1 :(得分:0)

是的,可以使用ggplot创建这两个图。以下是一种实现方法:

library(ggplot2)
library(dplyr)

# Bar chart of the mean response per treatment with +/- 1 standard-error bars.
# Everything is derived from `data` (columns Treatment, Response), so the plot
# does not depend on vectors computed by another script.
# `se` is computed before `Response` is overwritten with the group mean.
data %>%
  group_by(Treatment) %>%
  summarise(
    se = sd(Response) / sqrt(n()),
    Response = mean(Response)
  ) %>%
  ggplot(aes(x = Treatment, y = Response)) +
  # Map fill to the treatment and supply the colours through the scale;
  # mapping a vector of colour names would treat them as category labels.
  geom_col(aes(fill = Treatment)) +
  geom_errorbar(
    aes(ymin = Response - se, ymax = Response + se),
    width = 0.1
  ) +
  # Lower-case treatment letter placed just above the top of each error bar.
  geom_text(
    aes(y = Response + se, label = tolower(Treatment)),
    vjust = -0.5
  ) +
  scale_fill_manual(
    values = c("darkcyan", "olivedrab3", "palevioletred3", "brown3")
  ) +
  labs(title = "TREATMENT EFFECTS") +
  theme_bw() +
  theme(legend.position = "none")

enter image description here

这是带有回归方程式并添加了R²的散点图。ggpubr软件包在这里很有用,可以减少代码量。

library(ggpubr)

# Scatterplot of Response against X, filled by treatment, with a linear fit
# and the regression equation and adjusted R^2 printed inside the panel.
# Reads `data` (columns X, Response, Treatment).
data %>%
  ggplot(aes(x = X, y = Response)) +
  geom_point(aes(fill = Treatment), shape = 21, colour = "black", size = 4) +
  scale_fill_manual(
    values = c("darkcyan", "olivedrab3", "palevioletred3", "brown3")
  ) +
  scale_x_continuous(breaks = seq(0, 9, 3)) +
  # labels = NULL hides the y tick labels.
  scale_y_continuous(limits = c(0, 40), labels = NULL) +
  geom_smooth(method = "lm", formula = y ~ x) +
  # Two label layers: adjusted R^2 at y = 33 and the equation at y = 35.
  stat_regline_equation(
    aes(label = after_stat(adj.rr.label)),
    label.y = 33, label.x = 4
  ) +
  stat_regline_equation(
    aes(label = after_stat(eq.label)),
    label.y = 35, label.x = 4
  ) +
  labs(title = "CONTINUOUS RESPONSE", x = "Continuous variable", y = "") +
  theme_classic() +
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))

enter image description here

答案 2 :(得分:-1)

答案是肯定的,但并不优雅(仅使用ggplot2)。这是因为the "grammar of graphics" philosophy implemented in ggplot2背后的整个想法是将各个变量(即整洁的data.frame中的列)映射到图形的各个美学属性(aesthetics)。因此,使用facet_*函数之一只会根据分类值(在facet_grid的情况下为2个分类值)将图分成多个面板。但是,在这种情况下,各面板的x和y轴仍将映射到同一变量。而在您的示例中,需要将左侧面板的x轴映射到包含分类值的变量,并将右侧面板的x轴映射到另一个包含连续值的变量。 More on this here

如果您想使用ggplot2生成这些图,然后以编程方式将它们组合起来而不必手动进行,我建议您研究ggplot2的扩展包,例如cowplot、egg或gridExtra。我敢肯定这些软件包都有一些不错的使用说明(vignette),但这是我过去发现的一个有用的教程。