多个箱形图并排

时间:2020-05-13 00:41:29

标签: r ggplot2 boxplot

我正在尝试用ggplot2并排绘制多个箱形图。我一直在参考这个帖子:Multiple boxplots placed side by side for different column values in ggplot,但没有成功。

我有以下数据框

# Observed log-returns. `sp500_logreturns` must already exist in the session;
# it is not created in this snippet.
Raw <- sp500_logreturns
# Normal reference sample: mean 0 and the same standard deviation as the raw
# data. (The original line was missing the closing parenthesis of rnorm().)
Normal <- rnorm(1000, 0, sd(sp500_logreturns))
# Student-t reference samples, one column per degrees-of-freedom setting
# (column 1: df = 2, column 2: df = 3).
Student <- cbind(c(rt(1000, df = 2)), c(rt(1000, df = 3)))

我想做以下事情 Boxplot sketch

我的Raw向量包含从雅虎(Yahoo)下载到R环境中的价格数据经过对数收益率(log-returns)变换后的结果。我必须承认我已经没有头绪了,也不知道我是否在尝试一项不可能完成的任务。希望我的描述加上草图已经把问题说清楚了。提前谢谢。

更新1:目标是比较原始数据的分布(尖峰厚尾,即leptokurtic),因此自由度为2或3的学生t分布可能比正态分布拟合得更好。为了让您了解我正在查看的数据,以下为摘要:

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.0418425 -0.0023740  0.0005898  0.0004704  0.0045065  0.0484032  

这是我的箱线图,由爱德华的代码制作而成:Boxplot (Edward)

更新2:我明白了。我使用了rugarch包中的fitdist,来找出最适合原始数据的学生t分布。这样,我就不必再逐个尝试不同自由度(df)的学生t分布了。以下是我最终采用的代码:

# Fit a Student-t ("std") distribution to the log-returns and print the
# estimated parameters (mu, sigma, shape).
fitdist(x = sp500_logreturns, distribution = "std")[["pars"]]
          mu        sigma        shape 
0.0008121004 0.0113748869 2.3848231857 

# Assemble the three samples to compare, one column each. The sample size and
# the Student-t parameters are hard-coded; the parameters are the values
# printed by the fitdist() call above.
data <- local({
  n_obs <- 1006
  data.frame(
    Raw = as.numeric(sp500_logreturns),
    Normal = rnorm(n_obs, mean = 0, sd = sd(sp500_logreturns)),
    Student = rdist(
      distribution = "std",
      n = n_obs,
      mu = 0.0008121004,
      sigma = 0.0113748869,
      shape = 2.3848231857
    )
  )
})

# Long format: one row per observation, with `name` holding the source column
# and `value` the observation. The factor levels fix the facet order.
data2 <- data %>%
  pivot_longer(cols = everything()) %>%
  mutate(name = factor(name, levels = c("Raw", "Normal", "Student")))

# Overall minimum and maximum across all samples, used as the y-axis limits.
data3 <- summarise(data2, min = min(value), max = max(value))

# One boxplot per distribution, laid out as facets with the same y-limits.
# The plot object is only stored here, not printed.
pbox1 <- data2 %>%
  filter(name %in% c("Raw", "Normal", "Student")) %>%
  ggplot(aes(y = value, fill = name)) +
  geom_boxplot() +
  facet_grid(~name) +
  labs(y = "Log-returns", title = "Boxplot comparison") +
  ylim(data3$min, data3$max) +
  theme(
    legend.position = "none",
    # The x axis carries no information here, so hide its ticks, grid lines
    # and (by colouring them white) its labels.
    axis.ticks.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x = element_text(color = "white"),
    # Centre the plot title.
    plot.title = element_text(hjust = 0.5)
  )

这给了我:Boxplot (final)

1 个答案:

答案 0 :(得分:1)

在基础R(base R)中:

# Reproducible example data: 1000 draws per column. Because `Student` is a
# two-column matrix, data.frame() expands it into the columns `Student.1`
# (df = 2) and `Student.2` (df = 3).
set.seed(11)
data <- data.frame(
  Raw = rnorm(1000),
  Normal = rnorm(1000),
  Student = cbind(c(rt(1000, df = 2)), c(rt(1000, df = 3)))
)

# Common y-axis range so that all panels share the same scale.
ylim <- c(min(data), max(data))

# Three panels in a single row with relative widths 5:4:5.
layout(matrix(1:3, ncol = 3), widths = c(5, 4, 5))

# Panel 1: raw data. This is the only panel that draws the y axis, hence the
# wider left margin. (Fixed: the original referenced the undefined `daat`.)
par(las = 1, mar = c(2, 4, 5, 0))
boxplot(data$Raw, col = "steelblue", ylab = "Log-returns", ylim = ylim)
title(main = "Raw", line = 1)

# Panel 2: normal sample, y axis suppressed.
par(mar = c(2, 1, 5, 0))
boxplot(data$Normal, yaxt = "n", col = "tomato", ylim = ylim)
title(main = "Normal", line = 1)

# Panel 3: both Student-t samples side by side, y axis suppressed.
par(mar = c(2, 1, 5, 1))
boxplot(
  data[, 3:4],
  yaxt = "n",
  col = c("green1", "green3"),
  names = c("df = 2", "df = 3"),
  ylim = ylim
)
title(main = "Student", line = 1)

# Overall title spanning all three panels.
title(main = "Boxplot comparison", outer = TRUE, line = -1.5, cex.main = 1.5)

enter image description here


在ggplot2中,需要多做一些工作:

# Reproducible example data: 1000 draws per column. The two-column matrix
# passed as `Student` is expanded by data.frame() into `Student.1` (df = 2)
# and `Student.2` (df = 3).
set.seed(11)
data <- local({
  n_obs <- 1000
  data.frame(
    Raw = rnorm(n_obs),
    Normal = rnorm(n_obs),
    Student = cbind(rt(n_obs, df = 2), rt(n_obs, df = 3))
  )
})

library(dplyr)
library(tidyr)
library(ggplot2)

# Long format: `name` identifies the source column, `value` the observation.
# The factor levels fix the display order of the four samples.
data2 <- data %>%
  pivot_longer(cols = everything()) %>%
  mutate(
    name = factor(name, levels = c("Raw", "Normal", "Student.1", "Student.2"))
  )

# Overall minimum and maximum, used as the y-limits of both plots below.
data3 <- summarise(data2, min = min(value), max = max(value))

# Left-hand plot: the Raw and Normal samples, one facet each.
p1 <- data2 %>%
  filter(name %in% c("Raw", "Normal")) %>%
  ggplot(aes(y = value, fill = name)) +
  geom_boxplot() +
  facet_grid(~name) +
  labs(y = "Log-returns") +
  ylim(data3$min, data3$max) +
  theme_bw() +
  theme(
    legend.position = "none",
    # The x axis carries no information here, so hide its ticks, grid lines
    # and (by colouring them white) its labels.
    axis.ticks.x = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x = element_text(color = "white")
  )

# Right-hand plot: both Student-t samples inside a single facet labelled
# "Student", told apart on the x axis by their degrees of freedom.
p2 <- data2 %>%
  filter(grepl("Student", name, fixed = TRUE)) %>%
  mutate(what = "Student") %>%
  ggplot(aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  scale_fill_manual(values = c("green1", "green3")) +
  scale_x_discrete(labels = c("df=2", "df=3")) +
  facet_grid(~what) +
  ylim(data3$min, data3$max) +
  theme_bw() +
  theme(
    legend.position = "none",
    # Blank out the axis titles and the y-axis text and ticks on this plot.
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

library(ggpubr)
# Combine the two plots into a single figure.
ggarrange(plotlist = list(p1, p2))

enter image description here