Question

我想在R中使用ggplot或其他包绘制一个条形图，显示每个条形的多个X变量的值。

感谢您的帮助。我附上了 Akseer et al. 文章中的一张图片，以展示我想要绘制的图表。

下面我提供样本数据来复制这个条形图。

对于前两个代码，干预和组的间隔和顺序旨在反映干预的分类，如示例图所示。这是因为并非所有干预措施都适用于所有人。此外，在创建数据集之后，需要删除那些不属于图B中给定干预的组（国家中位数）的值。

## The 19 interventions, listed group by group (seven groups in total).
Interventions <- c(
  ## 1st group
  "Demand of family planning satisfied",

  ## 2nd group
  "ANC 1+",
  "ANC 4+",
  "ANC by skilled provider",
  "Protected against neonatal tetanus",

  ## 3rd group
  "SBA",
  "Facility deliveries",

  ## 4th group
  "Early breastfeeding",

  ## 5th group
  "Exclusive breastfeeding at 6 months",
  "Minimum meal frequency",
  "BCG",
  "Penta3",
  "Measles",
  "Received vitamin A during the last 6 months",

  ## 6th group
  "Diarrhoea treatment (ORS)",
  "Care seeking for pneumonia",
  "Antibiotics for pneumonia",

  ## 7th group
  "Improved drinking water sources",
  "Improved sanitation facilities"
)

现在给出各组的数据。图B中的每个条形表示每项干预措施的全国中位数。前七组是用于绘制这些条形的国家中位数：

## Fix the random seed so the simulated example data are the same on every
## run; without it each execution produces a different data set and the
## example cannot be reproduced.
set.seed(2018)

## Simulated national medians: for each of the seven groups, 19 values (one
## per intervention) drawn with replacement from 1-100.
Prepregnancy <- sample(1:100, 19, replace = TRUE)  ## 1st group

Pregnancy <- sample(1:100, 19, replace = TRUE)     ## 2nd group

Birth <- sample(1:100, 19, replace = TRUE)         ## 3rd group

Postnatal <- sample(1:100, 19, replace = TRUE)     ## 4th group

Infancy <- sample(1:100, 19, replace = TRUE)       ## 5th group

Childhood <- sample(1:100, 19, replace = TRUE)     ## 6th group

Other <- sample(1:100, 19, replace = TRUE)         ## 7th group

下面我提供数据的最后一部分，即“省级覆盖”组的数据。这里有一点需要注意：与上面的7个组（国家中位数）不同，以下所有“省级覆盖”变量适用于19种干预措施中的每一种，如图B所示。

## Provincial-level observations: ten series, each holding one simulated value
## (1-100, drawn with replacement) for each of the 19 interventions.
draw_provincial <- function() {
  sample(1:100, 19, replace = TRUE)
}

Provincial1 <- draw_provincial()
Provincial2 <- draw_provincial()
Provincial3 <- draw_provincial()
Provincial4 <- draw_provincial()
Provincial5 <- draw_provincial()
Provincial6 <- draw_provincial()
Provincial7 <- draw_provincial()
Provincial8 <- draw_provincial()
Provincial9 <- draw_provincial()
Provincial10 <- draw_provincial()


## Combine the intervention labels, the seven national-median groups and the
## ten provincial series into one data frame.
mydata_B <- data.frame(
  Interventions,
  Prepregnancy, Pregnancy, Birth, Postnatal, Infancy, Childhood, Other,
  Provincial1, Provincial2, Provincial3, Provincial4, Provincial5,
  Provincial6, Provincial7, Provincial8, Provincial9, Provincial10
)

## Use the intervention labels (first column) as row names, then keep every
## column except that first one.
rownames(mydata_B) <- mydata_B[["Interventions"]]
dtFig3B <- mydata_B[-1]

同样，在创建数据集之后，需要删除那些不属于图B中给定干预的组（国家中位数）的值。

我很感激有关如何在R中重现这个条形图的任何想法。

Answer 1

此示例说明了如何使用 factor(x, levels) 确保同一组的条形排在一起。在 ggplot 调用中，您可以将分组变量映射到 fill（填充）美学，以直观地区分各组。使用 stat = "unique" 可按唯一值绘制条形，而不是按计数绘制（按计数时，每个条形的高度由 df 中相应行的数量决定）。

library(ggplot2)

# Example data: five random values for each of five x categories.
df <- data.frame(
  x = rep(c("Z", "A", "Y", "B", "X"), each = 5),
  value = sample(10:99, 25)
)

# Map every x category to its group through a named lookup vector, fixing the
# order in which the groups appear.
groups <- c(Z = "g1", A = "g3", Y = "g3", B = "g1", X = "g2")
df$group <- factor(groups[as.character(df$x)], levels = c("g1", "g2", "g3"))

# Sort the rows by group, then make x a factor whose levels follow that row
# order so that categories of the same group end up next to each other.
df <- df[order(df$group), ]
df$x <- factor(df$x, levels = unique(df$x))

# Median value per x category, repeated on every row of that category.
medians <- tapply(df$value, df$x, median)
df$median <- medians[as.character(df$x)]

# Bars show the median of each x (stat = "unique" draws one bar per distinct
# row instead of counting rows), points show the individual values, and the
# fill colour marks the group.
ggplot(data = df, mapping = aes(x = x, fill = group)) +
  geom_bar(mapping = aes(y = median), stat = "unique") +
  geom_point(mapping = aes(y = value)) +
  labs(y = "values and median")

Answer 2

这显示了如何根据 this answer 在组之间放置分隔线。这是上面 @Jordi 答案的扩展：修改后按省份为点着色，并对条形使用 alpha 透明度。仅凭颜色很难区分19个省份，因此可能需要像其他评论中提到的那样，配合使用不同的点形状。

library(ggplot2)

# Example data: one value per (group, intervention, province) combination.
df <- read.csv(
  text = '
group,intervention,province,value
g1,i1,p1,10
g1,i1,p2,12
g1,i2,p1,13
g1,i2,p2,15
g2,i3,p1,18
g2,i3,p2,20
g3,i4,p1,14
g3,i4,p2,16
g3,i5,p1,18
g3,i5,p2,20
',
  stringsAsFactors = FALSE
)

# Ordered factors control the plotting order.
# The group levels are deliberately reversed.
df$group <- factor(df$group, levels = c("g3", "g2", "g1"), ordered = TRUE)
df$intervention <- factor(
  df$intervention,
  levels = c("i1", "i2", "i3", "i4", "i5"),
  ordered = TRUE
)

# find the last intervention in each group
library(dplyr)
# Reduce df to one row per (group, intervention) pair; the empty summarize()
# keeps only the grouping columns.
interventions_by_group <- df %>%
  group_by(group, intervention) %>%
  summarize()

# For every group take its last intervention, convert it to its integer
# factor code and add 0.5, i.e. the x position just right of that
# intervention.
last_in_group <- interventions_by_group %>%
  group_by(group) %>%
  summarize(x = as.integer(tail(intervention, 1)) + 0.5)

# Median value per intervention, then looked up by intervention name so that
# every row of df carries the median of its own intervention.
medians <- tapply(X = df$value, INDEX = df$intervention, FUN = median)
df$median <- medians[as.character(df$intervention)]

# Plot one translucent bar per intervention at its median, overlay the
# individual values as points coloured by province, and draw dashed vertical
# separators at the positions stored in last_in_group$x.
#
# df holds several rows per intervention that all carry the same median.
# geom_col() stacks rows by default, so it would draw each bar as tall as
# median * (number of rows for that intervention). stat = "unique" collapses
# the duplicated rows so each bar is drawn once, at the median itself.
# The fill mapping is inherited from ggplot() and is not repeated here.
ggplot(df, aes(x = intervention, fill = group)) +
  geom_bar(aes(y = median), stat = "unique", width = 0.3, alpha = 0.2) +
  geom_point(aes(y = value, col = province)) +
  geom_vline(
    xintercept = last_in_group$x,
    lwd = 0.5, linetype = 2, alpha = 0.2
  ) +
  # Keep the bars flush with the x axis (no lower expansion) but leave 5%
  # headroom at the top so the highest points are not clipped at the panel
  # edge. Order: lower mult, lower add, upper mult, upper add.
  scale_y_continuous(expand = c(0, 0, 0.05, 0)) +
  labs(y = "values and median") +
  theme(panel.background = element_rect(fill = "white"))

Answer 3

这可能是更自然的ggplot方法，使用facet_grid生成单行，scales = 'free_x'仅包含使用过的x值，space = 'free'调整每个面板的宽度以适应。对主题的额外调整可以接近所需的演示文稿。

这遵循@Jordi的数据结构和示例

# Same bars-and-points plot, split into one panel per group on a single row.
# scales = "free_x" lets each panel show only the x values it uses, and
# space = "free" sizes each panel's width to fit them.
ggplot(data = df, mapping = aes(x = x, fill = group)) +
  geom_bar(mapping = aes(y = median), stat = "unique", width = 0.3) +
  geom_point(mapping = aes(y = value)) +
  facet_grid(. ~ group, scales = "free_x", space = "free") +
  labs(y = "values and median")

如何使用ggplot或plotly创建每个条带有多个x变量的条形图？

3 个答案: