R:在x轴上有多个分类变量的Barplot(超过2个)

时间:2017-07-19 00:15:11

标签: r ggplot2 reshape

我正在努力创建一个包含多个分类变量的条形图,所以我当前的数据看起来像这样

df <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c(20,40,60,80))
ID  Type1    Score1   Type2    Score2
1       A        10       C        20
2       B        20       C        40
3       A        30       D        60
4       B        40       D        80

我正在寻找的应该是这样的

enter image description here

接下来我想通过在图表中添加更多类别来进一步实现这一点,因此数据框现在是

df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))
ID  Type1    Score1   Type2    Score2    Colour
1       A        10       C        20     Black
2       B        20       C        40     White
3       A        30       D        60     Black
4       B        40       D        80     White

对于这个,它应该看起来像这样

enter image description here

当只有一种类型时,我通常reshape数据并使用ggplot命令执行此操作。 (我在网上找到的大部分答案都只是这个) 但是,现在不仅仅是一种类型。我不确定我该怎么做。 有什么建议吗?

2 个答案:

答案 0 :(得分:1)

关于第一个请求,请尝试:

df <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c(20,40,60,80))

ab_means <- tapply(df$Score1, df$Type1, mean)
cd_means <- tapply(df$Score2, df$Type2, mean)

par(xpd = TRUE)
bp <- barplot(c(ab_means, cd_means), main = 'Average', ylab = 'Score', font.lab = 2)
text(x = mean(bp[1:2,]), y = -10, labels = 'Score 1', font = 2)
text(x = mean(bp[3:4,]), y = -10, labels = 'Score 2', font = 2)

ns <- length(df$Score1[df$Type1 == 'A'])
ns1 <- length(df$Score1[df$Type1 == 'B'])
ns2 <- length(df$Score2[df$Type2 == 'C'])
ns3 <- length(df$Score2[df$Type2 == 'D'])

text(x = bp, y = c(ab_means, cd_means)+1.5, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))

enter image description here

关于更复杂的请求,请尝试:

# Make the dataframe
df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))


# Define plot region
par(mfcol = c(1,2), # two columns, one row
    xpd = TRUE, # let things be plotted outside of the plotting region
    mar = c(5, 4, 6, 2)) # margins of the plotting region


# Compute the means
ab_black_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'Black'])), df$Type1[df2$Colour == 'Black'], mean)
cd_black_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'Black'])), df$Type2[df2$Colour == 'Black'], mean)

ab_white_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'White'])), df$Type1[df2$Colour == 'White'], mean)
cd_white_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'White'])), df$Type2[df2$Colour == 'White'], mean)

# Compute the maximum score value to set the scale
all <- as.numeric(as.matrix(df2[, c('Score1', 'Score2')]))
max_all <- ceiling(max(all, na.rm = TRUE))

# First barplot
bp1 <- barplot(c(ab_black_means, cd_black_means), ylab = 'Score', font.lab = 2, ylim = c(0, max_all))
text(x = mean(bp1[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp1[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)

# Compute the Ns to plot above each bar
ns <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'Black']))
ns1 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'Black']))
ns2 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'Black']))
ns3 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'Black']))


text(x = bp1, y = c(ab_black_means, cd_black_means)+2, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))
axis(side = 1, at = bp1, labels = rep('', 4))

text(x = mean(bp1[1:4]), y = par('usr')[3]-16, labels = 'Black', font = 2, cex = 1.3)


# Second barplot
bp2 <- barplot(c(ab_white_means, cd_white_means), yaxt = 'n', ylim = c(0, max_all))
text(x = mean(bp2[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp2[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
axis(side = 1, at = bp2, labels = rep('', 4))

# Compute the Ns to plot above each bar
ns4 <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'White']))
ns5 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'White']))
ns6 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'White']))
ns7 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'White']))

text(x = bp1, y = c(ab_white_means, cd_white_means)+2, labels = sapply(as.character(c(ns4, ns5, ns6, ns7)), function(x) paste('n =', x)))

text(x = mean(bp2[1:4]), y = par('usr')[3]-16, labels = 'White', font = 2, cex = 1.3)


# Final title
title(main = 'Average Score', outer = TRUE, line = -3)

enter image description here

在最后一个情节中,有些酒吧不会出现。这是因为无法计算某些方法(请注意,我们无法为因子mean和颜色B提供'Black'

如果您真的希望x轴连续不间断,可以使用curve()

# Make the dataframe
df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))


# Define plot region
par(mfcol = c(1,2), # two columns, one row
    xpd = TRUE, # let things be plotted outside of the plotting region
    mar = c(5, 4, 6, 2)) # margins of the plotting region


# Compute the means
ab_black_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'Black'])), df$Type1[df2$Colour == 'Black'], mean)
cd_black_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'Black'])), df$Type2[df2$Colour == 'Black'], mean)

ab_white_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'White'])), df$Type1[df2$Colour == 'White'], mean)
cd_white_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'White'])), df$Type2[df2$Colour == 'White'], mean)

# Compute the maximum score value to set the scale
all <- as.numeric(as.matrix(df2[, c('Score1', 'Score2')]))
max_all <- ceiling(max(all, na.rm = TRUE))

# First barplot
bp1 <- barplot(c(ab_black_means, cd_black_means), ylab = 'Score', font.lab = 2, ylim = c(0, max_all))
text(x = mean(bp1[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp1[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)

# Compute the Ns to plot above each bar
ns <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'Black']))
ns1 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'Black']))
ns2 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'Black']))
ns3 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'Black']))


text(x = bp1, y = c(ab_black_means, cd_black_means)+2, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))
axis(side = 1, at = bp1, labels = rep('', 4))

text(x = mean(bp1[1:4]), y = par('usr')[3]-16, labels = 'Black', font = 2, cex = 1.3)

# First curve added
curve(expr = 0*x+0, from = 0, to = 6, add = TRUE, col = 'black')


# Second barplot
bp2 <- barplot(c(ab_white_means, cd_white_means), yaxt = 'n', ylim = c(0, max_all))
text(x = mean(bp2[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp2[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
axis(side = 1, at = bp2, labels = rep('', 4))

# Compute the Ns to plot above each bar
ns4 <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'White']))
ns5 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'White']))
ns6 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'White']))
ns7 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'White']))

text(x = bp1, y = c(ab_white_means, cd_white_means)+2, labels = sapply(as.character(c(ns4, ns5, ns6, ns7)), function(x) paste('n =', x)))

text(x = mean(bp2[1:4]), y = par('usr')[3]-16, labels = 'White', font = 2, cex = 1.3)

# Second curve added
curve(expr = 0*x+0, from = -10, to = 5, add = TRUE, col = 'black')

# Final title
title(main = 'Average Score', outer = TRUE, line = -3)

enter image description here

请告诉我它是否适合您,如果我可以进一步帮助您。希望它有所帮助!

答案 1 :(得分:1)

我试图在这里创建一个tidyverse解决方案:

对于您的第一个数据框 - df

library(tidyverse)

#tidy the dataframe
df_1 <- df %>% select(ID, Type1, Score1) %>% rename(Type = Type1, Score = Score1) %>% mutate(Type_number = as.factor(1), Score_number = as.factor(1))
df_2 <- df %>% select(ID, Type2, Score2) %>% rename(Type = Type2, Score = Score2) %>% mutate(Type_number = as.factor(2), Score_number = as.factor(2))
df_tidy <- bind_rows(df_1, df_2) %>% mutate_each(funs(as.factor(.)), ID, Type, Type_number, Score_number)

#summarise the dataframe - create means of scores by Type
df_sum <- df_tidy %>% group_by(Type, Score_number) %>% summarise(Mean_score = mean(Score))

#create plot
ggplot(df_sum, aes(x = Type, y = Mean_score, fill = Score_number)) +
  geom_bar(stat = "identity")

enter image description here

对于您的第二个数据框 - df2

#tidy the dataframe
df_3 <- df2 %>% select(ID, Type1, Score1, Colour) %>% rename(Type = Type1, Score = Score1) %>% mutate(Type_number = as.factor(1), Score_number = as.factor(1))
df_4 <- df2 %>% select(ID, Type2, Score2, Colour) %>% rename(Type = Type2, Score = Score2) %>% mutate(Type_number = as.factor(2), Score_number = as.factor(2), Score = as.numeric(as.character(Score)))
df_tidy_2 <- bind_rows(df_3, df_4) %>% mutate_each(funs(as.factor(.)), ID, Type, Type_number, Score_number, Colour)

#summarise the dataframe - create means of scores by Type
df_sum_2 <- df_tidy_2 %>% group_by(Type, Score_number, Colour) %>% summarise(Mean_score = mean(Score))

#create plot
ggplot(df_sum_2, aes(x = Type, y = Mean_score, fill = Score_number)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Colour, nrow = 1)

enter image description here

我的感觉是,这不是你想要的解决方案吗?我不清楚分析的目的是什么 - 你想要回答的问题是什么。无论如何,希望这会有所帮助。