我正在努力创建一个包含多个分类变量的条形图,所以我当前的数据看起来像这样
df <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c(20,40,60,80))
ID Type1 Score1 Type2 Score2
1 A 10 C 20
2 B 20 C 40
3 A 30 D 60
4 B 40 D 80
我正在寻找的应该是这样的
接下来我想通过在图表中添加更多类别来进一步实现这一点,因此数据框现在是
df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))
ID Type1 Score1 Type2 Score2 Colour
1 A 10 C 20 Black
2 B 20 C 40 White
3 A 30 D 60 Black
4 B 40 D 80 White
对于这个,它应该看起来像这样
当只有一种类型时,我通常reshape
数据并使用ggplot
命令执行此操作。 (我在网上找到的大部分答案都只是这个)
但是,现在不仅仅是一种类型。我不确定我该怎么做。
有什么建议吗?
答案 0 :(得分:1)
关于第一个请求,请尝试:
df <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c(20,40,60,80))
ab_means <- tapply(df$Score1, df$Type1, mean)
cd_means <- tapply(df$Score2, df$Type2, mean)
par(xpd = TRUE)
bp <- barplot(c(ab_means, cd_means), main = 'Average', ylab = 'Score', font.lab = 2)
text(x = mean(bp[1:2,]), y = -10, labels = 'Score 1', font = 2)
text(x = mean(bp[3:4,]), y = -10, labels = 'Score 2', font = 2)
ns <- length(df$Score1[df$Type1 == 'A'])
ns1 <- length(df$Score1[df$Type1 == 'B'])
ns2 <- length(df$Score2[df$Type2 == 'C'])
ns3 <- length(df$Score2[df$Type2 == 'D'])
text(x = bp, y = c(ab_means, cd_means)+1.5, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))
关于更复杂的请求,请尝试:
# Make the dataframe
df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))
# Define plot region
par(mfcol = c(1,2), # two columns, one row
xpd = TRUE, # let things be plotted outside of the plotting region
mar = c(5, 4, 6, 2)) # margins of the plotting region
# Compute the means
ab_black_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'Black'])), df$Type1[df2$Colour == 'Black'], mean)
cd_black_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'Black'])), df$Type2[df2$Colour == 'Black'], mean)
ab_white_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'White'])), df$Type1[df2$Colour == 'White'], mean)
cd_white_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'White'])), df$Type2[df2$Colour == 'White'], mean)
# Compute the maximum score value to set the scale
all <- as.numeric(as.matrix(df2[, c('Score1', 'Score2')]))
max_all <- ceiling(max(all, na.rm = TRUE))
# First barplot
bp1 <- barplot(c(ab_black_means, cd_black_means), ylab = 'Score', font.lab = 2, ylim = c(0, max_all))
text(x = mean(bp1[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp1[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
# Compute the Ns to plot above each bar
ns <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'Black']))
ns1 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'Black']))
ns2 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'Black']))
ns3 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'Black']))
text(x = bp1, y = c(ab_black_means, cd_black_means)+2, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))
axis(side = 1, at = bp1, labels = rep('', 4))
text(x = mean(bp1[1:4]), y = par('usr')[3]-16, labels = 'Black', font = 2, cex = 1.3)
# Second barplot
bp2 <- barplot(c(ab_white_means, cd_white_means), yaxt = 'n', ylim = c(0, max_all))
text(x = mean(bp2[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp2[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
axis(side = 1, at = bp2, labels = rep('', 4))
# Compute the Ns to plot above each bar
ns4 <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'White']))
ns5 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'White']))
ns6 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'White']))
ns7 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'White']))
text(x = bp1, y = c(ab_white_means, cd_white_means)+2, labels = sapply(as.character(c(ns4, ns5, ns6, ns7)), function(x) paste('n =', x)))
text(x = mean(bp2[1:4]), y = par('usr')[3]-16, labels = 'White', font = 2, cex = 1.3)
# Final title
title(main = 'Average Score', outer = TRUE, line = -3)
在最后一个情节中,有些酒吧不会出现。这是因为无法计算某些方法(请注意,我们无法为因子mean
和颜色B
提供'Black'
。
如果您真的希望x轴连续不间断,可以使用curve()
:
# Make the dataframe
df2 <- data.frame(ID = c(1,2,3,4), Type1 = c("A","B","A","B"), Score1 = c(10,20,30,40), Type2 = c("C","C","D","D"), Score2 = c("20","40","60","80"), Colour = c("Black","White","Black","White"))
# Define plot region
par(mfcol = c(1,2), # two columns, one row
xpd = TRUE, # let things be plotted outside of the plotting region
mar = c(5, 4, 6, 2)) # margins of the plotting region
# Compute the means
ab_black_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'Black'])), df$Type1[df2$Colour == 'Black'], mean)
cd_black_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'Black'])), df$Type2[df2$Colour == 'Black'], mean)
ab_white_means <- tapply(as.numeric(as.character(df2$Score1[df2$Colour == 'White'])), df$Type1[df2$Colour == 'White'], mean)
cd_white_means <- tapply(as.numeric(as.character(df2$Score2[df2$Colour == 'White'])), df$Type2[df2$Colour == 'White'], mean)
# Compute the maximum score value to set the scale
all <- as.numeric(as.matrix(df2[, c('Score1', 'Score2')]))
max_all <- ceiling(max(all, na.rm = TRUE))
# First barplot
bp1 <- barplot(c(ab_black_means, cd_black_means), ylab = 'Score', font.lab = 2, ylim = c(0, max_all))
text(x = mean(bp1[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp1[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
# Compute the Ns to plot above each bar
ns <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'Black']))
ns1 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'Black']))
ns2 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'Black']))
ns3 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'Black']))
text(x = bp1, y = c(ab_black_means, cd_black_means)+2, labels = sapply(as.character(c(ns, ns1, ns2, ns3)), function(x) paste('n =', x)))
axis(side = 1, at = bp1, labels = rep('', 4))
text(x = mean(bp1[1:4]), y = par('usr')[3]-16, labels = 'Black', font = 2, cex = 1.3)
# First curve added
curve(expr = 0*x+0, from = 0, to = 6, add = TRUE, col = 'black')
# Second barplot
bp2 <- barplot(c(ab_white_means, cd_white_means), yaxt = 'n', ylim = c(0, max_all))
text(x = mean(bp2[1:2,]), y = par('usr')[3]-11, labels = 'Score 1', font = 2)
text(x = mean(bp2[3:4,]), y = par('usr')[3]-11, labels = 'Score 2', font = 2)
axis(side = 1, at = bp2, labels = rep('', 4))
# Compute the Ns to plot above each bar
ns4 <- c(length(df2$Score1[df2$Type1 == 'A' & df2$Colour == 'White']))
ns5 <- c(length(df2$Score1[df2$Type1 == 'B' & df2$Colour == 'White']))
ns6 <- c(length(df2$Score1[df2$Type2 == 'C' & df2$Colour == 'White']))
ns7 <- c(length(df2$Score1[df2$Type2 == 'D' & df2$Colour == 'White']))
text(x = bp1, y = c(ab_white_means, cd_white_means)+2, labels = sapply(as.character(c(ns4, ns5, ns6, ns7)), function(x) paste('n =', x)))
text(x = mean(bp2[1:4]), y = par('usr')[3]-16, labels = 'White', font = 2, cex = 1.3)
# Second curve added
curve(expr = 0*x+0, from = -10, to = 5, add = TRUE, col = 'black')
# Final title
title(main = 'Average Score', outer = TRUE, line = -3)
请告诉我它是否适合您,如果我可以进一步帮助您。希望它有所帮助!
答案 1 :(得分:1)
我试图在这里创建一个tidyverse
解决方案:
对于您的第一个数据框 - df
library(tidyverse)
#tidy the dataframe
df_1 <- df %>% select(ID, Type1, Score1) %>% rename(Type = Type1, Score = Score1) %>% mutate(Type_number = as.factor(1), Score_number = as.factor(1))
df_2 <- df %>% select(ID, Type2, Score2) %>% rename(Type = Type2, Score = Score2) %>% mutate(Type_number = as.factor(2), Score_number = as.factor(2))
df_tidy <- bind_rows(df_1, df_2) %>% mutate_each(funs(as.factor(.)), ID, Type, Type_number, Score_number)
#summarise the dataframe - create means of scores by Type
df_sum <- df_tidy %>% group_by(Type, Score_number) %>% summarise(Mean_score = mean(Score))
#create plot
ggplot(df_sum, aes(x = Type, y = Mean_score, fill = Score_number)) +
geom_bar(stat = "identity")
对于您的第二个数据框 - df2
#tidy the dataframe
df_3 <- df2 %>% select(ID, Type1, Score1, Colour) %>% rename(Type = Type1, Score = Score1) %>% mutate(Type_number = as.factor(1), Score_number = as.factor(1))
df_4 <- df2 %>% select(ID, Type2, Score2, Colour) %>% rename(Type = Type2, Score = Score2) %>% mutate(Type_number = as.factor(2), Score_number = as.factor(2), Score = as.numeric(as.character(Score)))
df_tidy_2 <- bind_rows(df_3, df_4) %>% mutate_each(funs(as.factor(.)), ID, Type, Type_number, Score_number, Colour)
#summarise the dataframe - create means of scores by Type
df_sum_2 <- df_tidy_2 %>% group_by(Type, Score_number, Colour) %>% summarise(Mean_score = mean(Score))
#create plot
ggplot(df_sum_2, aes(x = Type, y = Mean_score, fill = Score_number)) +
geom_bar(stat = "identity") +
facet_wrap(~Colour, nrow = 1)
我的感觉是,这不是你想要的解决方案吗?我不清楚分析的目的是什么 - 你想要回答的问题是什么。无论如何,希望这会有所帮助。