因此,我想基于下面给出的代码,将每个性别在各分数段的人数除以该性别的总人数(F为5018,M为5827),从而得到各分数段人数占该性别总人数的百分比。这样结果会更准确,否则由于M的总人数较多,M的计数总是偏高。
我已在下面附上代码……我卡在了如何将其除以总人数这一步。可行的方法太多,我不确定哪一种最简单。
# Count of students per gender (10,845 students in total):
# project2data %>% group_by(Gender) %>% summarise(Gender_count = n())
# For the plot, the female frequencies have to be divided by 5018 and the
# male frequencies by 5827.

# Map a numeric vector of marks to grade-band labels.
#
# Bands are closed on the left and open on the right:
# [0, 50), [50, 65), [65, 75), [75, 85), [85, 101).
# Marks that are NA or fall outside [0, 101) give NA instead of stopping the
# script, which a scalar `if` on an NA mark would do.
# Returns a character vector the same length as `mark`.
mark_to_band <- function(mark) {
  bands <- cut(
    mark,
    breaks = c(0, 50, 65, 75, 85, 101),
    labels = c(
      "0 - 49: Fail ",
      " 50 - 64 : Pass",
      " 65 - 74: Credit",
      " 75 - 84 : Distinction",
      " 85 - 100: High Distinction"
    ),
    right = FALSE
  )
  as.character(bands)
}

# Column 8 holds the mark; the band label is stored in column 27, which the
# table() call below reads back as `V27`.
project2data[, 27] <- mark_to_band(project2data[[8]])

# Cross-tabulate grade band (rows) against gender (columns).
gender_mark <- table(project2data$V27, project2data$Gender)
gender_mark
这将产生以下内容
因此,我想将表中的每一列(即每个性别)除以对应的性别总人数。下面是我的ggplot代码:
# Flatten the contingency table into long form; the plot below reads the
# resulting columns Var1 (mark range), Var2 (gender) and Freq (count).
gm <- as.data.frame(gender_mark)

# Dodged bar chart: one group of bars per mark range, one bar per gender.
# NOTE(review): y is the raw count (Freq) although the axis label says
# "Percentage of Population".
g2 <- ggplot(gm, aes(x = Var1, y = Freq, fill = Var2)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = wes_palette(n = 4, name = "GrandBudapest2")) +
  labs(
    title = "Marks by Gender",
    x = "Mark Ranges",
    y = "Percentage of Population"
  ) +
  guides(fill = guide_legend(title = "Gender")) +
  theme(
    axis.text.x = element_text(size = 6, angle = 0),
    title = element_text(size = 10)
  )
g2
非常感谢您的帮助!
答案 0 :(得分:0)
您可以使用 `sweep` 将每个值除以其所在列的总和:
# Divide every cell by the total of its column (MARGIN = 2).
result <- sweep(
  gender_mark,
  MARGIN = 2,
  STATS = colSums(gender_mark),
  FUN = "/"
)
或使用转置:
# After transposing, the columns of gender_mark become rows, so the vector of
# column totals recycles down them; transpose back to restore the layout.
result <- t(
  t(gender_mark) / colSums(gender_mark)
)
答案 1 :(得分:0)
将表格转换为 data.frame 后,您可以使用例如 `tapply`,或者使用 `dplyr::group_by` 和 `dplyr::mutate`:
library(ggplot2)
library(wesanderson)
library(dplyr)
# Counts per mark range (Var1) and gender (Var2) in long form.
# Var1 is repeated explicitly for both genders instead of relying on
# data.frame() recycling five labels into ten rows.
gm <- data.frame(
  Var1 = rep(
    c(
      "0 - 49: Fail ",
      " 50 - 64 : Pass",
      " 65 - 74: Credit",
      " 75 - 84 : Distinction",
      " 85 - 100: High Distinction"
    ),
    times = 2
  ),
  Var2 = rep(c("F", "M"), each = 5),
  Freq = c(1493, 1411, 1199, 369, 546, 1630, 1703, 1314, 474, 706)
)

# Option 1, base R: look up each row's gender total by name.
# as.character() keeps the lookup by name even if Var2 is a factor, and
# as.vector() drops the array attributes so `prop` is a plain numeric column.
gender_totals <- tapply(gm$Freq, gm$Var2, sum)
gm$prop <- gm$Freq / as.vector(gender_totals[as.character(gm$Var2)])

# Option 2, dplyr: same result; ungroup() so later steps do not silently
# operate per gender.
gm <- gm %>%
  group_by(Var2) %>%
  mutate(prop = Freq / sum(Freq)) %>%
  ungroup()

# Dodged bars of the within-gender share, with the y axis shown as percent.
g2 <- ggplot(gm, aes(x = Var1, y = prop, fill = Var2)) +
  geom_col(position = position_dodge()) +
  scale_fill_manual(values = wes_palette(n = 4, name = "GrandBudapest2")) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Marks by Gender",
    x = "Mark Ranges",
    y = "Percentage of Population"
  ) +
  guides(fill = guide_legend(title = "Gender")) +
  theme(
    axis.text.x = element_text(size = 6, angle = 0),
    title = element_text(size = 10)
  )
g2