我想计算每天报告做某事的人数百分比。例如,我想知道整个样本中报告在星期一做某事的人数百分比。
我使用以下代码进行计算,但不确定结果是否正确。
# Example survey data: one row per respondent, three 0/1 time-step columns
# for each of the seven day codes t1..t7.
df1 <- structure(
  list(
    id = c(12L, 123L, 10L),
    # t1
    t1_1 = c(0L, 0L, 1L), t1_2 = c(1L, 0L, 1L), t1_3 = c(1L, 0L, 1L),
    # t2
    t2_1 = c(0L, 1L, 1L), t2_2 = c(1L, 1L, 1L), t2_3 = c(0L, 1L, 1L),
    # t3
    t3_1 = c(1L, 0L, 1L), t3_2 = c(0L, 0L, 1L), t3_3 = c(1L, 0L, 1L),
    # t4
    t4_1 = c(0L, 1L, 1L), t4_2 = c(1L, 1L, 1L), t4_3 = c(0L, 1L, 1L),
    # t5
    t5_1 = c(0L, 1L, 1L), t5_2 = c(1L, 1L, 1L), t5_3 = c(0L, 1L, 1L),
    # t6
    t6_1 = c(1L, 0L, 1L), t6_2 = c(1L, 0L, 1L), t6_3 = c(1L, 0L, 1L),
    # t7
    t7_1 = c(0L, 1L, 1L), t7_2 = c(0L, 1L, 1L), t7_3 = c(1L, 1L, 1L)
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
)
变量说明:t1 表示星期一(t1_1、t1_2、t1_3 是星期一进行工作的各个时间步);t2 表示星期二;t3 表示星期三;t4 表示星期四;t5 表示星期五;t6 表示星期六(Saturday);t7 表示星期日(Sunday)。id 是一个识别号。
# Reshape the wide table to long format: one row per (id, time-step column),
# with the column name in `variable` and the 0/1 answer in `value`.
df2 <- reshape2::melt(df1, id.vars = "id")
df2$variable <- as.character(df2$variable)

# The day code is the part of the column name before "_" ("t1_2" -> "t1").
# vapply() guarantees a character vector regardless of the input length.
df2$day <- vapply(
  strsplit(df2$variable, "_"),
  function(parts) parts[1],
  character(1)
)

# Keep the days in their order of appearance (t1, ..., t7). The previous
# `levels = variable` referred to an object that does not exist at top level
# and failed with "object 'variable' not found".
df2$day <- factor(df2$day, levels = unique(df2$day))
# Percentage of respondents who reported the activity on each day.
# Requires dplyr and ggplot2 to be attached (as the original code did).
#
# Step 1: per respondent and day, did they report the activity in at least
#         one time step?
# Step 2: per day, the share of respondents for whom that is TRUE.
# The previous `value / sum(value) * 100` gave each row's share of the day's
# reported time steps, which is not a percentage of people.
df3 <- df2 %>%
  group_by(day, id) %>%
  summarise(did_any = any(value == 1)) %>%
  group_by(day) %>%
  summarise(percent = mean(did_any) * 100) %>%
  ungroup()

# Named labels so the mapping works whether `day` is a factor or character.
day_labels <- c(
  t1 = "Monday", t2 = "Tuesday", t3 = "Wednesday", t4 = "Thursday",
  t5 = "Friday", t6 = "Saturday", t7 = "Sunday"
)

# One bar per day; `percent` is on a 0-100 scale, so divide by 100 to use the
# percent label formatter on a 0-1 axis.
ggplot(df3, aes(x = day, y = percent / 100, fill = day)) +
  geom_col() +
  scale_x_discrete(labels = day_labels) +
  scale_fill_discrete(name = "Days", labels = day_labels) +
  scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
  ylab("relative frequencies") +
  theme_bw()
结果:
答案 0 :(得分:1)
library(dplyr)
# Example survey data: one row per respondent, three 0/1 time-step columns
# for each of the seven day codes t1..t7.
df1 <- data.frame(
  id = c(12L, 123L, 10L),
  t1_1 = c(0L, 0L, 1L),
  t1_2 = c(1L, 0L, 1L),
  t1_3 = c(1L, 0L, 1L),
  t2_1 = c(0L, 1L, 1L),
  t2_2 = c(1L, 1L, 1L),
  t2_3 = c(0L, 1L, 1L),
  t3_1 = c(1L, 0L, 1L),
  t3_2 = c(0L, 0L, 1L),
  t3_3 = c(1L, 0L, 1L),
  t4_1 = c(0L, 1L, 1L),
  t4_2 = c(1L, 1L, 1L),
  t4_3 = c(0L, 1L, 1L),
  t5_1 = c(0L, 1L, 1L),
  t5_2 = c(1L, 1L, 1L),
  t5_3 = c(0L, 1L, 1L),
  t6_1 = c(1L, 0L, 1L),
  t6_2 = c(1L, 0L, 1L),
  t6_3 = c(1L, 0L, 1L),
  t7_1 = c(0L, 1L, 1L),
  t7_2 = c(0L, 1L, 1L),
  t7_3 = c(1L, 1L, 1L)
)
# Long format: one row per respondent and time-step column, with the column
# name in `variable` and the 0/1 answer in `value`.
df2 <- reshape2::melt(df1, id.vars = "id")
df2[["variable"]] <- as.character(df2[["variable"]])

# Day code = text before the underscore in the column name ("t1_2" -> "t1").
df2[["day"]] <- vapply(
  strsplit(df2[["variable"]], "_"),
  function(pieces) pieces[1],
  character(1)
)
# Per respondent: count the reported time steps on each day, then express each
# day's count as a fraction of that respondent's total across all days.
# The result stays grouped by `id` and is sorted by day, then id.
df3 <- local({
  counts <- summarize(group_by(df2, id, day), count = sum(value))
  shares <- mutate(group_by(counts, id), percent = count / sum(count))
  arrange(shares, day, id)
})
> df3
# A tibble: 21 x 4
# Groups: id [3]
id day count percent
<int> <chr> <int> <dbl>
1 10 t1 3 0.143
2 12 t1 2 0.182
3 123 t1 0 0
4 10 t2 3 0.143
5 12 t2 1 0.0909
6 123 t2 3 0.25
...
这是您想要的结果吗?