我有一个数据框,包含日期 date、感兴趣的结果变量 v169 和分组变量 treated,如下所示:
Dataframe <- data.frame(
date = structure(c(12482, 12499, 12478, 12484, 12477, 12492, 12475, 12490, 12490, 12482, 12488, 12474, 12487, 12474, 12473, 12473, 12478, 12474, 12481, 12474, 12489, 12485, 12479, 12479, 12479, 12479, 12481, 12477, 12474, 12481, 12481, 12478, 12478, 12473, 12479, 12479, 12478, 12482, 12480, 12478, 12480, 12479, 12475, 12481, 12480, 12477, 12477, 12477, 12476, 12476, 12474, 12473, 12474, 12483, 12472, 12479, 12481, 12488, 12481, 12482, 12481, 12482, 12488, 12478, 12474, 12481, 12481, 12480, 12478, 12479, 12475, 12476, 12478, 12482, 12479, 12478, 12478, 12477, 12479, 12479, 12479, 12479, 12478, 12480, 12478, 12487, 12482, 12475, 12475, 12474, 12474, 12478, 12473, 12485, 12482, 12473, 12474, 12472, 12478, 12478, 12479, 12479, 12488, 12476, 12492, 12493, 12479, 12482, 12480, 12476, 12476, 12482, 12479, 12475, 12472, 12475, 12475, 12475, 12482, 12482, 12482, 12478, 12480, 12485, 12480, 12482, 12481, 12480, 12480, 12480, 12480, 12478, 12481, 12478, 12478, 12479, 12481, 12481, 12482, 12482, 12482, 12479, 12478, 12476, 12483, 12475, 12477, 12477, 12480, 12485, 12485, 12479, 12476, 12480, 12476, 12481, 12485, 12479, 12480, 12484, 12479, 12481, 12487, 12490, 12486, 12482, 12480, 12494, 12493, 12485, 12479, 12477, 12477, 12481, 12481, 12483, 12480, 12479, 12483, 12472, 12474, 12471, 12482, 12479, 12489, 12480, 12494, 12481, 12483, 12483, 12488, 12471, 12476, 12482), class = "Date"),
v169 = c(1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0),
treated = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
我想使用以下代码绘制 treated == 1 与 treated == 0 两组之间结果 v169 的比例之差(如此处所述:Plot difference between proportions for subgroups):
# Asker's attempt: for each date, compute the share of rows with v169 equal to 1
# within each treated group, spread the two groups into separate columns, and
# plot `propdiff` against date as a line with points.
# No library() call is visible in this snippet; it presumably relies on
# dplyr, tidyr and ggplot2 already being attached.
Dataframe %>%
mutate(
treated2 = factor(treated, levels = c("0", "1")),
date = as.POSIXct(date)) %>% #convert date
group_by(treated2, date) %>% #group
summarise(
prop = sum(v169 == "1") / n()) %>% #calculate proportion
# spread() turns the levels of treated2 into two columns named 0 and 1.
spread(treated2, prop) %>%
# NOTE: `1 - 0` below is arithmetic on the numeric literals 1 and 0, not on
# the spread columns, so propdiff is the constant 1 on every row.
mutate(propdiff = 1 - 0) %>% #I tried using "1" and "0" here but then get the error "Error in mutate_impl(.data, dots) : Evaluation error: non-numeric argument to binary operator."
ggplot(aes(date, propdiff)) +
geom_line() +
geom_point()
不幸的是,该解决方案对这个数据集不起作用:差异始终为 1.00(如果 propdiff = 1 - 0)或 -1.00(如果 propdiff = 0 - 1),也就是说,似乎忽略了两组数据中的一组。
我无法弄清楚原因——这段代码在我另一个问题中提供的样本数据集上可以正常运行,但这个数据集似乎有某种我无法确定的特殊之处。我相信这对有经验的程序员来说并不是什么难题,但我自己想不明白。有人能给我指出正确的方向吗?
答案 0 :(得分:1)
尝试在 mutate 调用中使用反引号,即 `1` 和 `0`。
library(tidyverse)
# Per-date difference in the share of v169 == 1 between the two treated
# groups, drawn as a line with points.
Dataframe %>%
  mutate(
    date = as.POSIXct(date),
    arm = factor(treated, levels = c("0", "1"))
  ) %>%
  group_by(arm, date) %>%
  # Share of rows with v169 equal to 1 within each group/date cell.
  summarise(share = sum(v169 == "1") / n()) %>%
  # One column per group; the new columns are named `0` and `1`.
  spread(key = arm, value = share) %>%
  # Backticks make `1` and `0` refer to the spread columns rather than to
  # the numeric literals 1 and 0.
  mutate(propdiff = `1` - `0`) %>%
  ggplot(mapping = aes(x = date, y = propdiff)) +
  geom_line() +
  geom_point()
警告:1:删除了包含缺失值的两行(geom_path)。 2:删除了包含缺失值的7行(geom_point)。
在存在缺失值的情况下,使用 geom_line 连接各点的一种方法(来自这个答案:https://stackoverflow.com/a/9641380/8583393):
# Store the per-date difference table so the line layer can be given a
# version of it without missing values.
df <- Dataframe %>%
  mutate(
    date = as.POSIXct(date),
    arm = factor(treated, levels = c("0", "1"))
  ) %>%
  group_by(arm, date) %>%
  # Share of rows with v169 equal to 1 within each group/date cell.
  summarise(share = sum(v169 == "1") / n()) %>%
  # One column per group; the new columns are named `0` and `1`.
  spread(key = arm, value = share) %>%
  mutate(propdiff = `1` - `0`)

# The line layer only sees rows where propdiff is present, so the line is
# drawn across dates that have no difference; the point layer uses all rows.
ggplot(df, mapping = aes(x = date, y = propdiff)) +
  geom_line(data = dplyr::filter(df, !is.na(propdiff))) +
  geom_point()