我有一个闪亮的应用程序,其中的某些输入选择可能会使数据在进行某些计算时过滤为零观测值。当我按周和组计数观察值,然后尝试将此结果数据集加入下游的另一个数据集时,这会引起问题。
例如,如果下面的dat_2
的计算过滤为零观测值,则我得到的group_by
和count()
如下:
# A tibble: 1 x 3
# Groups: date, diss_group [1]
date diss_group date2_n
<date> <fct> <int>
1 NA NA 0
当我需要在几周内按组进行零计数时:
# A tibble: 22 x 3
date diss_group date1_n
<date> <chr> <dbl>
1 2019-05-15 a 0
2 2019-05-22 a 0
3 2019-05-29 a 0
4 2019-06-05 a 0
5 2019-06-12 a 0
6 2019-06-19 a 0
7 2019-06-26 a 0
8 2019-07-03 a 0
9 2019-07-10 a 0
10 2019-07-17 a 0
# … with 12 more rows
这是一个例子
library(tidyverse)
# selecting date range via Shiny input
start <- "2019-05-15"
end <- "2019-07-25"
startdate_adjusted <- ymd(start) - lubridate::floor_date(ymd(start),
"1 week")
# generate some data
dat <- data.frame(date1 = c("2019-05-04",
"2019-05-15",
"2019-05-18",
"2019-05-23",
"2019-05-23",
"2019-06-10",
"2019-06-15",
"2019-06-25",
"2019-06-26"),
date2 = c("2019-05-06",
"2019-05-16",
NA,
NA,
"2019-06-06",
"2019-06-15",
NA,
"2019-06-29",
"2019-06-29"),
Total = as.character(rep("Total", 9)),
letter = as.character(c("a", "a", "b",
"a", "a", "b",
"a", "a", "b"))
)
dat <-
dat %>%
mutate(date1 = ymd(as.character(date1))) %>%
mutate(date2 = ymd(as.character(date2))) %>%
mutate(Total = factor(Total, levels="Total")) %>%
mutate(letter = factor(letter, levels=c("a", "b")))
# count 1
dat_1 <-
dat %>%
filter(!is.na(date1)) %>%
mutate(date = date(date1)) %>%
mutate(diss_group = letter) %>%
filter(!is.na(diss_group)) %>%
filter(diss_group!="") %>%
group_by(date = lubridate::floor_date(date, "1 week") + startdate_adjusted,
diss_group) %>%
count(name = "date1_n") %>%
spread(diss_group, date1_n) %>%
ungroup() %>%
padr::pad(interval = "1 week",
by = "date",
start_val = ymd(start),
end_val = ymd(end)
) %>%
replace(is.na(.), 0) %>%
gather("diss_group", "date1_n", -date)
dat_1
# # A tibble: 22 x 3
# date diss_group date1_n
# <date> <chr> <dbl>
#1 2019-05-15 a 1
#2 2019-05-22 a 2
#3 2019-05-29 a 0
#4 2019-06-05 a 0
#5 2019-06-12 a 1
#6 2019-06-19 a 0
#7 2019-06-26 a 1
#8 2019-07-03 a 0
#9 2019-07-10 a 0
#10 2019-07-17 a 0
# … with 12 more rows
# count 2, problem from `filter(letter=="c")`
dat_2 <-
dat %>%
filter(!is.na(date2)) %>%
mutate(date = date(date2)) %>%
mutate(diss_group = letter) %>%
filter(!is.na(diss_group)) %>%
filter(diss_group!="") %>%
filter(letter=="c") %>% # letter c does not exist
group_by(date = lubridate::floor_date(date, "1 week") + startdate_adjusted,
diss_group) %>%
count(name = "date2_n") %>%
spread(diss_group, date2_n) %>%
ungroup() %>%
padr::pad(interval = "1 week",
by = "date",
start_val = ymd(start),
end_val = ymd(end)
) %>%
replace(is.na(.), 0) %>%
gather("diss_group", "date2_n", -date)
答案 0 :(得分:0)
将.drop = FALSE
添加到产生的group_by
语句中:
# A tibble: 1 x 3
# Groups: date [1]
date a b
<date> <int> <int>
1 NA 0 0
如果date
与start
在一起,那么从这里我将is.na(date)
设置为等于mutate_if(is.Date, funs(if_else(is.na(.), ymd(start), .)))
dat_2 <-
dat %>%
filter(!is.na(date2)) %>%
mutate(date = date(date2)) %>%
mutate(diss_group = letter) %>%
filter(!is.na(diss_group)) %>%
filter(diss_group!="") %>%
filter(letter=="c") %>% # letter c does not exist
group_by(date = lubridate::floor_date(date, "1 week") + startdate_adjusted,
diss_group,
.drop = FALSE) %>% #### Added
count(name = "date2_n") %>%
spread(diss_group, date2_n) %>%
ungroup() %>%
mutate_if(is.Date, funs(if_else(is.na(.), ymd(start), .))) %>% #### Added
padr::pad(interval = "1 week",
by = "date",
start_val = ymd(start),
end_val = ymd(end)
) %>%
replace(is.na(.), 0) %>%
gather("diss_group", "date2_n", -date)
产生:
# A tibble: 22 x 3
date diss_group date2_n
<date> <chr> <dbl>
1 2019-05-15 a 0
2 2019-05-22 a 0
3 2019-05-29 a 0
4 2019-06-05 a 0
5 2019-06-12 a 0
6 2019-06-19 a 0
7 2019-06-26 a 0
8 2019-07-03 a 0
9 2019-07-10 a 0
10 2019-07-17 a 0
# … with 12 more rows