答案 0 :(得分:1)
如果需要对所有问题都进行统计,一种做法是先将数据重塑为“长”格式,再用 `count` 计数:
library(dplyr)
library(tidyr)
# Stack the answer columns (province through village_other) into
# Question_name / Text_answer pairs, discard unanswered (NA) cells, and
# tally how often each enumerator gave each answer to each question.
out <- df1 %>%
  pivot_longer(
    cols = province:village_other,
    names_to = "Question_name",
    values_to = "Text_answer"
  ) %>%
  drop_na(Text_answer) %>%
  count(enumerator_id, Question_name, Text_answer)
# Show only the tallies for the free-text "village_other" question.
filter(out, Question_name == "village_other")
# Printed result:
# A tibble: 3 x 4
# enumerator_id Question_name Text_answer n
# <dbl> <chr> <chr> <int>
#1 1 village_other Z 3
#2 2 village_other D 2
#3 3 village_other J 1
如果需要让每个问题各占一列,可以再将结果转换为“宽”格式:
# Spread the tallies back out: one column per question, holding the count
# `n` for each enumerator_id / Text_answer combination.
pivot_wider(
  out,
  names_from = Question_name,
  values_from = n
)
另一种选择是使用 `map` 遍历感兴趣的列名,对每一列分别执行 `count`,并将各列的计数结果保存在一个 `list` 中
library(purrr)
# For each of columns 3 to 6 of df1, keep the rows where that column is not
# NA and count the remaining values per enumerator. Returns a list with one
# count table per column, in column order.
#
# Uses filter() with the `.data` pronoun instead of the superseded
# filter_at()/vars()/any_vars() scoped verbs; the result is the same.
map(names(df1)[3:6], function(col_name) {
  df1 %>%
    filter(!is.na(.data[[col_name]])) %>%
    # Turn the column name string into a symbol so count() groups by that
    # column and keeps its original name in the output.
    count(enumerator_id, !!rlang::sym(col_name))
})
# Example survey data: eight submissions from three enumerators. On every
# row exactly one of `village` / `village_other` is filled in; the other
# is NA.
df1 <- data.frame(
  enumerator_id = c(1, 2, 1, 3, 2, 1, 3, 1),
  date = c(
    "5/18/2020", "5/19/2020", "5/20/2020", "5/21/2020",
    "5/22/2020", "5/23/2020", "5/24/2020", "5/25/2020"
  ),
  province = c("A", "C", "X", "E", "A", "C", "H", "A"),
  district = c("B", "A", "Y", "F", "B", "A", "I", "B"),
  village = c("C", NA, NA, "G", NA, NA, NA, NA),
  village_other = c(NA, "D", "Z", NA, "D", "Z", "J", "Z"),
  # Keep text columns as character regardless of the R version's default.
  stringsAsFactors = FALSE
)