我有一个像这样的数据框:
# Sample input with 10 rows. Slot 1 (`*_1`) is populated for every row;
# slots 2-5 start out entirely NA (logical) and are the columns to be filled
# according to `member`.
df <- data.frame(
  member = c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 3L, 5L),
  # Slot 1: real answers.
  q_c3_1 = rep(c("A", "B", "C"), length.out = 10L),
  q_c4_1 = rep(1L, 10L),
  q_c5_1 = rep(1900L, 10L),
  q_c6_1 = rep(0L, 10L),
  q_c7_1 = rep(0L, 10L),
  # Slot 2: empty.
  q_c3_2 = rep(NA, 10L),
  q_c4_2 = rep(NA, 10L),
  q_c5_2 = rep(NA, 10L),
  q_c6_2 = rep(NA, 10L),
  q_c7_2 = rep(NA, 10L),
  # Slot 3: empty.
  q_c3_3 = rep(NA, 10L),
  q_c4_3 = rep(NA, 10L),
  q_c5_3 = rep(NA, 10L),
  q_c6_3 = rep(NA, 10L),
  q_c7_3 = rep(NA, 10L),
  # Slot 4: empty.
  q_c3_4 = rep(NA, 10L),
  q_c4_4 = rep(NA, 10L),
  q_c5_4 = rep(NA, 10L),
  q_c6_4 = rep(NA, 10L),
  q_c7_4 = rep(NA, 10L),
  # Slot 5: empty.
  q_c3_5 = rep(NA, 10L),
  q_c4_5 = rep(NA, 10L),
  q_c5_5 = rep(NA, 10L),
  q_c6_5 = rep(NA, 10L),
  q_c7_5 = rep(NA, 10L),
  stringsAsFactors = FALSE
)
基于 member 变量,我需要用虚拟数据填充相应的变量。例如,如果 member == 2,则 q_c3_2:q_c7_2
应填入虚拟值:q_c3 为某个字符串(例如 "Arne"),q_c4 为 1,q_c5 为 1900,q_c6 和 q_c7 为 0;如果 member == 3,则 q_c3_2:q_c7_2
和 q_c3_3:q_c7_3
都应填入虚拟值(与上述虚拟值相同),依此类推。如何用 tidyverse 高效地做到这一点?谢谢
我期望的输出应类似于下面这个数据框:
# Expected output. Slot 1 is unchanged; slot k (k = 2..5) carries the dummy
# values ("Arne", 1, 1900, 0, 0) on exactly the rows where `member >= k`, and
# NA elsewhere.
df2 <- local({
  member <- c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 3L, 5L)

  # Dummy `value` on rows whose `member` reaches `slot`, NA on the others.
  # The result keeps the type of `value` (character or integer).
  dummy_for <- function(slot, value) ifelse(member >= slot, value, NA)

  data.frame(
    member = member,
    q_c3_1 = rep(c("A", "B", "C"), length.out = 10L),
    q_c4_1 = rep(1L, 10L),
    q_c5_1 = rep(1900L, 10L),
    q_c6_1 = rep(0L, 10L),
    q_c7_1 = rep(0L, 10L),
    q_c3_2 = dummy_for(2L, "Arne"),
    q_c4_2 = dummy_for(2L, 1L),
    q_c5_2 = dummy_for(2L, 1900L),
    q_c6_2 = dummy_for(2L, 0L),
    q_c7_2 = dummy_for(2L, 0L),
    q_c3_3 = dummy_for(3L, "Arne"),
    q_c4_3 = dummy_for(3L, 1L),
    q_c5_3 = dummy_for(3L, 1900L),
    q_c6_3 = dummy_for(3L, 0L),
    q_c7_3 = dummy_for(3L, 0L),
    q_c3_4 = dummy_for(4L, "Arne"),
    q_c4_4 = dummy_for(4L, 1L),
    q_c5_4 = dummy_for(4L, 1900L),
    q_c6_4 = dummy_for(4L, 0L),
    q_c7_4 = dummy_for(4L, 0L),
    q_c3_5 = dummy_for(5L, "Arne"),
    q_c4_5 = dummy_for(5L, 1L),
    q_c5_5 = dummy_for(5L, 1900L),
    q_c6_5 = dummy_for(5L, 0L),
    q_c7_5 = dummy_for(5L, 0L),
    stringsAsFactors = FALSE
  )
})
答案 0 :(得分:1)
假设虚拟值的具体内容无关紧要,使用 dplyr(以及 reshape2 的 melt 和 tidyr 的 spread):
library(dplyr)
library(reshape2) # melt()
library(tidyr)    # spread()

# Build a lookup table with one row per distinct `member` value: every
# `q_c*_k` column with 1 < k <= member holds the placeholder "dummy".
temp <- df %>%
  melt(id.vars = "member") %>%
  # The trailing digit of the column name is the slot index.
  mutate(compare = as.numeric(gsub("q_c\\d_(\\d)", "\\1", variable))) %>%
  # Slot 1 already holds real data, so only slots 2..member are filled.
  filter(compare > 1, compare <= member) %>%
  mutate(
    value = "dummy",
    compare = NULL
  ) %>%
  # Rows sharing a `member` value are now identical; collapse them so that
  # spread() sees exactly one value per (member, variable) pair.
  unique() %>%
  spread(variable, value)

# Keep `member` and the original slot-1 columns, then attach the filled
# slots. `member` values with nothing to fill (member == 1) get NA.
df <- df %>%
  select(member, ends_with("_1")) %>%
  left_join(temp, by = "member")
编辑:根据要求,改为填入指定的虚拟值。
library(dplyr)
library(reshape2) # melt()
library(tidyr)    # spread()

# Build a lookup table with one row per distinct `member` value: every
# `q_c*_k` column with 1 < k <= member holds its question-specific dummy.
temp <- df %>%
  melt(id.vars = "member") %>%
  mutate(
    # Trailing digit of the column name: the slot index.
    compare = as.numeric(gsub("q_c\\d_(\\d)", "\\1", variable)),
    # Digit after "q_c": the question number, which selects the dummy value.
    dummy_match = as.numeric(gsub("q_c(\\d)_\\d", "\\1", variable))
  ) %>%
  # Slot 1 already holds real data, so only slots 2..member are filled.
  filter(compare > 1, compare <= member) %>%
  mutate(
    # case_when() needs a single output type, so the character dummy for
    # q_c3 is carried as the numeric sentinel 9999 and swapped out below.
    value = case_when(
      dummy_match == 4 ~ 1,
      dummy_match == 5 ~ 1900,
      dummy_match >= 6 ~ 0,
      TRUE ~ 9999
    ),
    compare = NULL,
    dummy_match = NULL
  ) %>%
  # Rows sharing a `member` value are now identical; collapse them so that
  # spread() sees exactly one value per (member, variable) pair.
  unique() %>%
  spread(variable, value)

# Keep `member` and the original slot-1 columns, then attach the filled
# slots. `member` values with nothing to fill (member == 1) get NA.
df <- df %>%
  select(member, ends_with("_1")) %>%
  left_join(temp, by = "member")

# Replace the sentinel with the real character dummy; the affected columns
# become character. NOTE: any genuine 9999 in `df` would be replaced as well.
df[df == 9999] <- "Arne"