在 R 中根据特定列的值向数据框填充虚拟值

时间:2019-01-09 10:14:23

标签: r dplyr tidyr

我有一个像这样的数据框:

# Sample input with ten rows. Only the `_1` columns (q_c3_1 .. q_c7_1) hold
# values; the `_2` .. `_5` columns are all-NA (logical) placeholders.
df <- data.frame(
  member = c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 3L, 5L),
  # "A", "B", "C" cycled over the ten rows.
  q_c3_1 = rep_len(c("A", "B", "C"), 10L),
  q_c4_1 = rep(1L, 10L),
  q_c5_1 = rep(1900L, 10L),
  q_c6_1 = rep(0L, 10L),
  q_c7_1 = rep(0L, 10L),
  q_c3_2 = rep(NA, 10L),
  q_c4_2 = rep(NA, 10L),
  q_c5_2 = rep(NA, 10L),
  q_c6_2 = rep(NA, 10L),
  q_c7_2 = rep(NA, 10L),
  q_c3_3 = rep(NA, 10L),
  q_c4_3 = rep(NA, 10L),
  q_c5_3 = rep(NA, 10L),
  q_c6_3 = rep(NA, 10L),
  q_c7_3 = rep(NA, 10L),
  q_c3_4 = rep(NA, 10L),
  q_c4_4 = rep(NA, 10L),
  q_c5_4 = rep(NA, 10L),
  q_c6_4 = rep(NA, 10L),
  q_c7_4 = rep(NA, 10L),
  q_c3_5 = rep(NA, 10L),
  q_c4_5 = rep(NA, 10L),
  q_c5_5 = rep(NA, 10L),
  q_c6_5 = rep(NA, 10L),
  q_c7_5 = rep(NA, 10L),
  stringsAsFactors = FALSE
)

基于 member 变量,我需要用虚拟数据填充相应的变量。例如,如果 member = 2,则 q_c3_2:q_c7_2 应该填入虚拟值:q_c3 为某个字符串(例如 "Arne"),q_c4 为 1,q_c5 为 1900,q_c6 和 q_c7 为 0;如果 member == 3,则 q_c3_2:q_c7_2 和 q_c3_3:q_c7_3 都应该填入虚拟值(与上述虚拟值相同),依此类推。如何用 tidyverse 高效地做到这一点?谢谢

我期望的输出应类似于下面的数据框:

# Expected result: the `_1` columns are unchanged, and for every row the
# groups `_2` .. `_<member>` carry the dummy values ("Arne", 1, 1900, 0, 0).
# Filled row positions per group: `_2` -> rows 3, 8, 9, 10; `_3` -> 8, 9, 10;
# `_4` -> 8, 10; `_5` -> 10. All other cells are NA.
df2 <- data.frame(
  member = c(1L, 1L, 2L, 1L, 1L, 1L, 1L, 4L, 3L, 5L),
  q_c3_1 = rep_len(c("A", "B", "C"), 10L),
  q_c4_1 = rep(1L, 10L),
  q_c5_1 = rep(1900L, 10L),
  q_c6_1 = rep(0L, 10L),
  q_c7_1 = rep(0L, 10L),
  q_c3_2 = replace(rep(NA_character_, 10L), c(3L, 8L, 9L, 10L), "Arne"),
  q_c4_2 = replace(rep(NA_integer_, 10L), c(3L, 8L, 9L, 10L), 1L),
  q_c5_2 = replace(rep(NA_integer_, 10L), c(3L, 8L, 9L, 10L), 1900L),
  q_c6_2 = replace(rep(NA_integer_, 10L), c(3L, 8L, 9L, 10L), 0L),
  q_c7_2 = replace(rep(NA_integer_, 10L), c(3L, 8L, 9L, 10L), 0L),
  q_c3_3 = replace(rep(NA_character_, 10L), c(8L, 9L, 10L), "Arne"),
  q_c4_3 = replace(rep(NA_integer_, 10L), c(8L, 9L, 10L), 1L),
  q_c5_3 = replace(rep(NA_integer_, 10L), c(8L, 9L, 10L), 1900L),
  q_c6_3 = replace(rep(NA_integer_, 10L), c(8L, 9L, 10L), 0L),
  q_c7_3 = replace(rep(NA_integer_, 10L), c(8L, 9L, 10L), 0L),
  q_c3_4 = replace(rep(NA_character_, 10L), c(8L, 10L), "Arne"),
  q_c4_4 = replace(rep(NA_integer_, 10L), c(8L, 10L), 1L),
  q_c5_4 = replace(rep(NA_integer_, 10L), c(8L, 10L), 1900L),
  q_c6_4 = replace(rep(NA_integer_, 10L), c(8L, 10L), 0L),
  q_c7_4 = replace(rep(NA_integer_, 10L), c(8L, 10L), 0L),
  q_c3_5 = replace(rep(NA_character_, 10L), 10L, "Arne"),
  q_c4_5 = replace(rep(NA_integer_, 10L), 10L, 1L),
  q_c5_5 = replace(rep(NA_integer_, 10L), 10L, 1900L),
  q_c6_5 = replace(rep(NA_integer_, 10L), 10L, 0L),
  q_c7_5 = replace(rep(NA_integer_, 10L), 10L, 0L),
  stringsAsFactors = FALSE
)

1 个答案:

答案 0 :(得分:1)

假设虚拟值的具体内容无关紧要,使用 dplyr:

# Fill the placeholder groups q_c*_2 .. q_c*_<member> of every row with the
# constant "dummy", leaving the real data in the q_c*_1 columns untouched.
library(dplyr)
library(reshape2) # provides melt()
library(tidyr)    # provides spread()

# Lookup table: one row per distinct `member` value, one column per variable
# that has to be filled for that value.
temp <- df %>%
  melt(id.vars = "member") %>%
  # Group index = trailing digit of the column name (q_c3_2 -> 2).
  mutate(compare = as.numeric(gsub("q_c\\d_(\\d)", "\\1", variable))) %>%
  # Group 1 holds real answers and is skipped; groups above `member` are
  # dropped here and therefore stay NA after the join below.
  filter(compare > 1, compare <= member) %>%
  mutate(value = "dummy", compare = NULL) %>%
  # Rows sharing a `member` value are now identical; spread() needs one row
  # per (member, variable) pair.
  unique() %>%
  spread(variable, value)

# Keep the original `_1` columns and attach the filled groups by `member`.
# Groups whose index exceeds max(member) have no rows in `temp` and so do
# not appear as columns in the result.
df <- df %>%
  select(member, ends_with("_1")) %>%
  left_join(temp, by = "member")

编辑:按要求填入指定的虚拟值。

# Fill the placeholder groups q_c*_2 .. q_c*_<member> of every row with the
# requested dummy values (q_c3 -> "Arne", q_c4 -> 1, q_c5 -> 1900,
# q_c6 / q_c7 -> 0), leaving the real data in the q_c*_1 columns untouched.
library(dplyr)
library(reshape2) # provides melt()
library(tidyr)    # provides spread()

# Lookup table: one row per distinct `member` value, one column per variable
# that has to be filled for that value.
temp <- df %>%
  melt(id.vars = "member") %>%
  mutate(
    # Group index = trailing digit of the column name (q_c3_2 -> 2).
    compare = as.numeric(gsub("q_c\\d_(\\d)", "\\1", variable)),
    # Question number = digit after "q_c" (q_c3_2 -> 3).
    dummy_match = as.numeric(gsub("q_c(\\d)_\\d", "\\1", variable))
  ) %>%
  # Group 1 holds real answers and is skipped; groups above `member` are
  # dropped here and therefore stay NA after the join below.
  filter(compare > 1, compare <= member) %>%
  mutate(
    # Values are written as strings so text and numbers can share one long
    # column; spread(convert = TRUE) restores the per-column types.
    value = case_when(
      dummy_match == 4 ~ "1",
      dummy_match == 5 ~ "1900",
      dummy_match >= 6 ~ "0",
      TRUE ~ "Arne"
    ),
    compare = NULL,
    dummy_match = NULL
  ) %>%
  # Rows sharing a `member` value are now identical; spread() needs one row
  # per (member, variable) pair.
  unique() %>%
  # convert = TRUE runs type.convert() on each new column, so the numeric
  # dummies come back as integers and "Arne" stays character.
  spread(variable, value, convert = TRUE)

# Keep the original `_1` columns and attach the filled groups by `member`.
# Groups whose index exceeds max(member) have no rows in `temp` and so do
# not appear as columns in the result.
df <- df %>%
  select(member, ends_with("_1")) %>%
  left_join(temp, by = "member")