Question

如何将df某一列中的字符串拆分为多个列，并将各个值分配给正确的变量？

在我的示例中，Q1到Q4是变量名，“Neutral”等是可能的答案。

我的问题主要在于可能出现的“NA”（即某些问题没有回答）。

# Sample survey responses. Each string holds question labels ("Q1:", "Q2:", ...)
# and the chosen answer, separated by CRLF line breaks.
A <- "Q1:\r\nNeutral\r\nQ2:\r\nTotally Disagree\r\nQ3:\r\nTotally Agree"
B <- "Q1:\r\nNeutral\r\nQ2:\r\nNeutral\r\nQ3:\r\nNeutral"
C <- "Q1:\r\nNeutral\r\nQ3:\r\nNeutral" # this response has no Q2 entry
D <- "" # completely empty response

# One-column data frame holding the raw responses; the column is named V1.
df <- data.frame(V1 = c(A, B, C, D))

# Cell values of the desired result, listed row by row (4 rows x 3 columns).
AllAnswers <- c(
  "Neutral", "Totally Disagree", "Totally Agree",
  "Neutral", "Neutral", "Neutral",
  "Neutral", "Neutral", "",
  "", "", ""
)

# Columns are auto-named X1, X2, X3.
DesiredDf <- data.frame(matrix(AllAnswers, nrow = 4, ncol = 3, byrow = TRUE))

Answer 1

我建议：

# Attach dplyr, tidyr and the pipe; the snippet needs them to run on its own.
library(tidyverse)

# Reshape the raw responses in `df$V1` into one column per question label.
# Rows whose response is empty contribute no answers and are dropped here.
df %>%
  # Split each response into up to three "Qn:\r\nanswer" chunks. The lookahead
  # keeps the "Q" with the following chunk. fill = "right" pads short or empty
  # responses with NA instead of emitting a "missing pieces" warning.
  separate(V1, into = c("a", "b", "c"), sep = "\r\n(?=Q)", fill = "right") %>%
  mutate(id = row_number()) %>%
  # Long format: one row per (response id, chunk).
  pivot_longer(cols = -id, names_to = "k", values_to = "v") %>%
  # Split each chunk into the question label (v1) and its answer (v2).
  separate(v, into = c("v1", "v2"), sep = ":\r\n", fill = "right") %>%
  select(-k) %>%
  # Drop padding rows and the empty response, which have no answer.
  filter(!is.na(v2)) %>%
  # Wide format: one column per question label, NA where a label is absent.
  pivot_wider(names_from = v1, values_from = v2) %>%
  # Return a plain data frame so it prints like the output shown below.
  as.data.frame()

但是您的“DesiredDf”似乎有问题，这是我的输出：

  #id      Q1               Q2            Q3
  #1  1 Neutral Totally Disagree Totally Agree
  #2  2 Neutral          Neutral       Neutral
  #3  3 Neutral             <NA>       Neutral

在“df”中，第三行没有Q2：

  #                                                                 V1
  #1 Q1:\r\nNeutral\r\nQ2:\r\nTotally Disagree\r\nQ3:\r\nTotally Agree
  #2                Q1:\r\nNeutral\r\nQ2:\r\nNeutral\r\nQ3:\r\nNeutral
  #3                                  Q1:\r\nNeutral\r\nQ3:\r\nNeutral
  #4

或者，如果要保留V1为空的行：

# Tag every response with its row number so that responses which yield no
# answers (e.g. an empty string) can be restored at the end.
df1 <- df %>% mutate(id = row_number())

df1 %>%
  # Split each response into up to three "Qn:\r\nanswer" chunks. fill = "right"
  # pads short or empty responses with NA instead of emitting a warning.
  separate(V1, into = c("a", "b", "c"), sep = "\r\n(?=Q)", fill = "right") %>%
  # Long format: one row per (response id, chunk).
  pivot_longer(cols = -id, names_to = "k", values_to = "v") %>%
  # Split each chunk into the question label (v1) and its answer (v2).
  separate(v, into = c("v1", "v2"), sep = ":\r\n", fill = "right") %>%
  select(-k) %>%
  # Drop padding rows and empty responses, which have no answer.
  filter(!is.na(v2)) %>%
  # Wide format: one column per question label, NA where a label is absent.
  pivot_wider(names_from = v1, values_from = v2) %>%
  # Bring back the ids that were filtered out; their answers are all NA.
  right_join(select(df1, id), by = "id") %>%
  # Return a plain data frame so it prints like the output shown below.
  as.data.frame()

#  id      Q1               Q2            Q3
#1  1 Neutral Totally Disagree Totally Agree
#2  2 Neutral          Neutral       Neutral
#3  3 Neutral             <NA>       Neutral
#4  4    <NA>             <NA>          <NA>

Answer 2

基于Nicolas2的答案，以下解决方案所需的代码更少：

library(tidyverse)

# Split each response into up to three positional columns (X1..X3) and strip
# the leading question label from every cell, keeping only the answer text.
# The columns are positional: a response that skips a question (e.g. only Q1
# and Q3) has its answers shifted left, so X2 then holds the Q3 answer.
df %>%
  # fill = "right" pads short or empty responses with NA instead of warning.
  separate(V1, into = c("X1", "X2", "X3"), sep = "\r\n(?=Q)", fill = "right") %>%
  # Remove only the "Qn:\r\n" prefix. The pattern is anchored at the start so
  # that letters, digits or punctuation inside the answer itself are left
  # untouched and no stray "\r\n" remains in front of the answer.
  mutate(across(X1:X3, ~ str_remove(.x, "^Q\\d+:\r\n")))

将字符串拆分为多个列，然后将值分配给正确的变量

2 个答案: