基于字符串搜索的 if 语句

时间:2015-02-17 09:07:05

标签: r string gsub

我有这样的数据集

     Id       Comment
     1        No, No, No
     2        No, Yes Happy
     3        
     4        No, NA
     5        NA

我正在尝试根据 Comment 列中是否存在特定字符串来创建 Feedback 列:
     1) If string Happy is not present and cell is not empty then Feedback=No
     2) If string Happy is present then Feedback = Yes
     3) If cell is 
            empty OR 
            cell does not contain either No or Happy 
                then Feedback = NA
     4) If the cell does not contain either No or Happy then Feedback = NA

输出应如下所示:

     Id       Comment             Feedback
     1        No, No, No          No
     2        No, Yes Happy       Yes
     3                            NA
     4        No, NA              No
     5        NA                  NA

需要帮助

----------------------- 原始数据的子集 -----------------------

     # Subset of the asker's real data. The comment strings carry irregular
     # padding, and the missing marker is the literal text "NA" rather than a
     # real NA value.
     t9 <- structure(
       list(
         ID = c(242938L, 309790L, 355662L, 361888L, 428033L, 442546L),
         Comments = c(
           "No,       No,         ",
           "No, Happy,      No,    No,       No,            NA",
           "No,  Happy,     No,   No,     No, No,   No",
           "No,     Happy,   Happy,  Happy,   NA",
           "No,  No,    ",
           "NA     "
         ),
         Feedback = c("No", "Yes", "Yes", "Yes", "No", "No")
       ),
       row.names = c(NA, 6L),
       class = "data.frame"
     )

2 个答案:

答案 0 :(得分:2)

您可以尝试:

# Derive Feedback from Comment in one vectorised pass:
#   - NA    when the comment is missing, empty, or the literal text "NA"
#   - "Yes" when the comment contains "Happy"
#   - "No"  otherwise
# ifelse(), grepl() and %in% are already vectorised, so no sapply() wrapper is
# needed; dropping it also keeps the comment strings from being attached as
# names on the new column.
df$Feedback <- ifelse(
  is.na(df$Comment) | df$Comment %in% c("", "NA"),
  NA_character_,
  ifelse(grepl("Happy", df$Comment), "Yes", "No")
)

df
    # Id       Comment Feedback
# 1  1    No, No, No       No
# 2  2 No, Yes Happy      Yes
# 3  3                   <NA>
# 4  4        No, NA       No
# 5  5          <NA>     <NA>
# 6  6            NA     <NA>

数据

        # Example input for this answer: row 5 is a real missing value, while
        # row 6 holds the literal text "NA".
        df <- structure(
          list(
            Id = as.character(1:6),
            Comment = c(
              "No, No, No",
              "No, Yes Happy",
              "",
              "No, NA",
              NA,
              "NA"
            )
          ),
          row.names = c(NA, 6L),
          class = "data.frame"
        )

使用您的data.frame:

 # Same rule applied to t9. All whitespace is stripped first because the raw
 # comments are padded, so the "NA" marker only matches once its trailing
 # blanks are gone. The classification itself is a single vectorised ifelse();
 # no sapply() is needed, which also avoids attaching the cleaned strings as
 # names on the new column. local() keeps the helper variable out of the
 # workspace.
 t9$Feedback <- local({
   cleaned <- gsub("\\s", "", t9$Comments)
   ifelse(
     is.na(cleaned) | cleaned %in% c("", "NA"),
     NA_character_,
     ifelse(grepl("Happy", cleaned), "Yes", "No")
   )
 })
t9
   #       ID                                           Comments Feedback
   # 1 242938                             No,       No,                No
   # 2 309790 No, Happy,      No,    No,       No,            NA      Yes
   # 3 355662         No,  Happy,     No,   No,     No, No,   No      Yes
   # 4 361888               No,     Happy,   Happy,  Happy,   NA      Yes
   # 5 428033                                       No,  No,           No
   # 6 442546                                            NA          <NA>

答案 1 :(得分:1)

您也可以尝试

# Pick the label by position: 1 = 'No', 2 = 'Yes', 3 = NA.
# A comment matching 'Yes' adds 1; a blank or missing comment adds 2.
df$Feedback <- with(df, {
  has_yes <- grepl('Yes', Comment)
  is_blank <- is.na(Comment) | !nzchar(Comment)
  c('No', 'Yes', NA)[1 + has_yes + 2 * is_blank]
})

df
#    Id       Comment Feedback
#1  1    No, No, No       No
#2  2 No, Yes Happy      Yes
#3  3                   <NA>
#4  4        No, NA       No
#5  5          <NA>     <NA>

或使用factor

# Same classification returned as a factor. Codes: 1 = "No", 2 = "Yes", 3 = NA.
# `levels = 1:3` pins all three codes so `labels` always lines up with them;
# without it, factor() infers the levels from the data and errors whenever one
# of the three codes does not occur (e.g. no blank or missing comments).
factor(
  with(df, 1 + grepl('Yes', Comment) + 2 * (!nzchar(Comment) | is.na(Comment))),
  levels = 1:3,
  labels = c("No", "Yes", NA)
)

更新

根据新的数据集 “t9”:其中的 “NA” 是字符串而不是真正的缺失值,并且字符串前后带有多余的空格。

library(stringr)
# Strip leading/trailing whitespace from every comment
t9$Comments <- str_trim(t9$Comments)
# Turn the literal text "NA" into a real missing value so the lookup below
# treats it as missing
t9$Comments[t9$Comments == 'NA'] <- NA
# Same positional lookup as before, now matching "Happy" on t9's column names:
# 1 = 'No', 2 = 'Yes', 3 = NA
t9$Feedback <- with(t9, {
  has_happy <- grepl('Happy', Comments)
  is_blank <- is.na(Comments) | !nzchar(Comments)
  c('No', 'Yes', NA)[1 + has_happy + 2 * is_blank]
})

t9
#      ID                                           Comments Feedback
#1 242938                                      No,       No,       No
#2 309790 No, Happy,      No,    No,       No,            NA      Yes
#3 355662         No,  Happy,     No,   No,     No, No,   No      Yes
#4 361888               No,     Happy,   Happy,  Happy,   NA      Yes
#5 428033                                           No,  No,       No
#6 442546                                               <NA>     <NA>

数据

# Example input for this answer: five rows, with an empty comment in row 3 and
# a real missing value in row 5.
df <- data.frame(
  Id = 1:5,
  Comment = c(
    "No, No, No",
    "No, Yes Happy",
    "",
    "No, NA",
    NA
  ),
  stringsAsFactors = FALSE
)