Question

我在 if/else 语句中对字符串使用 any() 函数时遇到了问题。请注意，代码中的 print("A") 只是一个示例；实际上，如果某一列包含某些值，我需要执行一系列操作。

随机生成的数据

# Answer levels shared by the Likert-style columns. NA is part of the pool so
# that the sampled columns contain missing values.
level <- c(
  "Strongly Agree", "Agree", "Neither agree or disagree",
  "Disagree", "Strongly disagree", NA
)

# 30 randomly sampled rows (no seed is set, so every run differs).
# stringsAsFactors = TRUE converts the character columns to factors.
df <- data.frame(
  pre_1 = as.character(sample(c("Yes", "No", NA), 30, replace = TRUE)),
  pre_2 = as.character(sample(level, 30, replace = TRUE)),
  post_1 = as.character(sample(level, 30, replace = TRUE)),
  post_2 = as.character(
    sample(c("<90%", "0-80%", ">90", NA), 30, replace = TRUE)
  ),
  stringsAsFactors = TRUE
)

选取数据框中所需的部分（名称以 "post_" 开头的列），并根据各列的取值打印相应的内容。在本例中，如果某一列包含以下取值之一，就需要打印 "A"："Strongly Agree", "Agree", "Neither agree or disagree","Disagree", "Strongly disagree"

# Loop over the positions of the "post_" columns and print "A" when a column
# contains "Neither agree or disagree", otherwise "B".
# The pipe counts the selected columns, expands the count to the sequence
# 1..n and hands that sequence to the for loop through the `.` placeholder.
# NOTE: `==` yields NA for missing cells, so any() returns NA (not FALSE) when
# a column has NAs and no matching value; if() then stops with
# "missing value where TRUE/FALSE needed". Passing na.rm = TRUE to any()
# avoids that.
select(df, starts_with("post_")) %>% 
  length() %>% 
  seq(1,.,1)  %>% 
  for (i in .){
      # `[i]` keeps a one-column data frame, which is compared cell by cell.
      if (any(c("Neither agree or disagree") == (select(df, starts_with("post_"))[i]))){
        print ("A")
      } else {print ("B")}
    }

这给出了错误

Error in if (any(c("Neither agree or disagree") == (select(df, starts_with("post_"))[i]))) { : 
  missing value where TRUE/FALSE needed

请注意，下面这段代码可以正常运行：

# Compare the third column (kept as a one-column data frame) with the
# two-value vector and print "A" if any comparison is TRUE, otherwise "B".
print(
  if (any(c("Neither agree or disagree", "Agree") == df[3])) "A" else "B"
)

任何帮助表示赞赏

Answer 1

尽可能避免循环。 R的优势是矢量计算！

尝试以下

# One label per row, "B" by default. rep() keeps the vector at nrow(df)
# elements; assigning the bare string "B" would shrink it to length 1 and the
# logical sub-assignment below would then leave NA for non-matching rows.
results <- rep("B", nrow(df))

# TRUE for every row in which the searched value occurs at least once.
# na.rm = TRUE makes missing cells count as "no match".
at_least_one <- rowSums(df == "Strongly Agree", na.rm = TRUE) > 0

# Rows that contain the value are relabelled "A".
results[at_least_one] <- "A"

您可以对需要查找的各个值（例如 "Strongly Agree"、"Agree"）进行循环，并相应地覆盖结果向量！希望对您有所帮助！

Answer 2

一种略显朴素的方法，用 TRUE/FALSE 对 LETTERS 进行索引： 1. 用 grepl 选出列名匹配模式 colptrn 的列; 2. 将 df 转换为列表; 3. 用 sapply 遍历列表中的各项，并与您的取值清单进行比较; 4. 如果存在匹配（any 为 TRUE），取反后为 FALSE，FALSE + 1 = 1，对应 "A"；否则 TRUE + 1 = 2，对应 "B";

#' Flag the columns that contain at least one of the given values.
#'
#' @param df A data frame.
#' @param i Vector of values to look for.
#' @param colptrn Regular expression matched against the column names of `df`
#'   with grepl(); only matching columns are checked.
#' @param na.rm Passed on to any(). `%in%` never returns NA, so the argument
#'   does not change the result; it is kept for backward compatibility.
#' @return A named character vector holding "A" for every selected column that
#'   contains a value of `i` and "B" otherwise. When no column name matches
#'   `colptrn`, sapply() returns an empty named list instead.
fu <- function(df, i, colptrn, na.rm = TRUE) {
  matching_cols <- as.list(df[grepl(colptrn, colnames(df))])
  sapply(matching_cols, function(li) {
    # any() is TRUE on a hit; negating gives FALSE + 1 = 1 ("A"),
    # while no hit gives TRUE + 1 = 2 ("B").
    LETTERS[1 + !any(i %in% li, na.rm = na.rm)]
  })
}

## Test 
fu(df, c("Neither agree or disagree", "Agree"), "post_")
post_1 post_2 
   "A"    "B" 
fu(df, c("Neither agree or disagree", "Agree"), ".*")
 pre_1  pre_2 post_1 post_2 
   "B"    "A"    "A"    "B" 
fu(df, c("Neither agree or disagree", "Agree"), "postpostup")
named list()

Answer 3

我采纳了 @tobiaspk1 的建议，使用 rowSums 来根据列中的某些取值进行判断。问题在于，我希望加入更多条件（每一列的所有因子水平），以确保该函数在其他情形下也能正常工作（例如，当缺少中间类别时）。

# Return the ordered levels of the first answer scale whose marker value
# (the list name) occurs in `column`, or NULL when no marker is found.
dfplot_answer_scale <- function(column, scales) {
  for (marker in names(scales)) {
    if (sum(column == marker, na.rm = TRUE) > 0) {
      return(scales[[marker]])
    }
  }
  NULL
}

# Build the relative-frequency bar chart for one column. Missing answers are
# dropped before plotting; `title` becomes the plot title. The ggplot object
# is returned without being printed.
dfplot_bar_chart <- function(values, title) {
  plot_data <- na.omit(data.frame(value = values))
  ggplot(plot_data, aes(x = value)) +
    # Bar heights are shares of all non-missing answers, not raw counts.
    geom_bar(aes(y = (..count..) / sum(..count..)), stat = "count") +
    geom_text(
      aes(
        label = paste(round((..count..) / sum(..count..) * 100), "%"),
        y = (..count..) / sum(..count..)
      ),
      stat = "count",
      vjust = -.5
    ) +
    scale_y_continuous(labels = percent, limits = c(-0, 1)) +
    # drop = FALSE keeps answer levels that nobody selected on the axis.
    scale_x_discrete(drop = FALSE) +
    ylab("Relative Frequencies (%)") +
    ggtitle(title) +
    theme_light(base_size = 12) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(plot.title = element_text(hjust = 0.5, size = 10))
}

#' Print a relative-frequency bar chart for every column of `df` whose name
#' starts with `prefix`.
#'
#' Columns for which `dummy()` returns FALSE are matched against a set of
#' known answer scales: the first scale whose marker value occurs in the
#' column fixes the order of the bars. All other columns are plotted with
#' their values as they are.
#'
#' Relies on names that are not defined in this block: `select()`,
#' `starts_with()`, the ggplot2 functions, `percent` and `dummy()`.
#' NOTE(review): `dummy()` presumably detects yes/no-style columns -- confirm
#' against its definition.
#'
#' @param df Data frame holding the survey answers.
#' @param prefix Column-name prefix selecting the columns to plot.
#' @return `NULL`, invisibly; called for the side effect of printing plots.
dfplot <- function(df, prefix) {
  # Marker value -> ordered levels. The order of the entries matters: the
  # first marker found in a column decides which scale is applied.
  answer_scales <- list(
    "Agree" = c(
      "Strongly Agree", "Agree", "Neither agree or disagree",
      "Disagree", "Strongly disagree"
    ),
    "51-75%" = c("1-25%", "26-50%", "51-75%", "75-90%", "91-100%"),
    "Somewhat too easy" = c(
      "Very easy", "Somewhat too easy", "About right",
      "Somewhat challenging", "Very challenging"
    ),
    "Too slow" = c("Too slow", "Slow", "About right", "Fast", "Too fast"),
    "Between 3 and 4 hours" = c(
      "Less than 2 hours", "Between 2 and 3 hours", "Between 3 and 4 hours",
      "Between 4 and 5 hours", "More than 5 hours"
    )
  )

  selected <- select(df, starts_with(prefix))

  # seq_along() yields an empty loop when no column matches the prefix.
  for (i in seq_along(selected)) {
    column <- selected[[i]]
    values <- column

    if (dummy(as.character(column)) == FALSE) {
      scale_levels <- dfplot_answer_scale(column, answer_scales)
      if (!is.null(scale_levels)) {
        values <- factor(column, levels = scale_levels, ordered = TRUE)
      }
    }

    print(dfplot_bar_chart(values, names(selected)[i]))
  }

  invisible(NULL)
}

在数据框的循环中将 if 与 any 函数一起使用

3 个答案: