Question

我有一个循环来重新编码列的值，并在满足条件时中断。我想在具有相同格式的数据帧列表中使用此循环或其基本概念。

示例数据：

# Four Ids, each observed for YearCode 1..11 (44 rows in total)
Id <- factor(rep(c("01001", "01043", "01065", "01069"), each = 11))
YearCode <- rep(as.numeric(1:11), times = 4)

# Type is missing everywhere except rows 8 and 43, which hold the value 2
Type <- rep(NA_real_, length(Id))
Type[c(8, 43)] <- 2

# Placeholder column; the recoding below fills it in
test <- NA
sample_df <- data.frame(Id, YearCode, Type, test)

# A part of sample_df: only the rows that belong to Id "01069"
one_df <- subset(sample_df, Id == "01069")

这个for循环适用于一个数据帧：

# Example for loop on the single data frame "one_df":
# zero-fill `test` row by row for as long as Type is missing.
for (i in seq_along(one_df$Id)) {
  # The first non-missing Type ends the scan, so every later row keeps its NA
  if (!is.na(one_df$Type[i])) {
    break
  }
  one_df$test[i] <- 0
}

但是，我在列表中有相同格式的许多数据帧。我想将它们保留在列表中并将此循环应用于整个列表。

# Example list: break sample_df into one data frame per Id
sample_list <- split(sample_df, f = sample_df$Id, drop = TRUE)

我查看了其他帖子，例如this one，但是当我尝试遍历列表中的每个数据框或使用lapply编写类似的函数时，我感到困惑。如何使用for循环，lapply或其他方法修改此循环以在列表（sample_list）上工作？

任何提示都将不胜感激，如果我需要澄清任何事情，请告诉我。谢谢！

Answer 1

我认为以下内容可以完成你所描述的工作。我的做法如下。我首先使用if_else()创建了一个名为test的新列：如果complete.cases(Type)为TRUE，则使用Type中的值，否则使用0。下一步是用NA替换某些特定的0，因为您不希望在Type中第一个数值所在行之后的行中出现0。例如，你不想在Id == 01069的第10行之后得到0。所以我创建了测试条件：row_number() > which(complete.cases(Type))[1]。您可以将其读作“行号是否大于第一个数值所在的行号”。使用这个条件，我用NA替换了这些0。下面提供了sample_df的部分结果。我希望这会对你的工作有所帮助。

library(dplyr)

# Within each Id: copy Type into test where it is present and use 0 elsewhere,
# then reset to NA every row that comes after the first non-missing Type.
# Note: for an Id whose Type is entirely NA, which(...)[1] is NA, so the
# comparison is NA and if_else() turns every row of that Id into NA.
out <- sample_df %>%
  group_by(Id) %>%
  mutate(
    test = if_else(complete.cases(Type), Type, 0),
    test = if_else(
      row_number() > which(complete.cases(Type))[1],
      NA_real_,
      test
    )
  )

#       Id YearCode  Type  test
#   <fctr>    <dbl> <dbl> <dbl>
#1   01001        1    NA     0
#2   01001        2    NA     0
#3   01001        3    NA     0
#4   01001        4    NA     0
#5   01001        5    NA     0
#6   01001        6    NA     0
#7   01001        7    NA     0
#8   01001        8     2     2
#9   01001        9    NA    NA
#10  01001       10    NA    NA
#11  01001       11    NA    NA
#------------------------------
#34  01069        1    NA     0
#35  01069        2    NA     0
#36  01069        3    NA     0
#37  01069        4    NA     0
#38  01069        5    NA     0
#39  01069        6    NA     0
#40  01069        7    NA     0
#41  01069        8    NA     0
#42  01069        9    NA     0
#43  01069       10     2     2
#44  01069       11    NA    NA

修改

根据他/她的评论，当Type只包含NAs时，OP想要0。以下将完成这项工作。

# Within each Id: copy Type into test where it is present and use 0 elsewhere,
# reset to NA every row after the first non-missing Type, and finally give an
# Id whose Type is entirely NA the value 0 in every row.
out <- sample_df %>%
  group_by(Id) %>%
  mutate(
    test = if_else(complete.cases(Type), Type, 0),
    test = if_else(
      row_number() > which(complete.cases(Type))[1],
      NA_real_,
      test
    ),
    # Test for "all missing" directly: checking sum(Type, na.rm = TRUE) == 0
    # would also fire for an Id whose observed Type values add up to zero.
    test = if (all(is.na(Type))) 0 else test
  )

# A part of the result
#       Id YearCode  Type  test
#   <fctr>    <dbl> <dbl> <dbl>
#1   01001        1    NA     0
#2   01001        2    NA     0
#3   01001        3    NA     0
#4   01001        4    NA     0
#5   01001        5    NA     0
#6   01001        6    NA     0
#7   01001        7    NA     0
#8   01001        8     2     2
#9   01001        9    NA    NA
#10  01001       10    NA    NA
#11  01001       11    NA    NA
#12  01043        1    NA     0
#13  01043        2    NA     0
#14  01043        3    NA     0
#15  01043        4    NA     0
#16  01043        5    NA     0
#17  01043        6    NA     0
#18  01043        7    NA     0
#19  01043        8    NA     0
#20  01043        9    NA     0
#21  01043       10    NA     0
#22  01043       11    NA     0

Answer 2

创建一个函数并配合lapply使用有什么问题吗？这样做似乎可行：

# rm(list = ls())  # optional workspace clean-up, left disabled

# Four Ids, each observed for YearCode 1..11 (44 rows in total)
Id <- factor(rep(c("01001", "01043", "01065", "01069"), each = 11))
YearCode <- rep(as.numeric(1:11), times = 4)

# Type is missing everywhere except rows 8 and 43, which hold the value 2
Type <- rep(NA_real_, length(Id))
Type[c(8, 43)] <- 2

# Placeholder column; the recoding below fills it in
test <- NA
sample_df <- data.frame(Id, YearCode, Type, test)

# A part of sample_df: only the rows that belong to Id "01069"
one_df <- subset(sample_df, Id == "01069")

# One data frame per Id
sample_list <- split(sample_df, f = sample_df$Id, drop = TRUE)

####################################

# The question's for loop as a function, in vectorised form, so that it can
# be handed to lapply().
#
# myDF: a data frame with a `Type` column and a `test` column.
# Returns `myDF` with `test` set to 0 on every row that precedes the first
# non-missing `Type`; rows from that point on keep their existing `test` value.
fnX <- function(myDF) {
  # cumsum() stays at 0 until the first non-missing Type is reached, so this
  # mask selects exactly the leading run of NA rows.
  leading_na <- cumsum(!is.na(myDF$Type)) == 0
  myDF$test[leading_na] <- 0
  myDF
}

# Apply the function to a single element of the list ...
fnX(sample_list[["01069"]])

# ... or to every data frame in the list at once
lapply(sample_list, FUN = fnX)

R：在数据帧列表上使用for循环

2 个答案: