我想保留每一行中的非NA值,然后选取该列表中的第一个元素作为新列(ncol)的最终值。
下面是我想要得到的结果示例,以及我无法运行的代码:
# Example data for the question. Missing entries are stored as the literal
# string "NA" (not a real NA value).
ID <- c(1, 2, 3, 4)
A <- c("A", "NA", "C", "R")
B <- c("G", "V", "NA", "T")
C <- c("NA", "NA", "NA", "Y")
D <- c("U", "W", "NA", "NA")
# `ncol` holds the desired result: the first non-"NA" value of each row.
# It is spelled out as a named column because a bare `ncol` refers to the
# base function ncol(), which data.frame() cannot coerce to a column.
mydf <- data.frame(ID, A, B, C, D, ncol = c("A", "V", "C", "R"))
ID A B C D ncol
1 1 A G NA U A
2 2 NA V NA W V
3 3 C NA NA NA C
4 4 R T Y NA R
# NOTE(review): this is the asker's attempt that does not run; it is kept
# verbatim so that the answers below still make sense.
# c() concatenates the four columns into one long vector instead of keeping
# them side by side, so mycol[i] is a single value rather than row i.
mycol <- c(mydf$A, mydf$B, mydf$C, mydf$D)
for (i in seq(1:nrow(mydf))){
# Not valid R: a bare `[...]` cannot be passed as an argument to lapply(),
# and `mycols` (with a trailing s) is not defined anywhere in this snippet.
# The example data also stores missing values as the string "NA", which
# is.na() does not treat as missing.
listcolincldata <- lapply(mycol[i],[!is.na(mycols[i])])
print(listcolincldata)
# Writes to `newcol`, whereas the desired output shown above calls the
# column `ncol`.
mydf$newcol[i] <- (as.character(listcolincldata[1]))
}
答案 0 :(得分:3)
克里斯,
以下是两个示例解决方案。第一个先修改数据框,把文本“NA”替换为真正的NA
值;第二个直接按字符串“NA”进行识别。两者都使用apply()逐行处理数据框(通过将MARGIN参数设置为1)。
# Example columns in which missing entries are the literal string "NA".
A <- c("A", "NA", "C", "R")
B <- c("G", "V", "NA", "T")
C <- c("NA", "NA", "NA", "Y")
D <- c("U", "W", "NA", "NA")

# First solution: convert text NA to true NA
# Columns are kept as character (not factor) so that no unused "NA" factor
# level is left behind after the replacement.
eg1 <- data.frame(A, B, C, D, stringsAsFactors = FALSE)
eg1[eg1 == "NA"] <- NA
# apply() with MARGIN = 1 passes each row to the function as a character
# vector; the first non-missing entry of that row is kept.
eg1$solution <- apply(eg1, 1, function(rw) rw[!is.na(rw)][1])
eg1

# Second solution: string recognition
eg2 <- data.frame(A, B, C, D, stringsAsFactors = FALSE)
# `%in%` never returns NA, so a genuine NA in a row is skipped as well;
# with `rw != "NA"` such an entry would be returned as the "first" value.
eg2$solution <- apply(eg2, 1, function(rw) rw[!rw %in% c("NA", NA)][1])
eg2
答案 1 :(得分:2)
您需要使用apply
来逐行遍历数据框:
数据
# Example data: a numeric ID plus four character columns with genuine NAs.
ID <- as.numeric(1:4)
A <- c("A", NA_character_, "C", "R")
B <- c("G", "V", NA_character_, "T")
C <- c(rep(NA_character_, 3L), "Y")
D <- c("U", "W", rep(NA_character_, 2L))
# Columns are named explicitly; the names match the vectors they come from.
mydf <- data.frame(ID = ID, A = A, B = B, C = C, D = D)
解决方案:
# apply() with MARGIN = 1 calls the function once per row
# only the value columns A-D are passed in, so the result does not rely on
# the ID column being present, non-NA, or in first position
# na.omit() removes the NAs of the row; [1] picks the first remaining value
# (a row without any non-NA value yields NA)
# result is as shown in your output
mydf$ncol <- apply(mydf[c("A", "B", "C", "D")], 1, function(x) na.omit(x)[1])
输出:
> mydf
ID A B C D ncol
1 1 A G <NA> U A
2 2 <NA> V <NA> W V
3 3 C <NA> <NA> <NA> C
4 4 R T Y <NA> R
答案 2 :(得分:1)
另一种选择是将max.col
与ties.method='first'
一起使用。这样可以得到每行第一个非NA值的列索引,再用cbind
将它与行索引组合成索引矩阵,从而取出对应的值。
# !is.na(mydf[, -1]) is a logical matrix over every column except the first
# (ID). max.col() with ties.method = "first" returns, per row, the position
# of the first TRUE, i.e. of the first non-NA value; + 1L shifts that
# position back to a column index of the full data frame.
# cbind() pairs row and column indices into a two-column matrix, which
# extracts one cell per row. seq_len() keeps the row index empty for a
# zero-row data frame, where 1:nrow(mydf) would produce c(1, 0).
mydf$ncol <- mydf[cbind(
  seq_len(nrow(mydf)),
  max.col(!is.na(mydf[, -1]), ties.method = "first") + 1L
)]
mydf
# ID A B C D ncol
#1 1 A G <NA> U A
#2 2 <NA> V <NA> W V
#3 3 C <NA> <NA> <NA> C
#4 4 R T Y <NA> R
# Data used by the max.col() solution: a numeric ID plus four character
# columns whose missing entries are genuine NA values.
ID <- as.numeric(1:4)
A <- c("A", NA_character_, "C", "R")
B <- c("G", "V", NA_character_, "T")
C <- c(rep(NA_character_, 3L), "Y")
D <- c("U", "W", rep(NA_character_, 2L))
# Columns are named explicitly; the names match the vectors they come from.
mydf <- data.frame(ID = ID, A = A, B = B, C = C, D = D)
注意:这里的NA
是真正的NA值,而不是字符串“NA”。