假设我们有以下数据:
# Sample data: V is the lookup key, X1..X3 are candidate keys and Y1..Y3 are
# the values paired with them by position.
d <- data.frame(
  V  = c("A", "B"),
  X1 = "A",  # length-one values are recycled to both rows
  X2 = "B",
  X3 = "C",
  Y1 = c(1, 4),
  Y2 = c(2, 5),
  Y3 = c(3, 6)
)
# Store every column as character so keys and values compare as text.
d[names(d)] <- lapply(d, as.character)
d
V X1 X2 X3 Y1 Y2 Y3
1 A A B C 1 2 3
2 B A B C 4 5 6
我想创建一个变量 VAL:如果 V 等于 X[n],则 VAL 取对应的 Y[n] 的值
我可以使用ifelse语句来做到这一点,但是我想避免嵌套ifelse,因为n未知
# Hard-coded three-level lookup: take Y1 when V equals X1, otherwise Y2 when V
# equals X2, otherwise Y3 when V equals X3, otherwise NA.
d$VAL_ifelse <- with(
  d,
  ifelse(
    V == X1, Y1,
    ifelse(
      V == X2, Y2,
      ifelse(V == X3, Y3, NA)
    )
  )
)
我试图创建此循环,但我认为j存在问题?
# Row-by-row lookup: for each row, find the first X column equal to V and copy
# the value of the Y column paired with it into VAL_loop.
d_X_var <- grep("^X", names(d), value = TRUE)
# Each X<suffix> column is paired with the Y<suffix> column of the same suffix.
d_Y_var <- sub("^X", "Y", d_X_var)
stopifnot(all(d_Y_var %in% names(d)))

# Start from NA so rows without any match stay NA; the column is allocated
# once instead of being grown inside the loop.
d$VAL_loop <- NA_character_
for (i in seq_len(nrow(d))) {
  for (j in seq_along(d_X_var)) {
    # isTRUE() treats an NA comparison as "no match" instead of failing in if().
    if (isTRUE(d[i, "V"] == d[i, d_X_var[j]])) {
      d$VAL_loop[i] <- as.character(d[i, d_Y_var[j]])
      # Stop at the first match: continuing would let a later, non-matching
      # column overwrite the value that was just found.
      break
    }
  }
}
d
#   V X1 X2 X3 Y1 Y2 Y3 VAL_ifelse VAL_loop
# 1 A  A  B  C  1  2  3          1        1
# 2 B  A  B  C  4  5  6          5        5
答案 0 :(得分:4)
我们可以使用矢量化方法来获取VAL
# Vectorised lookup with fixed positions: columns 2:4 hold X1..X3 and columns
# 5:7 hold the Y1..Y3 values paired with them.
# Each row of `hits` is the (row, col) position of a cell where X equals V.
hits <- which(d[2:4] == d$V, arr.ind = TRUE)
# which() lists matches column by column, not row by row, so the values must be
# written back through their row index. Hits are ordered by column, so dropping
# repeated rows keeps the left-most match of each row.
hits <- hits[!duplicated(hits[, "row"]), , drop = FALSE]
# Rows without any match keep NA.
d$Val <- rep(NA, nrow(d))
d$Val[hits[, "row"]] <- as.matrix(d[5:7])[hits]
d
#   V X1 X2 X3 Y1 Y2 Y3 Val
# 1 A  A  B  C  1  2  3   1
# 2 B  A  B  C  4  5  6   5
以上做法适用于事先知道 `X` 列和 `Y` 列的列号的情况。如果不知道列号,可以先用 `grep` 获取列号,然后再取子集。
# Same lookup, but the X and Y column positions are discovered by name.
X_cols <- grep("^X", names(d))
Y_cols <- grep("^Y", names(d))
# The k-th X column is paired with the k-th Y column, so the counts must agree.
stopifnot(length(X_cols) == length(Y_cols))

# Each row of `hits` is the (row, col) position of a cell where X equals V.
hits <- which(d[X_cols] == d$V, arr.ind = TRUE)
# which() lists matches column by column, not row by row, so the values must be
# written back through their row index. Hits are ordered by column, so dropping
# repeated rows keeps the left-most match of each row.
hits <- hits[!duplicated(hits[, "row"]), , drop = FALSE]
# Rows without any match keep NA.
d$Val <- rep(NA, nrow(d))
d$Val[hits[, "row"]] <- as.matrix(d[Y_cols])[hits]
答案 1 :(得分:0)
我们可以使用 base R 中的 `max.col`,以矢量化的方式实现
# Vectorised lookup with max.col(): columns 2:4 hold X1..X3 and columns 5:7
# hold the Y1..Y3 values paired with them.
hit <- d$V == d[2:4]
# Position of the left-most matching X column in each row.
first_hit <- max.col(hit, ties.method = "first")
d$Val <- as.matrix(d[5:7])[cbind(seq_len(nrow(d)), first_hit)]
# max.col() still returns column 1 for a row with no TRUE at all, so rows
# without a match are blanked out instead of silently receiving Y1.
d$Val[rowSums(hit, na.rm = TRUE) == 0] <- NA
d
#   V X1 X2 X3 Y1 Y2 Y3 Val
# 1 A  A  B  C  1  2  3   1
# 2 B  A  B  C  4  5  6   5
如果某一行没有匹配项,我们可以借助 `rowSums` 将该行输出为 `NA`(数据来自评论)
# Extended example: the last row has no X column equal to V, so its Val must
# come out as NA.
d <- data.frame(
  V  = c("A", "B", "C", "D", "C"),
  X1 = rep("A", 5),
  X2 = c("B", "B", "B", "B", "A"),
  X3 = c("C", "C", "C", "D", "A"),
  Y1 = c(1, 4, 7, 10, 13),
  Y2 = c(2, 5, 8, 11, 14),
  Y3 = c(3, 6, 9, 12, 15),
  Val_expected = c(1, 5, 9, 12, NA)
)
d[] <- lapply(d, as.character)
d$Val <- local({
  hit <- d$V == d[2:4]
  # Left-most matching X column per row (column 1 when nothing matches).
  first_hit <- max.col(hit, "first")
  picked <- as.matrix(d[5:7])[cbind(seq_len(nrow(d)), first_hit)]
  # Keep the picked value only for rows that really have a match.
  ifelse(rowSums(hit) > 0, as.numeric(picked), NA_real_)
})
d$Val
#[1] 1 5 9 12 NA
答案 2 :(得分:0)
这是一种使用 `ifelse` 和 `diag` 的比较迂回的方法:
# ifelse() + diag() lookup: columns 2:4 hold X1..X3 and columns 5:7 hold the
# Y1..Y3 values paired with them.
hit <- d[, 2:4] == d$V
# Left-most matching X column per row (column 1 when nothing matches).
first_hit <- max.col(hit, ties.method = "first")
# Selecting the Y columns in `first_hit` order gives an nrow x nrow matrix
# whose k-th column is the Y column chosen for row k, so its diagonal holds
# each row's own value. Taking diag() of the raw X/Y matrices would only
# compare row k with column k. This builds an nrow x nrow matrix, so it is
# only suitable for small data.
d$Val <- ifelse(
  unname(rowSums(hit, na.rm = TRUE) > 0),
  diag(as.matrix(d[, 5:7])[, first_hit, drop = FALSE]),
  NA
)
输出:
V X1 X2 X3 Y1 Y2 Y3 Val
1 A A B C 1 2 3 1
2 B A B C 4 5 6 5