遍历具有相同前缀的行和变量

时间:2019-06-03 15:11:27

标签: r

假设我们有以下数据:

# Example data: a key column V, candidate key columns X1..X3 and the value
# columns Y1..Y3 that belong to them.
d <- data.frame(
  V = c("A", "B"),
  X1 = rep("A", 2),
  X2 = rep("B", 2),
  X3 = rep("C", 2),
  Y1 = c(1, 4),
  Y2 = c(2, 5),
  Y3 = c(3, 6)
)
# Store every column as character so that V can be compared with the X
# columns and the Y values are handled as strings.
d[] <- lapply(d, as.character)

d
  V X1 X2 X3 Y1 Y2 Y3
1 A  A  B  C  1  2  3
2 B  A  B  C  4  5  6

我想创建一个变量 VAL:如果 V 等于 X[n],则 VAL 取 Y[n] 的值。

我可以使用ifelse语句来做到这一点,但是我想避免嵌套ifelse,因为n未知

# Reference solution with hard-coded columns: take Y1, Y2 or Y3 from the
# first X column (checked in the order X1, X2, X3) that equals V, or NA when
# none of them does.
d$VAL_ifelse <- with(
  d,
  ifelse(V == X1, Y1,
         ifelse(V == X2, Y2,
                ifelse(V == X3, Y3, NA)))
)

我试图创建此循环,但我认为j存在问题?

# Names of the candidate key columns (those starting with "X"); only the
# number of such columns is used below.
d_X_var=grep("^X", names(d), value=TRUE)

# For every row i, compare V with X1, X2, ... in turn and write to VAL_loop.
for(i in 1:nrow(d)){
  for(j in 1:length(d_X_var)){
    if((d[i,c('V')] == d[i,paste0('X',j)]) == TRUE){
      # Match: take the value from the Y column with the same suffix.
      d$VAL_loop[i] <- as.character(d[i,paste0('Y',j)])
    } else if((d[i,c('V')] != d[i,paste0('X',j)]) == TRUE){
      # NOTE(review): this branch also runs for every later j that does not
      # match, so a value stored by an earlier match is replaced by NA unless
      # the last X column is the one that matches.
      d$VAL_loop[i] <- NA
    }
  }
}

d
  V X1 X2 X3 Y1 Y2 Y3 VAL_ifelse VAL_loop
1 A  A  B  C  1  2  3          1     <NA>
2 B  A  B  C  4  5  6          5     <NA>

3 个答案:

答案 0 :(得分:4)

我们可以使用矢量化方法来获取VAL

# Look up, for each row, the Y value whose X column (columns 2:4) equals V.
# which(..., arr.ind = TRUE) lists the hits in column-major order, not in row
# order, so each value is written back through the hit's own row index
# instead of relying on the hits lining up with the rows of d.
hits <- which(d[2:4] == d$V, arr.ind = TRUE)
# Keep only the first (lowest-numbered) matching X column per row; within a
# row the hits are already ordered by column.
hits <- hits[!duplicated(hits[, 1]), , drop = FALSE]
# Rows without any match keep NA.
val <- rep(NA, nrow(d))
val[hits[, 1]] <- as.matrix(d[5:7])[hits]
d$Val <- val

d
#  V X1 X2 X3 Y1 Y2 Y3 Val
#1 A  A  B  C  1  2  3   1
#2 B  A  B  C  4  5  6   5

当您事先知道 X 列和 Y 列的列号时,上述方法适用。如果不知道,可以先用 grep 获取列号,然后再取子集。

# Locate the X and Y columns by name prefix instead of hard-coded positions.
# Assumes the k-th X column pairs with the k-th Y column in column order.
X_cols <- grep("^X", names(d))
Y_cols <- grep("^Y", names(d))
stopifnot(length(X_cols) == length(Y_cols))

# which(..., arr.ind = TRUE) lists the hits in column-major order, not in row
# order, so each value is written back through the hit's own row index
# instead of relying on the hits lining up with the rows of d.
hits <- which(d[X_cols] == d$V, arr.ind = TRUE)
# Keep only the first (lowest-numbered) matching X column per row.
hits <- hits[!duplicated(hits[, 1]), , drop = FALSE]
# Rows without any match keep NA.
val <- rep(NA, nrow(d))
val[hits[, 1]] <- as.matrix(d[Y_cols])[hits]
d$Val <- val

答案 1 :(得分:0)

我们可以使用 base R 中的 max.col,以矢量化的方式实现

# For each row, max.col() gives the position of the first X column
# (columns 2:4) equal to V; pairing it with the row number forms a
# (row, column) index into the Y columns (5:7).
d$Val <- d[5:7][cbind(
  seq_len(nrow(d)),
  max.col(d$V == d[2:4], ties.method = "first")
)]
d
#   V X1 X2 X3 Y1 Y2 Y3 Val
#1 A  A  B  C  1  2  3   1
#2 B  A  B  C  4  5  6   5

更新

如果某一行没有匹配项,我们可以借助 rowSums 让结果输出 NA(数据来自评论)

# Extended example: row 4 matches only in the last X column and row 5 has no
# matching X column at all. Val_expected holds the intended result.
d <- data.frame(
  "V" = c("A", "B", "C", "D", "C"),
  "X1" = rep("A", 5),
  "X2" = c("B", "B", "B", "B", "A"),
  "X3" = c("C", "C", "C", "D", "A"),
  "Y1" = c(1, 4, 7, 10, 13),
  "Y2" = c(2, 5, 8, 11, 14),
  "Y3" = c(3, 6, 9, 12, 15),
  "Val_expected" = c(1, 5, 9, 12, NA)
)
d[, ] <- lapply(d, as.character)

# Pick the Y value at the position of the first X column equal to V.
d$Val <- d[5:7][cbind(
  seq_len(nrow(d)),
  max.col(d$V == d[2:4], ties.method = "first")
)]
# NA^0 is 1 and NA^1 is NA, so multiplying by NA^(no match) turns the value
# into NA for rows in which V equals none of the X columns.
d$Val <- NA^(rowSums(d$V == d[2:4]) == 0) * as.numeric(d$Val)
d$Val
#[1]  1  5  9 12 NA

答案 2 :(得分:0)

这是使用 ifelse 和 diag 的一种比较绕的方法:

# Compare V with the diagonal of the X columns (2:4) and, where they are
# equal, take the diagonal of the Y columns (5:7); otherwise NA.
# NOTE(review): diag() yields element [i, i] only, so row i is compared with
# the i-th X column alone, and the diagonal has min(nrow, ncol) entries. This
# is not a general first-match lookup — confirm it suits your data.
d$Val <- ifelse(d$V == diag(as.matrix(d[,2:4])), diag(as.matrix(d[,5:7])), NA) 

输出:

  V X1 X2 X3 Y1 Y2 Y3 Val
1 A  A  B  C  1  2  3   1
2 B  A  B  C  4  5  6   5