R:将互斥列映射到单个列

时间:2016-12-29 16:19:01

标签: r data-manipulation

我有一个训练数据集train,其中有40个soil_type列,分别命名为soil_type1到soil_type40。

 Id ..... Elevation s1 s2 s3 s4 s5.........s40
  1 .....       347  0  1  0  0  0           0
  2 .....       354  0  0  0  1  0           0
  3 .....       554  0  0  1  0  0           0

我想将s1到s40这些列合并成如下所示的单个列s

 Id ..... Elevation  s
 1  .....       347 s2
 2  .....       354 s4
 3  .....       554 s3

我可以想到这样做,但在R中必须有更好的方法。

 # Manual approach: start with NA, then overwrite s with the label of
 # whichever indicator column holds the 1 for that row.
 train$s <- NA
 train$s[train$s1 == 1] <- "s1"
 train$s[train$s2 == 1] <- "s2"
 # ... one assignment per soil column, s3 through s39 ...
 train$s[train$s40 == 1] <- "s40"

编辑:请注意,还有其他数据栏

4 个答案:

答案 0 :(得分:3)

我们可以取出's'列的子集,使用max.col获取每行中1所在列的索引,然后用cbind与原数据合并

# Positions of the one-hot soil columns (s1, s2, ..., s40) within train.
i1 <- grep("^s\\d+$", colnames(train))
# max.col() returns, per row, the position of the largest value (the single 1)
# among the soil columns. Indexing the column names with it yields the label
# ("s2") instead of a position, which matters because the columns need not be
# numbered consecutively (s40 is the 6th soil column here).
cbind(train, s = colnames(train)[i1][max.col(train[i1], ties.method = "first")])
#  Id Elevation s1 s2 s3 s4 s5 s40  s
#1  1       347  0  1  0  0  0   0 s2
#2  2       354  0  0  0  1  0   0 s4
#3  3       554  0  0  1  0  0   0 s3

或另一种有效的选择是

# Soil number carried by each indicator column name ("s40" -> 40).
soil_no <- as.integer(sub("^s", "", colnames(train)[i1]))
# With exactly one 1 per row, the product of the 0/1 indicator matrix and the
# soil numbers picks out that row's soil number; paste0() turns it back into
# the column label. drop() flattens the n x 1 matrix result to a vector.
cbind(train, s = paste0("s", drop(as.matrix(train[i1]) %*% soil_no)))
#   Id Elevation s1 s2 s3 s4 s5 s40  s
#1  1       347  0  1  0  0  0   0 s2
#2  2       354  0  0  0  1  0   0 s4
#3  3       554  0  0  1  0  0   0 s3

数据

# Example data: three observations with an Id, an Elevation and one-hot
# soil indicator columns s1..s5 and s40 (all integer).
train <- data.frame(
  Id = 1:3,
  Elevation = c(347L, 354L, 554L),
  s1 = c(0L, 0L, 0L),
  s2 = c(1L, 0L, 0L),
  s3 = c(0L, 0L, 1L),
  s4 = c(0L, 1L, 0L),
  s5 = c(0L, 0L, 0L),
  s40 = c(0L, 0L, 0L)
)

答案 1 :(得分:3)

这里遍历行,然后检查哪个列有1并返回其位置

# Restrict the search to the one-hot soil columns so that other columns
# (e.g. Elevation) can neither shift the reported position nor match the
# value 1 themselves.
s_cols <- grep("^s\\d+$", names(df))
s_mat <- as.matrix(df[s_cols])
# Walk the rows; for each one report the position (within the soil columns)
# of the first 1, or NA when the row has none. vapply() guarantees an integer
# vector even for such rows, where apply() would fall back to a list.
df$s <- vapply(
  seq_len(nrow(s_mat)),
  function(i) unname(which(s_mat[i, ] == 1))[1L],
  integer(1)
)
# df
#  Id s1 s2 s3 s4 s5 s40 s
#1  1  0  1  0  0  0   0 2
#2  2  0  0  0  1  0   0 4
#3  3  0  0  1  0  0   0 3

答案 2 :(得分:2)

使用带有arr.ind = TRUE参数的which的另一个基本R选项,用于返回每行为1的s变量中的列。

# Positions of the one-hot soil columns (s1, s2, ...) within df.
s_cols <- grep("^s\\d+$", names(df))
# which(arr.ind = TRUE) lists the (row, col) pairs of the 1s in column-major
# order, so the pairs do not necessarily come out in row order.
ones <- which(df[s_cols] == 1, arr.ind = TRUE)
# Look up the pair belonging to each data row explicitly; rows that contain
# no 1 get NA instead of silently misaligning the result.
row_pos <- match(seq_len(nrow(df)), ones[, "row"])
dfNew <- cbind(df[1:2], "s" = unname(ones[row_pos, "col"]))

  Id Elevation s
1  1       347 2
2  2       354 4
3  3       554 3

数据

# Example data parsed from inline text; the first line supplies the column
# names (Id, Elevation and the indicator columns s1..s5, s40).
df <- read.table(
  text = "Id  Elevation s1 s2 s3 s4 s5 s40
1 347  0  1  0  0  0 0
2 354  0  0  0  1  0 0
3  554  0  0  1  0  0 0",
  header = TRUE
)

答案 3 :(得分:1)

我有一个使用reshape2和dplyr的解决方案:

# library() fails immediately when a package is missing, whereas require()
# only warns and lets the script fail later at the first melt()/filter() call.
library(reshape2)
library(dplyr)

# Toy data: 10 rows with indicator columns s0..s6, all initialised to 0.
df <- data.frame(
  ID = seq_len(10),
  s0 = rep(0, 10),
  s1 = rep(0, 10),
  s2 = rep(0, 10),
  s3 = rep(0, 10),
  s4 = rep(0, 10),
  s5 = rep(0, 10),
  s6 = rep(0, 10)
)
# Flag exactly one column in each of the first six rows.
df$s0[1] <- 1
df$s1[2] <- 1
df$s2[3] <- 1
df$s3[4] <- 1
df$s4[5] <- 1
df$s5[6] <- 1

# Reshape to long form (one row per ID/column pair), keep only the pairs
# flagged with 1, and drop the now-constant value column. The result has one
# row per flagged ID with the originating column name in s.
df <- melt(df, id.vars = "ID") %>%
  rename(s = variable) %>%
  filter(value == 1) %>%
  select(-value)