R:根据列值创建 1/0(T/F)的类对比(contrast-like)矩阵/新变量

时间:2014-02-25 17:42:53

标签: r matrix

在R中,我们如何根据另一列中的唯一值以编程方式创建新变量?

我们可能开始的数据框的一个简单示例:

# Reproducible literal for the starting data frame: 11 rows (row.names =
# c(NA, -11L)) and two factor columns, obsNum and charVector.
# The integer codes index into .Label, which is sorted as text ("obs1",
# "obs10", "obs11", "obs2", ...), hence the codes 1, 4, 5, ... for obs1..obs11.
structure(list(obsNum = structure(c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 2L, 3L), .Label = c("obs1", "obs10", "obs11", "obs2", 
"obs3", "obs4", "obs5", "obs6", "obs7", "obs8", "obs9"), class = "factor"), 
    charVector = structure(c(1L, 2L, 3L, 2L, 2L, 3L, 1L, 1L, 
    2L, 2L, 3L), .Label = c("blue", "green", "red"), class = "factor")), .Names = c("obsNum", 
"charVector"), class = "data.frame", row.names = c(NA, -11L))

obsNum  charVector
  obs1        blue
  obs2       green
  obs3         red
  obs4       green
  obs5       green
  obs6         red
  obs7        blue
  obs8        blue
  obs9       green
 obs10       green
 obs11         red

我想结束的地方:

# Reproducible literal for the desired result: the same 11 rows and the same
# obsNum / charVector factor columns, plus three integer 0/1 columns named
# blue, green and red (one per charVector level). A column holds 1L on the
# rows whose charVector equals that column's name and 0L elsewhere.
structure(list(obsNum = structure(c(1L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 2L, 3L), .Label = c("obs1", "obs10", "obs11", "obs2", 
"obs3", "obs4", "obs5", "obs6", "obs7", "obs8", "obs9"), class = "factor"), 
    charVector = structure(c(1L, 2L, 3L, 2L, 2L, 3L, 1L, 1L, 
    2L, 2L, 3L), .Label = c("blue", "green", "red"), class = "factor"), 
    blue = c(1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L), green = c(0L, 
    1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L), red = c(0L, 0L, 
    1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L)), .Names = c("obsNum", 
"charVector", "blue", "green", "red"), class = "data.frame", row.names = c(NA, 
-11L))

obsNum charVector blue green red
  obs1       blue    1     0   0
  obs2      green    0     1   0
  obs3        red    0     0   1
  obs4      green    0     1   0
  obs5      green    0     1   0
  obs6        red    0     0   1
  obs7       blue    1     0   0
  obs8       blue    1     0   0
  obs9      green    0     1   0
 obs10      green    0     1   0
 obs11        red    0     0   1

我对多步骤的解决方案持完全开放的态度,例如:首先创建新变量;然后针对charVector逐个评估每个新变量(名称)。只要保留观察值的顺序,创建一个可以cbind到起始数据框的单独data.frame也完全没问题。

提前致谢并问候!

3 个答案:

答案 0 :(得分:2)

您可以使用table(并用as.data.frame.matrix来保留表格格式):

# Cross-tabulate obsNum against charVector. The two columns are named
# explicitly (instead of table(df)) so the result stays a 2-D table even when
# df carries additional columns. as.data.frame.matrix() keeps the wide layout:
# one row per obsNum level, one column per charVector level.
x <- as.data.frame.matrix(table(df$obsNum, df$charVector))
# table() orders its rows by factor level ("obs1", "obs10", "obs11", ...), so
# realign them to the row order of df before binding the columns on.
cbind(df, x[match(df$obsNum, rownames(x)), ])
#       obsNum charVector blue green red
# obs1    obs1       blue    1     0   0
# obs2    obs2      green    0     1   0
# obs3    obs3        red    0     0   1
# obs4    obs4      green    0     1   0
# obs5    obs5      green    0     1   0
# obs6    obs6        red    0     0   1
# obs7    obs7       blue    1     0   0
# obs8    obs8       blue    1     0   0
# obs9    obs9      green    0     1   0
# obs10  obs10      green    0     1   0
# obs11  obs11        red    0     0   1

答案 1 :(得分:2)

# Build the indicator columns with model.matrix(): removing the intercept
# (the `0 +` term) yields one 0/1 column per level of charVector. Only the
# charVector column is passed as data, so `.` expands to that single variable.
cbind(
  dat,
  model.matrix(~ 0 + ., data = dat["charVector"])
)

##    obsNum charVector charVectorblue charVectorgreen charVectorred                                                                                                                                                                          
## 1    obs1       blue              1               0             0                                                                                                                                                                          
## 2    obs2      green              0               1             0                                                                                                                                                                          
## 3    obs3        red              0               0             1                                                                                                                                                                          
## 4    obs4      green              0               1             0                                                                                                                                                                          
## 5    obs5      green              0               1             0                                                                                                                                                                          
## 6    obs6        red              0               0             1                                                                                                                                                                          
## 7    obs7       blue              1               0             0                                                                                                                                                                          
## 8    obs8       blue              1               0             0                                                                                                                                                                          
## 9    obs9      green              0               1             0                                                                                                                                                                          
## 10  obs10      green              0               1             0                                                                                                                                                                          
## 11  obs11        red              0               0             1  

答案 2 :(得分:0)

这是使用循环定义0/1变量的一种方法。它利用了这样一个事实:TRUE/FALSE在转换为数字时会变成1/0。

# Distinct values of charVector, in order of first appearance. Converted to
# character so the code behaves the same for factor and character columns.
colors <- as.character(unique(df$charVector))

# Preallocate one numeric column per distinct value. nrow(df) is the scalar
# row count; dim(df[1]) would be a length-2 vector (rows, columns).
to.append <- matrix(0, nrow = nrow(df), ncol = length(colors))
colnames(to.append) <- colors

# Fill each column with the match indicator. Assigning a logical vector into
# the numeric matrix coerces TRUE/FALSE to 1/0.
for (i in seq_along(colors)) {
  to.append[, i] <- df$charVector == colors[i]
}
df <- cbind(df, to.append)