R data.table:对多个变量(列)重新编码

时间:2020-03-18 18:32:38

标签: r data.table

# Example data: three character columns holding 'a'/'b' and one integer column.
# `replace = TRUE` is spelled out (no partial argument matching, no `T`), and
# `stringsAsFactors = FALSE` keeps cat/dog/fox as character on every R version.
data <- data.frame(
  cat = sample(c("a", "b"), 100, replace = TRUE),
  dog = sample(c("a", "b"), 100, replace = TRUE),
  bark = sample(1:1000, 100, replace = TRUE),
  fox = sample(c("a", "b"), 100, replace = TRUE),
  stringsAsFactors = FALSE
)

library(data.table)
# Convert the data.frame `data` to a data.table by reference (no copy is made).
setDT(data)

是否可以将 cat、dog、fox 三列中的 'a' 替换为 -9,将 'b' 替换为 9?我希望在 data.table 中通过列名一次性对这些列进行操作。

3 个答案:

答案 0 :(得分:1)

如果这些都是字符列,一种做法是使用 set():

# Recode 'a' -> -9 and 'b' -> 9 in each listed column, modifying `data` by
# reference. Any value other than 'a' or 'b' becomes NA.
recode_map <- c(a = -9, b = 9)
for (nm1 in c("cat", "dog", "fox")) {
  # Replace the whole column (no `i`) so its type can change to numeric.
  # Assigning numbers into selected rows of a character column would keep the
  # column character and store the strings "-9" / "9" instead.
  # `as.character()` makes the name lookup work for factor columns as well.
  set(data, j = nm1, value = unname(recode_map[as.character(data[[nm1]])]))
}

或者另一个选择是

# Columns to recode.
nm1 <- c("cat", "dog", "fox")
# Look each value up in a named vector (a -> -9, b -> 9) and assign the
# results back to the same columns by reference.
data[, (nm1) := lapply(.SD, function(col) c(a = -9, b = 9)[col]),
     .SDcols = nm1]

数据

# Reproducible example data; the 'a'/'b' columns are kept as character.
set.seed(24)
data <- data.frame(
  cat = sample(c("a", "b"), size = 100, replace = TRUE),
  dog = sample(c("a", "b"), size = 100, replace = TRUE),
  bark = sample(1:1000, size = 100, replace = TRUE),
  fox = sample(c("a", "b"), size = 100, replace = TRUE),
  stringsAsFactors = FALSE
)

答案 1 :(得分:1)

1)使用文末“注意”一节中定义的 DT,下面的代码会转换所有字符列。(请注意:本答案中的映射是 a → 9、b → -9,与问题要求的 a → -9、b → 9 符号相反;如需与问题一致,请交换这两个值。)

# Return a new data.table in which every character column is recoded through
# the named lookup (a -> 9, b -> -9); other columns pass through unchanged.
DT[, lapply(.SD, function(col) {
  if (!is.character(col)) {
    return(col)
  }
  c(a = 9, b = -9)[col]
})]

结果为:

    cat dog bark fox
 1:   9  -9  890  -9
 2:  -9   9  693  -9
 3:   9  -9  641  -9
 4:  -9  -9  995  -9
 5:  -9   9  656   9
 6:   9  -9  709   9
 7:  -9   9  545  -9
 8:  -9   9  595   9
 9:  -9   9  290   9
10:   9  -9  148   9

2)如果您还有其他字符列,并且只想更改只有'a'和'b'的字符列,则:

 # Recode only the character columns whose values are all 'a' or 'b'
 # (a -> 9, b -> -9); every other column is returned unchanged.
 DT[, lapply(.SD, function(col) {
   only_ab <- is.character(col) && all(col %in% c("a", "b"))
   if (only_ab) c(a = 9, b = -9)[col] else col
 })]

3)按名称指定:如果您希望明确指定列名,可以使用下面的替代方法。它会直接修改 DT,使其变为所需的形式。

 # Columns to recode, named explicitly.
 nms <- c("cat", "dog", "fox")
 # Update those columns of DT by reference using the named lookup
 # (a -> 9, b -> -9).
 DT[, (nms) := lapply(.SD, function(col) c(a = 9, b = -9)[col]),
    .SDcols = nms]

注意

library(data.table)

# Reproducible 10-row example: three 'a'/'b' character columns and one
# integer column.
set.seed(123)
DT <- data.table(
  cat = sample(c("a", "b"), size = 10, replace = TRUE),
  dog = sample(c("a", "b"), size = 10, replace = TRUE),
  bark = sample(1:1000, size = 10, replace = TRUE),
  fox = sample(c("a", "b"), size = 10, replace = TRUE)
)

答案 2 :(得分:1)

另一个选择是:

# Recode the listed columns of DT by reference via a matrix of their values.
# NOTE(review): the signs are kept as in the original snippet (a -> 9,
# b -> -9); the question asks for a -> -9 and b -> 9, so swap them if needed.
cols <- c("cat", "dog", "fox")
DT[, (cols) := {
  m <- as.matrix(.SD)
  # `m` is a character matrix; writing 9 / -9 into it would store the strings
  # "9" / "-9". Fill a numeric matrix of the same shape instead, so the
  # resulting columns are numeric. Values other than 'a' / 'b' stay NA.
  recoded <- matrix(NA_real_, nrow = nrow(m), ncol = ncol(m),
                    dimnames = dimnames(m))
  recoded[which(m == "a")] <- 9
  recoded[which(m == "b")] <- -9
  as.data.table(recoded)
}, .SDcols = cols]