重新编码含有NA的变量时出现问题

时间:2018-07-31 21:22:01

标签: r dplyr

这是我的代码的最小示例:

# Read the study data: semicolon-separated, decimal comma, "-77" treated as NA.
# `na.strings` is spelled out in full (no partial argument matching) and passed
# as a string, because it is documented as a character vector.
infile <- read.table(
  "testdaten_studie2.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = ";",
  dec = ",",
  na.strings = "-77"
)
# Keep only the rows whose Chiffre equals "LP030482".
infile <- subset(infile, Chiffre == "LP030482")

# The item columns Base_BDI_v1 ... Base_BDI_v21 and their per-row means.
bdi.sub <- subset(infile, select = c(Base_BDI_v1:Base_BDI_v21))
# rowMeans() gives the same per-row mean as apply(..., 1, mean) without
# calling mean() once per row.
bdi.mean <- rowMeans(bdi.sub, na.rm = TRUE)

# Map the item codes 1-4 to "0"-"3"; the replacement values are strings.
# NOTE(review): assumes `recode` resolves to dplyr::recode and `%<>%` comes
# from magrittr -- confirm which packages are attached, and in which order.
bdi.sub %<>%
  mutate_at(
    paste0("Base_BDI_v", 1:21),
    recode,
    "1" = "0", "2" = "1", "3" = "2", "4" = "3", "NA" = "NA"
  )

如果我这样运行,bdi.sub返回的是NA。我还尝试了下面的写法:

# Recode every BDI item column with the same spec: 1 -> 0, 2 -> 1, 3 -> 2,
# 4 -> 3, NA stays NA.
# Iterating over the column names guarantees that each column is recoded from
# itself; a hand-copied line per column makes it easy to read from the wrong
# source column.
# NOTE(review): the "old=new; ..." spec string looks like car::recode() syntax,
# not dplyr::recode() -- confirm that `recode` resolves to the intended
# package when both are attached.
bdi_items <- paste0("Base_BDI_v", 1:21)
bdi.sub[bdi_items] <- lapply(
  bdi.sub[bdi_items],
  recode,
  "1=0; 2=1; 3=2; 4=3; NA=NA"
)

这是bdi.sub数据集:

# Example data: a data.frame with 6 rows and 21 integer columns,
# Base_BDI_v1 ... Base_BDI_v21. Rows 1 and 5 are NA in every column.
# Not every value lies in 1-4: Base_BDI_v10 ... Base_BDI_v14 contain 0, and
# Base_BDI_v16 / Base_BDI_v18 contain 6 or 7.
# The expression is not assigned here; assign it to a name to use it.
structure(list(Base_BDI_v1 = c(NA, 1L, 4L, 4L, NA, 1L), Base_BDI_v2 = c(NA, 
1L, 3L, 1L, NA, 1L), Base_BDI_v3 = c(NA, 1L, 3L, 1L, NA, 4L), 
    Base_BDI_v4 = c(NA, 1L, 2L, 4L, NA, 3L), Base_BDI_v5 = c(NA, 
    1L, 3L, 4L, NA, 4L), Base_BDI_v6 = c(NA, 1L, 3L, 1L, NA, 
    4L), Base_BDI_v7 = c(NA, 1L, 4L, 4L, NA, 2L), Base_BDI_v8 = c(NA, 
    1L, 3L, 4L, NA, 2L), Base_BDI_v9 = c(NA, 1L, 1L, 1L, NA, 
    4L), Base_BDI_v10 = c(NA, 0L, 4L, 3L, NA, 3L), Base_BDI_v11 = c(NA, 
    0L, 4L, 4L, NA, 3L), Base_BDI_v12 = c(NA, 0L, 2L, 1L, NA, 
    3L), Base_BDI_v13 = c(NA, 0L, 1L, 2L, NA, 2L), Base_BDI_v14 = c(NA, 
    0L, 2L, 4L, NA, 1L), Base_BDI_v15 = c(NA, 2L, 3L, 1L, NA, 
    3L), Base_BDI_v16 = c(NA, 3L, 7L, 4L, NA, 6L), Base_BDI_v17 = c(NA, 
    2L, 3L, 4L, NA, 1L), Base_BDI_v18 = c(NA, 1L, 1L, 6L, NA, 
    6L), Base_BDI_v19 = c(NA, 1L, 3L, 3L, NA, 1L), Base_BDI_v20 = c(NA, 
    2L, 3L, 3L, NA, 3L), Base_BDI_v21 = c(NA, 1L, 3L, 1L, NA, 
    4L)), row.names = c(NA, 6L), class = "data.frame")

我可以创建均值。



重新编码几个变量时遇到问题。 使用

# Two recoding passes over `data`, written back in place by `%<>%`:
#   var1 ... var59        : "1".."4" -> "0".."3"
#   var65, var73, var99   : "1", "2", "3" -> "0"; "4" -> "1"
data %<>%
  mutate_at(
    .vars = paste0("var", 1:59),
    .funs = recode,
    "1" = "0", "2" = "1", "3" = "2", "4" = "3"
  ) %>%
  mutate_at(
    .vars = paste0("var", c(65, 73, 99)),
    .funs = recode,
    "1" = "0", "2" = "0", "3" = "0", "4" = "1"
  )

结果:

  

Error in UseMethod("recode") : no applicable method for 'recode' applied to an object of class "logical"(即:没有适用于 "logical" 类对象的 'recode' 方法)

数据集中有许多NA需要保留。我的错误在哪里?

2 个答案:

答案 0 :(得分:0)

将此作为评论,但意识到它也可以作为答案。

我猜想这里的症结在于:您通过mutate_at调用(用paste选择列)得到了逻辑向量。这个逻辑向量被作为参数传给recode,而recode需要非逻辑向量作为参数,因此自然不被接受。

请参见?recode

  

对于逻辑向量,请使用if_else()

我认为您可以将recode替换为case_when,然后它应该可以工作。

编辑
拿到一些数据之后——您的代码(按您的数据调整后)在我这里实际上可以正常运行:

# `data1` is the answerer's name for the asker's data.
# The result is stored in `d` so that it can be inspected afterwards
# (e.g. with str(d)); without the assignment the recoded frame is only printed.
d <- data1 %>%
  # Columns 1-10 (arbitrarily chosen): "1".."4" -> "0".."3".
  mutate_at(
    paste0("Base_BDI_v", 1:10),
    recode,
    "1" = "0", "2" = "1", "3" = "2", "4" = "3", "NA" = "NA"
  ) %>%
  # Columns 12, 14, 16: "1", "2", "3" -> "0"; "4" -> "1".
  mutate_at(
    paste0("Base_BDI_v", c(12, 14, 16)),
    recode,
    "1" = "0", "2" = "0", "3" = "0", "4" = "1", "NA" = "NA"
  )

请注意,使用这种方法会得到字符类型列!

str(d)
'data.frame':   6 obs. of  21 variables:
 $ Base_BDI_v1 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v2 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v3 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v4 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v5 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v6 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v7 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v8 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v9 : chr  NA "0" "0" "0" ...
 $ Base_BDI_v10: chr  NA NA "0" "0" ...
 $ Base_BDI_v11: int  NA 0 4 4 NA 3
 $ Base_BDI_v12: chr  NA NA "0" "0" ...
 $ Base_BDI_v13: int  NA 0 1 2 NA 2
 $ Base_BDI_v14: chr  NA NA "0" "0" ...
 $ Base_BDI_v15: int  NA 2 3 1 NA 3
 $ Base_BDI_v16: chr  NA "0" NA "0" ...
 $ Base_BDI_v17: int  NA 2 3 4 NA 1
 $ Base_BDI_v18: int  NA 1 1 6 NA 6
 $ Base_BDI_v19: int  NA 1 3 3 NA 1
 $ Base_BDI_v20: int  NA 2 3 3 NA 3
 $ Base_BDI_v21: int  NA 1 3 1 NA 4

答案 1 :(得分:0)

我建议只在代码中添加一行。

# Convert every logical column to character first, then apply the two
# recodings from the question unchanged.
data %>%
  mutate_if(.predicate = is.logical, .funs = as.character) %>%
  mutate_at(
    .vars = paste0("var", 1:59),
    .funs = recode,
    "1" = "0", "2" = "1", "3" = "2", "4" = "3"
  ) %>%
  mutate_at(
    .vars = paste0("var", c(65, 73, 99)),
    .funs = recode,
    "1" = "0", "2" = "0", "3" = "0", "4" = "1"
  )

虽然我没有您的数据,但在我的一些测试中这是可行的。

如果您不想把所有逻辑型列都转换为字符型,可以按照Tjebo的建议改用case_when:

# The same two recodings written with case_when() inside funs(); `.` stands
# for the column being transformed. No catch-all branch is given, so only the
# four listed codes are mapped explicitly.
data %>%
  mutate_at(
    .vars = paste0("var", 1:59),
    .funs = funs(case_when(
      . == "1" ~ "0",
      . == "2" ~ "1",
      . == "3" ~ "2",
      . == "4" ~ "3"
    ))
  ) %>%
  mutate_at(
    .vars = paste0("var", c(65, 73, 99)),
    .funs = funs(case_when(
      . == "1" ~ "0",
      . == "2" ~ "0",
      . == "3" ~ "0",
      . == "4" ~ "1"
    ))
  )