我想对数据框中列名任意位置包含字符串 "calcium" 的所有列进行重新编码。为此,我尝试把 grepl 与 dplyr 的 mutate 结合起来使用,但出现了错误。
有人知道我哪里做错了吗?希望这是可以实现的!
下面是我用 dplyr 尝试过的代码:
# Build the example data: long-format scores (id x time x test) that are then
# reshaped to wide format.
library(dplyr)
library(reshape2) # dcast() lives in reshape2; without it the call below fails
fake <- data.frame(
  id = rep(rep(c(1, 2, 3), each = 3), times = 2),
  time = rep(c("Time1", "Time2"), each = 9),
  test = rep(c("calcium", "magnesium", "zinc"), times = 6),
  score = rnorm(18)
)
# One row per id, one column per time/test pair (e.g. Time1_calcium).
# value.var is spelled out so dcast() does not have to guess the value column.
df <- dcast(fake, id ~ time + test, value.var = "score")
# Attempt 1: select the calcium columns with grepl() and pass them to cut()
# inside mutate().
# The logical index matches more than one column (one per time point), so
# df[, ...] is a data frame rather than a single numeric vector, and cut()
# rejects it.
df <- df %>% mutate(category=cut(df[,grepl("calcium", colnames(df))], breaks=c(-Inf, 1.2, 6, 12, Inf), labels=c(0,1,2,3)))
# Error: 'x' must be numeric
# Attempt 2: use mutate_at() with a tidyselect helper.
# Here cut() is called immediately with only `breaks` and `labels`, so it has
# no `x`; mutate_at() needs a function (or formula) to apply to each selected
# column, not the result of a call.
df <- df %>% mutate_at(vars(contains('calcium')), cut(breaks=c(-Inf, 1.2, 6, 12, Inf), labels=c(0,1,2,3)))
# Error: "argument "x" is missing, with no default"
答案 0 :(得分:1)
这是你想要的结果吗?
library(tidyverse)
library(reshape2) # provides dcast()

# Example data: 3 ids x 2 time points x 3 tests, with random scores.
fake <- data.frame(
  id = rep(rep(c(1, 2, 3), each = 3), times = 2),
  time = rep(c("Time1", "Time2"), each = 9),
  test = rep(c("calcium", "magnesium", "zinc"), times = 6),
  score = rnorm(18)
)

# Reshape to wide format: one row per id, one column per time/test pair
# (e.g. Time1_calcium). value.var is explicit so dcast() does not guess.
df <- dcast(fake, id ~ time + test, value.var = "score")
df <- as_tibble(df)

# Recode every column whose name contains "calcium" into the categories
# 0, 1, 2, 3 defined by the intervals (-Inf, 1.2], (1.2, 6], (6, 12], (12, Inf).
#
# cut(..., labels = FALSE) returns the interval index 1..4 directly, so
# subtracting 1 yields the intended 0..3 as plain numbers. Building a factor
# with labels 0..3 and then calling as.numeric() on it would instead return
# the internal level codes 1..4, silently shifting every category by one.
df <- df %>%
  mutate(across(
    contains("calcium"),
    ~ cut(.x, breaks = c(-Inf, 1.2, 6, 12, Inf), labels = FALSE) - 1
  ))
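原回答运行后得到如下结果(score 由 rnorm() 随机生成,每次运行的具体数值都会不同;另请注意,calcium 列中显示的是对因子调用 as.numeric() 得到的水平编码 1–4,而不是标签 0–3):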
# A tibble: 3 x 7
id Time1_calcium Time1_magnesium Time1_zinc Time2_calcium Time2_magnesium
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 2 -0.256 0.303 1 0.144
2 2 2 2.18 0.417 1 0.0650
3 3 1 0.863 -2.32 1 0.163
# ... with 1 more variable: Time2_zinc <dbl>
基于此:https://suzan.rbind.io/2018/02/dplyr-tutorial-2/#mutate-at-to-change-specific-columns