我有一个类似下面这样的数据集:
structure(list(ID = 1:100, A = c(1, 1, 1, 0, 0, 0, 1, 0, 1, 1,
0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0,
1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0,
1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
0, 1, 1, 1, 1, 1), B = c(-0.779571910800539, -1.01813937566596,
-0.617199891458882, 0.0309580500019241, 0.543273110365976, -0.0433300396605711,
0.230179974373525, -0.183807679340284, 1.23013876772693, -0.447068495884132,
-0.529019912858711, -0.423813233701193, -2.02301749716477, 0.107354643198155,
-0.182036878855649, -0.0686544314102692, -0.242211690200168,
0.235711424228903, -0.737085567507625, 1.08272499166402, 1.46797946789107,
0.676133655269793, 0.0970319828391364, -0.175265540837544, 1.01932401639564,
-1.6120456930373, -0.237498813763703, -1.0793071544667, 0.34060211076922,
0.358651319904244, 1.14185300245182, 0.643831607010375, -1.48935271976024,
1.52070114310115, 0.13758246936271, 0.677489791752007, -0.0421866338789382,
-0.963909996107064, -0.419518874496373, -1.94843733945541, -0.856606011022689,
0.950271505971139, -0.00501879225795071, -0.907348953277799,
0.176003279346265, 0.849120713832199, -0.682289211320935, 0.618834674100358,
-0.266654135174762, 1.38431159868239, 0.464047120137739, -0.478626559461985,
0.149837396236788, -1.22592409132424, 0.658992970998059, -0.755502690343619,
-1.64278237304159, 0.9123549798475, 0.212894692780789, -0.670549407572393,
2.37707712870178, -0.0295080172428597, -0.823140252108969, -0.428902533453998,
-0.435036177848892, 0.98534295091355, 1.24538388550067, 0.763169631787973,
0.0481870286750498, 0.373727588477095, 0.515173230638657, -0.980950523005618,
2.34498921196051, 1.16497367254483, 0.803207456941987, -1.20555741222113,
-1.69603664220648, -0.59655174894536, -0.471190748123387, 1.53055765388398,
0.426904841661558, -0.385574044956116, -1.05023815909094, -1.45225542235577,
-0.545485253245417, 0.173122341859165, -1.23651408987118, 0.438591835746343,
-0.826135255947115, 0.371873486298494, -0.422519474801474, -0.34343504002476,
-0.508591050193541, -1.64448384253113, -0.217712097435782, -0.396102247417337,
-0.324089563130585, 1.3108035615729, -1.74881781621313, -0.887343297491297
), C = c(2, 1, 2, 2, 1, 2, 1, 2, 3, 1, 1, 3, 2, 2, 3, 3, 2, 1,
1, 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 1, 3, 3, 2, 3, 3, 3, 3,
3, 1, 1, 2, 1, 1, 3, 3, 2, 3, 3, 1, 3, 1, 1, 2, 1, 1, 2, 1, 2,
2, 3, 2, 3, 3, 1, 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, 2, 3, 3, 3, 1,
3, 3, 3, 3, 1, 1, 2, 1, 3, 2, 3, 3, 3, 1, 2, 2, 3, 3, 2), D = c(3,
2, 0, 1, 0, 2, 1, 1, 1, 2, 1, 3, 1, 0, 1, 2, 1, 1, 1, 3, 0, 3,
0, 0, 1, 3, 0, 3, 2, 1, 3, 1, 3, 0, 2, 1, 2, 0, 2, 2, 0, 0, 0,
3, 3, 3, 3, 2, 3, 2, 2, 1, 0, 2, 1, 0, 2, 0, 2, 2, 2, 3, 0, 1,
0, 3, 3, 1, 2, 1, 1, 0, 1, 0, 3, 1, 1, 1, 0, 2, 0, 3, 2, 3, 2,
2, 3, 3, 1, 2, 3, 3, 1, 2, 3, 2, 3, 3, 0, 2), E = c(0, 1, 0,
1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1), F = c(14L, 12L, 8L, 5L,
13L, 8L, 8L, 9L, 11L, 13L, 11L, 8L, 12L, 9L, 8L, 17L, 11L, 13L,
7L, 13L, 9L, 9L, 11L, 7L, 11L, 13L, 14L, 10L, 12L, 15L, 5L, 12L,
7L, 8L, 10L, 11L, 5L, 10L, 2L, 10L, 9L, 14L, 4L, 10L, 6L, 14L,
10L, 6L, 14L, 2L, 7L, 11L, 9L, 8L, 11L, 9L, 15L, 10L, 16L, 11L,
7L, 8L, 12L, 17L, 5L, 13L, 15L, 11L, 10L, 7L, 6L, 12L, 10L, 8L,
7L, 8L, 11L, 14L, 6L, 4L, 9L, 11L, 9L, 13L, 7L, 9L, 9L, 12L,
10L, 6L, 10L, 5L, 14L, 10L, 13L, 6L, 8L, 8L, 7L, 12L)), .Names = c("ID",
"A", "B", "C", "D", "E", "F"), row.names = c(NA, -100L), class = "data.frame")
但是,我的实际数据集有100列。我想把所有唯一值个数小于或等于5的变量都转换为因子。我正在使用dplyr,代码如下:
df %>%
mutate_if(is.integer, as.numeric) %>%
mutate_if(length(unique(.)) <= 5, as.factor(.))
但是我得到了错误:
Error: length(.p) == length(vars) is not TRUE
有什么想法吗?我想把那些唯一值个数小于或等于5的变量转换为因子。
答案 0 :(得分:5)
您已经很接近了,只是少写了几个波浪号 ~ ,它用来表示 purrr 风格的匿名函数:
library(dplyr)
df %>% mutate_if(~length(unique(.x)) <= 5, ~as.factor(.x))
结果:
> df %>% mutate_if(~length(unique(.x)) <= 5, ~as.factor(.x)) %>% glimpse()
Observations: 100
Variables: 7
$ ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,...
$ A <fct> 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0...
$ B <dbl> -0.779571911, -1.018139376, -0.617199891, 0.030958050, 0.543273110, -0.043330040, 0.230179974, -0.183807679, 1.230138768, -0.44706...
$ C <fct> 2, 1, 2, 2, 1, 2, 1, 2, 3, 1, 1, 3, 2, 2, 3, 3, 2, 1, 1, 2, 3, 2, 3, 2, 2, 2, 3, 3, 3, 2, 2, 1, 3, 3, 2, 3, 3, 3, 3, 3, 1, 1, 2, 1...
$ D <fct> 3, 2, 0, 1, 0, 2, 1, 1, 1, 2, 1, 3, 1, 0, 1, 2, 1, 1, 1, 3, 0, 3, 0, 0, 1, 3, 0, 3, 2, 1, 3, 1, 3, 0, 2, 1, 2, 0, 2, 2, 0, 0, 0, 3...
$ E <fct> 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0...
$ F <int> 14, 12, 8, 5, 13, 8, 8, 9, 11, 13, 11, 8, 12, 9, 8, 17, 11, 13, 7, 13, 9, 9, 11, 7, 11, 13, 14, 10, 12, 15, 5, 12, 7, 8, 10, 11, 5...
还要注意:如果我没记错的话,匿名函数中的 . 和 .x 可以互换使用;我只是习惯使用 .x ,以防出现不止一个参数的情况(例如 purrr::map2 )。