# Example data: a 10-row data frame with four factor columns (V1-V4) whose
# labels are percentage strings written with a decimal comma, e.g. "0,6%".
# Each column stores integer codes that index into its .Label vector.
dat <- structure(
  list(
    V1 = structure(
      c(3L, 4L, 1L, 5L, 6L, 1L, 1L, 1L, 1L, 1L),
      .Label = c(
        "0,0%", "0,5%", "0,6%", "1,0%", "1,2%", "2,0%", "2,1%", "2,4%",
        "3,0%", "3,3%", "4,0%", "5,0%", "7,0%"
      ),
      class = "factor"
    ),
    V2 = structure(
      c(6L, 7L, 5L, 7L, 7L, 7L, 1L, 1L, 1L, 1L),
      .Label = c(
        "0,0%", "12,0%", "2,0%", "2,8%", "3,0%", "3,6%", "4,0%", "4,3%",
        "5,0%", "6,0%", "6,4%", "7,0%", "7,9%", "8,0%"
      ),
      class = "factor"
    ),
    V3 = structure(
      c(3L, 6L, 2L, 16L, 2L, 14L, 1L, 1L, 1L, 1L),
      .Label = c(
        "0,0%", "10,0%", "11,7%", "11,9%", "12,0%", "13,0%", "14,0%", "15,0%",
        "18,0%", "18,9%", "25,0%", "30,0%", "7,0%", "8,0%", "9,0%", "9,1%"
      ),
      class = "factor"
    ),
    V4 = structure(
      c(8L, 9L, 4L, 5L, 7L, 3L, 2L, 2L, 2L, 2L),
      .Label = c(
        "0,5%", "1,0%", "12,0%", "14,0%", "14,3%", "15,0%", "16,0%", "16,3%",
        "18,0%", "19,4%", "20,0%", "22,0%", "22,4%", "23,0%", "25,0%", "28,0%",
        "28,5%", "30,0%", "35,0%", "50,0%"
      ),
      class = "factor"
    )
  ),
  row.names = c(NA, 10L),
  class = "data.frame"
)
我想做两件事:
1)把小数分隔符 , 替换为 .
2)删除 % 符号
# Attempt 1: strips "%" only. The strings still contain a decimal comma
# (e.g. "0,6"), which as.numeric() cannot parse, so every value becomes NA.
sapply(dat, function(x) as.numeric(gsub("%", "", x)))
# Attempt 2: replaces "," with "." only. The strings still end in "%"
# (e.g. "0.6%"), which as.numeric() cannot parse, so every value becomes NA.
sapply(dat, function(x) as.numeric(gsub(",", ".", x)))
这两种写法返回的结果全都是 NA。我哪里做错了?
答案 0 :(得分:2)
我想补充一种 tidyverse 风格的写法:
library(tidyverse)
# Convert every column to numeric: swap the first "," for ".", drop the
# first "%", then parse the remaining text with as.numeric(). map_df()
# reassembles the converted columns into a single data frame.
dat <- map_df(dat, function(column) {
  column %>%
    str_replace(pattern = ",", replacement = ".") %>%
    str_remove(pattern = "%") %>%
    as.numeric()
})
绝对不是最快的方法:
# Benchmark the base-R lapply() conversion against the tidyverse pipeline.
# Both expressions must perform the same conversion for the timings to be
# comparable: "," becomes the decimal point "." and "%" is removed before
# as.numeric() is applied.
# NOTE(review): run this on the original factor `dat`; if `dat` has already
# been overwritten with converted values, both arms time a no-op instead.
mbm <- microbenchmark::microbenchmark(
  lap = {
    lapply(dat, function(x) {
      # Replace the decimal comma with "." rather than deleting it:
      # deleting it would turn "0,6%" into 6 instead of 0.6.
      as.numeric(gsub("%", "", gsub(",", ".", x)))
    })
  },
  tidy = {
    dat %>%
      map_df(str_replace, pattern = ",", replacement = ".") %>%
      map_df(str_remove, pattern = "%") %>%
      map_df(as.numeric)
  }
)
基准测试表明,lapply 的写法比我的 tidyverse 写法快约 10 倍,不过对某些人来说,lapply 的写法可能更难读懂。
答案 1 :(得分:1)
这两步替换需要合在一起完成:只去掉 % 之后,字符串里仍然含有 ,,它依旧是无法解析为数字的 character 向量,此时转换为 numeric 只会得到 NA。因此,只有在两项替换都完成之后才调用 as.numeric:
# Convert every column in place: turn the decimal comma into ".", strip the
# "%" sign, and only then parse the cleaned text as numeric. Assigning into
# dat[] keeps `dat` a data frame with its original column names.
dat[] <- lapply(dat, function(column) {
  with_decimal_point <- gsub(",", ".", column)
  without_percent <- gsub("%", "", with_decimal_point)
  as.numeric(without_percent)
})
如果我们使用tidyverse
library(tidyverse)
# Convert every column to numeric in one pass: replace the decimal comma
# with "." and let parse_number() drop the trailing "%" while parsing.
# across(everything(), ...) replaces the superseded mutate_all() and the
# deprecated funs() helper. The result is printed, not assigned back to dat.
dat %>%
  mutate(
    across(
      everything(),
      function(x) parse_number(str_replace(x, ",", "."))
    )
  )