我有许多连续变量列，需要计算每一列中有多大比例的值落在 0 到 x 的范围内。我尝试了一种类似于此的相当冗长的方法:
library(tidyverse)

# Share of cars whose mpg lies in [0, upper], written out once per upper bound.
# Each indicator column is 1 when mpg is inside the range and 0 otherwise, so
# the column mean is the proportion of rows inside that range.
mtcars %>%
  mutate(
    `1` = as.numeric(between(mpg, 0, 1)),
    `5` = as.numeric(between(mpg, 0, 5)),
    `10` = as.numeric(between(mpg, 0, 10)),
    `20` = as.numeric(between(mpg, 0, 20)),
    `50` = as.numeric(between(mpg, 0, 50)),
    `100` = as.numeric(between(mpg, 0, 100)),
    `400` = as.numeric(between(mpg, 0, 400)),
    # Keep only the indicator columns created above.
    .keep = "none"
  ) %>%
  map_df(mean)
有没有一种优雅的方法来迭代这个过程而不复制和粘贴变量?
答案 0 :(得分:3)
如果需要对所有数值列执行此操作，可以遍历各个范围上限，然后用 `summarise` 配合 `across` 遍历各列，并用 `between` 判断取值是否落在范围内，再用 `mean` 计算所占比例。
library(dplyr)
library(purrr)

# For every upper bound, compute per numeric column the share of values lying
# in [0, bound]. lst() names each element after its own expression ("1", "5",
# ...), and those names are written to the `categ` column via `.id`.
map_dfr(
  lst(1, 5, 10, 20, 50, 100, 400),
  function(upper) {
    summarise(
      mtcars,
      across(
        where(is.numeric),
        function(col) mean(between(col, 0, upper))
      )
    )
  },
  .id = "categ"
)
-输出
# categ mpg cyl disp hp drat wt qsec vs am gear carb
#1 1 0.0000 0.00000 0.00000 0.00000 0 0.00000 0.00000 1 1 0 0.21875
#2 5 0.0000 0.34375 0.00000 0.00000 1 0.90625 0.00000 1 1 1 0.93750
#3 10 0.0000 1.00000 0.00000 0.00000 1 1.00000 0.00000 1 1 1 1.00000
#4 20 0.5625 1.00000 0.00000 0.00000 1 1.00000 0.90625 1 1 1 1.00000
#5 50 1.0000 1.00000 0.00000 0.00000 1 1.00000 1.00000 1 1 1 1.00000
#6 100 1.0000 1.00000 0.15625 0.28125 1 1.00000 1.00000 1 1 1 1.00000
#7 400 1.0000 1.00000 0.90625 1.00000 1 1.00000 1.00000 1 1 1 1.00000
或者，也可以在 base R 中使用 `outer` 来实现：
# Upper bounds of the [0, bound] ranges to evaluate.
categ <- c(1, 5, 10, 20, 50, 100, 400)

# One row per bound, one column per mtcars variable: the share of that
# variable's values lying in [0, bound]. The bounds themselves are prepended
# as the first column, named `categ`.
out <- cbind(
  categ,
  vapply(
    mtcars,
    function(col) {
      vapply(
        categ,
        function(upper) mean(col >= 0 & col <= upper),
        numeric(1)
      )
    },
    numeric(length(categ))
  )
)
-输出
out
# categ mpg cyl disp hp drat wt qsec vs am gear carb
#[1,] 1 0.0000 0.00000 0.00000 0.00000 0 0.00000 0.00000 1 1 0 0.21875
#[2,] 5 0.0000 0.34375 0.00000 0.00000 1 0.90625 0.00000 1 1 1 0.93750
#[3,] 10 0.0000 1.00000 0.00000 0.00000 1 1.00000 0.00000 1 1 1 1.00000
#[4,] 20 0.5625 1.00000 0.00000 0.00000 1 1.00000 0.90625 1 1 1 1.00000
#[5,] 50 1.0000 1.00000 0.00000 0.00000 1 1.00000 1.00000 1 1 1 1.00000
#[6,] 100 1.0000 1.00000 0.15625 0.28125 1 1.00000 1.00000 1 1 1 1.00000
#[7,] 400 1.0000 1.00000 0.90625 1.00000 1 1.00000 1.00000 1 1 1 1.00000
答案 1 :(得分:1)
这应该给出相同的结果,并且我认为这是一种相当“整洁”的方式:
library(dplyr)
library(tidyr)

# Upper bounds of the [0, threshold] ranges to evaluate.
distance_thresholds <- tibble(
  threshold = c(1, 5, 10, 20, 50, 100, 400)
)

# Pair every car with every threshold, compute per threshold the share of cars
# whose mpg lies in [0, threshold], then spread the shares into a single row
# with one column per threshold.
mtcars %>%
  # cross_join() (dplyr >= 1.1.0) replaces the deprecated `by = character()`.
  cross_join(distance_thresholds) %>%
  group_by(threshold) %>%
  # Include the lower bound so the range is [0, threshold], not just an
  # upper limit.
  summarise(avg = mean(mpg >= 0 & mpg <= threshold)) %>%
  pivot_wider(names_from = threshold, values_from = avg)
答案 2 :(得分:0)
使用 `sapply` 和 `colMeans`：
# Upper bounds of the [0, bound] ranges to evaluate.
vals <- c(1, 5, 10, 20, 50, 100, 400)

# Share of mpg values within [0, bound] for each bound. vapply() pins the
# result to exactly one double per bound, whereas sapply() returns a list for
# an empty `vals`, which colMeans() cannot handle. The names record which
# bound each share belongs to.
in_range <- vapply(
  vals,
  function(upper) mean(mtcars$mpg >= 0 & mtcars$mpg <= upper),
  numeric(1)
)
names(in_range) <- vals
in_range