按10过滤数据帧

时间:2020-09-10 18:02:56

标签: r

# Example data: 21 observations. col2 is the value that is later binned
# into ranges of width 10; col1 is carried along with it.
df <- data.frame(
  col1 = c(
    100, 100, 100, 120, 100, 100, 100, 110, 100, 100, 100,
    110, 110, 100, 160, 110, 90, 170, 120, 160, 110
  ),
  col2 = c(
    13.4739, 13.5536, 10.7045, 30.6985, 10.9025, 13.5834, 15.6149,
    23.5638, 12.0090, 12.1103, 12.5012, 23.9931, 23.9931, 13.5775,
    56.9106, 24.1769, 8.3623, 65.6385, 30.0437, 58.8961, 25.2614
  )
)

# One filter per range of width 10, from the highest range down to the
# lowest; both bounds are exclusive.
df[df$col2 > 60 & df$col2 < 70, c("col1", "col2")]
df[df$col2 > 50 & df$col2 < 60, c("col1", "col2")]
df[df$col2 > 40 & df$col2 < 50, c("col1", "col2")]
df[df$col2 > 30 & df$col2 < 40, c("col1", "col2")]
df[df$col2 > 20 & df$col2 < 30, c("col1", "col2")]
df[df$col2 > 10 & df$col2 < 20, c("col1", "col2")]
df[df$col2 > 0 & df$col2 < 10, c("col1", "col2")]

我想用这种方式过滤数据帧,但是有一个函数可以避免所有这些代码吗?

我想过创建一个从 0 到 max(df$col2)、步长为 10 的序列,然后对它进行循环:

# Range lower bounds: 0, 10, 20, ... up to the largest multiple of 10
# that does not exceed max(col2)
seq(from = 0, to = max(df$col2), by = 10)

但是结果不会像上面的结果一样出现,因为底部的过滤器会在顶部重复:

# Observe the boundary value 10, which is shared by the two filters below
df[df$col2 > 10 & df$col2 < 20, c("col1", "col2")]
df[df$col2 > 0 & df$col2 < 10, c("col1", "col2")]



我的意图是创建一个df,其中两列都考虑过滤条件

预期输出:

filter1=df24[df24$col2>50&df24$col2<60,c("col1","col2")]
>filter1
   col1    col2
15  160 56.9106
20  160 58.8961

> filter2=df24[df24$col2>60&df24$col2<70,c("col1","col2")]
> filter2
   col1    col2
18  170 65.6385

> class(filter1)
[1] "data.frame"

3 个答案:

答案 0 :(得分:1)

您可以使用 cut_interval 和 group_split:

library(tidyverse)

# cut_interval() (ggplot2) bins col2 into intervals of width 10, and
# labels = FALSE makes it return integer bin codes. group_split() (dplyr)
# then returns one tibble per bin. The helper column is named cut_grp so
# that the code matches the column shown in the printed output.
df %>%
  group_split(cut_grp = cut_interval(col2, length = 10, labels = FALSE))

输出:

[[1]]
# A tibble: 1 x 3
   col1  col2 cut_grp
  <dbl> <dbl>   <int>
1   170  8.36       1

[[2]]
# A tibble: 10 x 3
    col1  col2 cut_grp
   <dbl> <dbl>   <int>
 1   100  13.5       2
 2   100  13.6       2
 3   100  10.7       2
 4   100  10.9       2
 5   100  13.6       2
 6   100  15.6       2
 7   100  12.0       2
 8   100  12.1       2
 9   100  12.5       2
10   100  13.6       2

[[3]]
# A tibble: 4 x 3
   col1  col2 cut_grp
  <dbl> <dbl>   <int>
1   110  23.6       3
2   110  24.0       3
3   110  24.0       3
4 11090  24.2       3

[[4]]
# A tibble: 2 x 3
   col1  col2 cut_grp
  <dbl> <dbl>   <int>
1   120  30.7       4
2   160  30.0       4

[[5]]
# A tibble: 2 x 3
   col1  col2 cut_grp
  <dbl> <dbl>   <int>
1   160  56.9       6
2   110  58.9       6

[[6]]
# A tibble: 1 x 3
   col1  col2 cut_grp
  <dbl> <dbl>   <int>
1   120  65.6       7

答案 1 :(得分:0)

我建议这种方法:

# Lower bounds of the width-10 ranges: 0, 10, 20, ...
index <- seq(0, max(df$col2), by = 10)
# Show the rows that fall strictly inside each range (i, i + 10)
for (i in index) {
  # Values are not auto-printed inside a for loop, so print explicitly
  print(df[df$col2 > i & df$col2 < i + 10, c("col1", "col2")])
}

如果要创建单个元素,则可以使用如下列表:

# Lower bounds of the width-10 ranges, highest range first
index <- seq(0, max(df$col2), by = 10)
index <- rev(index)
# Preallocate one list slot per range instead of growing the list
List <- vector("list", length(index))
# Store the rows that fall strictly inside each range (lower, lower + 10);
# a range without matching rows yields a 0-row data frame
for (i in seq_along(index)) {
  in_range <- df$col2 > index[i] & df$col2 < index[i] + 10
  List[[i]] <- df[in_range, c("col1", "col2")]
}

输出:

List
[[1]]
   col1    col2
18  170 65.6385

[[2]]
   col1    col2
15  160 56.9106
20  160 58.8961

[[3]]
[1] col1 col2
<0 rows> (or 0-length row.names)

[[4]]
   col1    col2
4   120 30.6985
19  120 30.0437

[[5]]
   col1    col2
8   110 23.5638
12  110 23.9931
13  110 23.9931
16  110 24.1769
21  110 25.2614

[[6]]
   col1    col2
1   100 13.4739
2   100 13.5536
3   100 10.7045
5   100 10.9025
6   100 13.5834
7   100 15.6149
9   100 12.0090
10  100 12.1103
11  100 12.5012
14  100 13.5775

[[7]]
   col1   col2
17   90 8.3623

最后一个循环在问题开始时复制了各个过滤器的相同逻辑。

答案 2 :(得分:0)

您可以使用split函数:

# Assign each row the number of its width-10 range, then split by it
split(df, findInterval(df$col2, seq(from = 0, to = max(df$col2), by = 10)))

$`1`
   col1   col2
17   90 8.3623

$`2`
   col1    col2
1   100 13.4739
2   100 13.5536
3   100 10.7045
5   100 10.9025
6   100 13.5834
7   100 15.6149
9   100 12.0090
10  100 12.1103
11  100 12.5012
14  100 13.5775

......

如果您还需要注意不包含任何数据的范围,请改用cut。即:

 # cut() returns a factor with one level per range, so split() also
 # returns the ranges that contain no rows (as 0-row data frames)
 split(
   df,
   cut(df$col2, breaks = seq(from = 0, to = max(df$col2) + 10, by = 10))
 )
....
$`(30,40]`
   col1    col2
4   120 30.6985
19  120 30.0437

$`(40,50]`
[1] col1 col2
<0 rows> (or 0-length row.names)

$`(50,60]`
   col1    col2
15  160 56.9106
20  160 58.8961
....