# Sample data: 21 observations of two numeric columns.
# (A comma was missing between 110 and 90 in col1, which made the call
# unparseable; with it, col1 and col2 both have 21 values.)
df <- data.frame(
  col1 = c(
    100, 100, 100, 120, 100, 100, 100, 110, 100, 100, 100,
    110, 110, 100, 160, 110, 90, 170, 120, 160, 110
  ),
  col2 = c(
    13.4739, 13.5536, 10.7045, 30.6985, 10.9025, 13.5834, 15.6149,
    23.5638, 12.0090, 12.1103, 12.5012, 23.9931, 23.9931, 13.5775,
    56.9106, 24.1769, 8.3623, 65.6385, 30.0437, 58.8961, 25.2614
  )
)

# One hand-written filter per 10-unit band of col2, highest band first.
# Both bounds are strict, so a value lying exactly on a boundary (10, 20, ...)
# matches no band.
df[df$col2 > 60 & df$col2 < 70, c("col1", "col2")]
df[df$col2 > 50 & df$col2 < 60, c("col1", "col2")]
df[df$col2 > 40 & df$col2 < 50, c("col1", "col2")]
df[df$col2 > 30 & df$col2 < 40, c("col1", "col2")]
df[df$col2 > 20 & df$col2 < 30, c("col1", "col2")]
df[df$col2 > 10 & df$col2 < 20, c("col1", "col2")]
df[df$col2 > 0 & df$col2 < 10, c("col1", "col2")]
我想用这种方式过滤数据框,但是有没有一个函数可以避免重复写这些代码?
我想创建一个从 0 到 max(df$col2)、步长为 10 的序列,然后对它进行循环:
# Lower bounds of the 10-wide bands, from 0 up to the largest col2 value.
seq(from = 0, to = max(df$col2), by = 10)
但是这样得到的结果顺序与上面不同,因为序列从 0 开始递增,原本排在最底部的过滤器会出现在最前面:
# observe 10
# The two lowest bands, (10, 20) and (0, 10), written out by hand.
# (A stray Markdown code fence at the end of the last line was removed; it
# made the line unparseable.)
df[df$col2 > 10 & df$col2 < 20, c("col1", "col2")]
df[df$col2 > 0 & df$col2 < 10, c("col1", "col2")]
我的目的是针对每个过滤条件各创建一个包含这两列的数据框(df)。
预期输出:
filter1=df24[df24$col2>50&df24$col2<60,c("col1","col2")]
>filter1
col1 col2
15 160 56.9106
20 160 58.8961
> filter2=df24[df24$col2>60&df24$col2<70,c("col1","col2")]
> filter2
col1 col2
18 170 65.6385
> class(filter1)
[1] "data.frame"
答案 0 :(得分:1)
您可以使用 `cut_interval` 和 `group_split`:
library(tidyverse)
# Bin col2 into 10-wide intervals (labels = FALSE gives the integer bin
# index) and split df into one tibble per bin that occurs in the data.
# The bin index column is named `cut_grp` so the code agrees with the column
# header shown in the printed output.
df %>%
  group_split(cut_grp = cut_interval(col2, length = 10, labels = FALSE))
输出:
[[1]]
# A tibble: 1 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 90 8.36 1
[[2]]
# A tibble: 10 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 100 13.5 2
2 100 13.6 2
3 100 10.7 2
4 100 10.9 2
5 100 13.6 2
6 100 15.6 2
7 100 12.0 2
8 100 12.1 2
9 100 12.5 2
10 100 13.6 2
[[3]]
# A tibble: 5 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 110 23.6 3
2 110 24.0 3
3 110 24.0 3
4 110 24.2 3
5 110 25.3 3
[[4]]
# A tibble: 2 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 120 30.7 4
2 120 30.0 4
[[5]]
# A tibble: 2 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 160 56.9 6
2 160 58.9 6
[[6]]
# A tibble: 1 x 3
col1 col2 cut_grp
<dbl> <dbl> <int>
1 170 65.6 7
答案 1 :(得分:0)
我建议这种方法:
# Create index: lower bounds of the 10-wide bands, 0, 10, 20, ...
index <- seq(0, max(df$col2), by = 10)
# Loop over the bands, lowest first, and show the rows whose col2 lies
# strictly between i and i + 10.
for (i in index) {
  # Values are not auto-printed inside a for loop, so print() is required
  # for each filtered data frame to appear.
  print(df[df$col2 > i & df$col2 < i + 10, c("col1", "col2")])
}
如果想把每个过滤结果保存为单独的元素,可以像下面这样使用列表:
# Create index: lower bounds of the 10-wide bands, reversed so that the
# highest band comes first.
index <- seq(0, max(df$col2), by = 10)
index <- rev(index)
# Create list: element k holds the rows whose col2 lies strictly between
# index[k] and index[k] + 10 (possibly a zero-row data frame).
# lapply() builds the whole list in one pass, replacing the 1:length(index)
# loop that grew the list by appending.
List <- lapply(index, function(lower) {
  df[df$col2 > lower & df$col2 < lower + 10, c("col1", "col2")]
})
输出:
List
[[1]]
col1 col2
18 170 65.6385
[[2]]
col1 col2
15 160 56.9106
20 160 58.8961
[[3]]
[1] col1 col2
<0 rows> (or 0-length row.names)
[[4]]
col1 col2
4 120 30.6985
19 120 30.0437
[[5]]
col1 col2
8 110 23.5638
12 110 23.9931
13 110 23.9931
16 110 24.1769
21 110 25.2614
[[6]]
col1 col2
1 100 13.4739
2 100 13.5536
3 100 10.7045
5 100 10.9025
6 100 13.5834
7 100 15.6149
9 100 12.0090
10 100 12.1103
11 100 12.5012
14 100 13.5775
[[7]]
col1 col2
17 90 8.3623
最后这个循环复现了问题开头各个过滤器的相同逻辑。
答案 2 :(得分:0)
您可以使用 `split` 函数:
# Label each row with the index of the 10-wide interval that contains its
# col2 value, then split df on that label.
split(
  df,
  findInterval(df$col2, seq(from = 0, to = max(df$col2), by = 10))
)
$`1`
col1 col2
17 90 8.3623
$`2`
col1 col2
1 100 13.4739
2 100 13.5536
3 100 10.7045
5 100 10.9025
6 100 13.5834
7 100 15.6149
9 100 12.0090
10 100 12.1103
11 100 12.5012
14 100 13.5775
......
如果您还需要保留不包含任何数据的区间,请改用 `cut`,即:
# cut() returns a factor with one level per interval, so split() also
# returns the intervals that contain no rows (as zero-row data frames).
split(
  df,
  cut(df$col2, breaks = seq(from = 0, to = max(df$col2) + 10, by = 10))
)
....
$`(30,40]`
col1 col2
4 120 30.6985
19 120 30.0437
$`(40,50]`
[1] col1 col2
<0 rows> (or 0-length row.names)
$`(50,60]`
col1 col2
15 160 56.9106
20 160 58.8961
....