我有一个像这样的数据框:
# Sample data: one row per observation of a species pair (sp1, sp2) at a
# location (loc) inside a region; `inter` is the pair label "sp1_sp2".
df <- data.frame(
  region = rep(c("1", "2", "3", "4"), times = c(4L, 3L, 2L, 2L)),
  loc    = c("1A", "1A", "1B", "1C", "2A", "2B", "2C", "3A", "3B", "4A", "4B"),
  sp1    = c("a", "a", "b", "a", "c", "c", "c", "a", "a", "d", "d"),
  sp2    = c("b", "b", "c", "b", "d", "d", "d", "b", "b", "e", "e"),
  inter  = c(
    "a_b", "a_b", "b_c", "a_b", "c_d", "c_d", "c_d", "a_b", "a_b",
    "d_e", "d_e"
  )
)
我想首先按 region 对 df 进行分组,然后在每个 region 内部(WITHIN)找到重复的 inter。
然后我想跨所有 region 找到重复的 inter。第一个子集看起来像:
# Expected intermediate result: one row per region for each `inter` value
# that occurs more than once inside that region.
subset1 <- data.frame(
  region = c("1", "2", "3", "4"),
  sp1    = c("a", "c", "a", "d"),
  sp2    = c("b", "d", "b", "e"),
  inter  = c("a_b", "c_d", "a_b", "d_e")
)
最终输出如下:
# Expected final result: the single `inter` value that is duplicated across
# regions in subset1.
df <- data.frame(sp1 = "a", sp2 = "b", inter = "a_b")
答案 0 :(得分:0)
对于每个区域内的重复项:使用 dplyr,先用 filter 筛选出在区域内重复的行,
然后用 distinct 得到唯一的 region-inter 组合:
library(dplyr)

# Within each region, keep the rows whose `inter` value has already appeared
# earlier in that region (duplicated() flags the 2nd, 3rd, ... occurrence),
# then collapse them to one row per region/inter pair.
# The grouping is dropped at the end so that later verbs (for example
# count(inter) to look for duplicates across regions) operate on the whole
# table instead of silently working region by region.
within_region <- df %>%
  group_by(region) %>%
  filter(duplicated(inter)) %>%
  distinct(region, inter, .keep_all = TRUE) %>%
  ungroup()
# region loc sp1 sp2 inter
# 1 1 1A a b a_b
# 2 2 2B c d c_d
# 3 3 3B a b a_b
# 4 4 4B d e d_e
对于所有地区中 inter 重复项的计数:
# Number of rows of df carrying each `inter` value, over all regions.
all_region <- count(df, inter)
# inter n
# 1 a_b 5
# 2 b_c 1
# 3 c_d 3
# 4 d_e 2
答案 1 :(得分:0)
Try this (you just need to drop some columns to meet your goal):
#1
# For every region/inter pair, record the pair's row count in `n`, then keep
# only the last row of each pair that occurs more than once.
# Note: this overwrites df, and the result stays grouped by region and inter.
df <- df %>%
  group_by(region, inter) %>%
  dplyr::mutate(n = n()) %>%
  filter(n > 1, row_number() == n())
> df
Source: local data frame [4 x 6]
Groups: region, inter [4]
region loc sp1 sp2 inter n
<fctr> <fctr> <fctr> <fctr> <fctr> <int>
1 1 1C a b a_b 3
2 2 2C c d c_d 3
3 3 3B a b a_b 2
4 4 4B d e d_e 2
#2
# Same pattern, grouped by `inter` only: keep the last row of every `inter`
# value that occurs more than once in df.
# NOTE(review): the printed result below suggests df here is the 4-row table
# produced by step #1, not the original 11-row data - confirm before reuse.
df %>%
  group_by(inter) %>%
  dplyr::mutate(n = n()) %>%
  filter(n > 1, row_number() == n())
Source: local data frame [1 x 6]
Groups: inter [1]
region loc sp1 sp2 inter n
<fctr> <fctr> <fctr> <fctr> <fctr> <int>
1 3 3B a b a_b 2