# Sample purchase records: seven rows for agent A0001 (IDs 1 and 2) and a
# single row for agent B0001 (ID 3). The column vectors are deliberately kept
# as top-level objects as well as being combined into `buy_pattern`.
ID <- c(rep(1, 6), 2, 3)
type <- rep(c("A1", "B1"), times = c(7, 1))
ag_id <- rep(c("A0001", "B0001"), times = c(7, 1))
Outcome <- rep(c("Aggressive", "Balanced"), times = c(7, 1))
bdate <- rep("2012", 8)
sd_num <- rep("AIG0001", 8)
buy_pattern <- data.frame(ID, type, ag_id, Outcome, bdate, sd_num)
# > buy_pattern
# ID type ag_id Outcome bdate sd_num
# 1 1 A1 A0001 Aggressive 2012 AIG0001
# 2 1 A1 A0001 Aggressive 2012 AIG0001
# 3 1 A1 A0001 Aggressive 2012 AIG0001
# 4 1 A1 A0001 Aggressive 2012 AIG0001
# 5 1 A1 A0001 Aggressive 2012 AIG0001
# 6 1 A1 A0001 Aggressive 2012 AIG0001
# 7 2 A1 A0001 Aggressive 2012 AIG0001
# 8 3 B1 B0001 Balanced 2012 AIG0001
如何在每个分组内统计 ID 列的唯一值个数?使用 sqldf 时,我可以通过 count(distinct ID) 得到 distinctID 列来完成:
# For each (ag_id, Outcome, sd_num, bdate) group, report the number of
# distinct IDs (distinctID) and count(type) as rowCount.
# The query text is assembled clause by clause for readability.
data <- sqldf(
  paste0(
    "select count(distinct ID) as distinctID,count(type) as rowCount,",
    "type,ag_id,Outcome,bdate,sd_num ",
    "from buy_pattern ",
    "group by ag_id,Outcome,sd_num,bdate"
  )
)
# > data
# distinctID rowCount type ag_id Outcome bdate sd_num
# 1 2 7 A1 A0001 Aggressive 2012 AIG0001
# 2 1 1 B1 B0001 Balanced 2012 AIG0001
但在 dplyr 中尝试使用 length(unique(ID)) 时,得到的是整个数据中不同 ID 的总数,而不是每个分组内的数量:
# Attempted dplyr equivalent of the sqldf query (reproduces the problem).
# NOTE: select() drops the ID column, so `ID` inside mutate() is not a column
# of the grouped data; it resolves to the top-level `ID` vector instead, and
# distinctID is therefore the same for every group.
data <- buy_pattern %>%
  select(type, ag_id, Outcome, bdate, sd_num) %>%
  group_by(type, ag_id, Outcome, sd_num, bdate) %>%
  mutate(
    rowCount = n(),
    distinctID = length(unique(ID))
  ) %>%
  arrange(ag_id, Outcome, sd_num, desc(rowCount)) %>%
  # keep only the first row of each group
  slice(1)
# > data
# distinctID rowCount type ag_id Outcome bdate sd_num
# 1 3 7 A1 A0001 Aggressive 2012 AIG0001
# 2 3 1 B1 B0001 Balanced 2012 AIG0001
答案 0 :(得分:1)
主要原因是 'ID' 作为一个向量对象创建在全局环境中,而在 dplyr 链中,select() 没有选取 'ID' 列,导致 mutate() 中的 'ID' 是从全局环境中取得的。整个向量 'ID' 有 3 个唯一元素,并且不受 group_by() 步骤的影响。基本上,只要把 'ID' 保留在 select() 中就能解决问题。另外,也可以用 n_distinct(ID) 代替 length(unique(ID))。
# Corrected pipeline: ID is kept by select(), so length(unique(ID)) is
# evaluated on the ID column within each group.
buy_pattern %>%
  select(ID, type, ag_id, Outcome, bdate, sd_num) %>% # change here
  group_by(type, ag_id, Outcome, sd_num, bdate) %>%
  mutate(
    rowCount = n(),
    distinctID = length(unique(ID))
  ) %>%
  arrange(ag_id, Outcome, sd_num, desc(rowCount)) %>%
  # keep only the first row of each group
  slice(1)
# A tibble: 2 x 8
# Groups: type, ag_id, Outcome, sd_num, bdate [2]
# ID type ag_id Outcome bdate sd_num rowCount distinctID
# <dbl> <fctr> <fctr> <fctr> <fctr> <fctr> <int> <int>
#1 1 A1 A0001 Aggressive 2012 AIG0001 7 2
#2 3 B1 B0001 Balanced 2012 AIG0001 1 1
我们也可以用 summarise 代替 mutate,并用 n_distinct 统计唯一值:
# Aggregate each (type, ag_id, Outcome, sd_num, bdate) group into its row
# count and its number of distinct IDs.
buy_pattern %>%
  group_by(type, ag_id, Outcome, sd_num, bdate) %>%
  summarise(
    rowCount = n(),
    distinctID = n_distinct(ID)
  )