在 group_by 分组计算中排除当前行所属个体的 tidy 解决方案

时间:2019-05-12 13:42:34

标签: r dplyr

我需要根据客户调查结果计算员工的净推荐值(Net Promoter Score)。在下面的示例中,我知道如何使用 group_by 应用函数来得到每个小组或每位员工的分数。但是,我需要的是每位员工所在小组在排除该员工本人之后的分数(即其同事的分数)。我不知道如何用 dplyr 或 tidyr 实现这一点。

# toy example
library(dplyr)
# Net promoter score of a set of survey ratings: the share of ratings
# >= 9 minus the share of ratings <= 6, as a percentage of the non-missing
# ratings. Yields NaN when there are no non-missing ratings (0 / 0).
net_promoter_score <- function(v) {
  # Spell out TRUE: `T` is an ordinary variable and can be reassigned.
  promoters <- sum(v >= 9, na.rm = TRUE)
  detractors <- sum(v <= 6, na.rm = TRUE)
  (promoters - detractors) / sum(!is.na(v)) * 100
}
# Reproducible toy data: 100 survey responses, each attributed to a randomly
# drawn employee and given a 0-10 score drawn with the weights in `prob`.
set.seed(1)
df <- data.frame(
  employee = sample(c("John", "Vera", "Amy", "Bruce"), 100, replace = TRUE),
  customer_service_score = sample(
    0:10, 100,
    replace = TRUE,
    prob = c(0.15, 0.15, rep(0.15 / 7, 7), 0.25, 0.3)
  )
)
# John and Vera form group A; every other employee is in group B.
df <- df %>%
  mutate(group = ifelse(employee %in% c("John", "Vera"), "A", "B"))
df %>% head()
  employee customer_service_score group
1     Vera                      1     A
2     Vera                      9     A
3      Amy                     10     B
4    Bruce                      3     B
5     John                      1     A
6    Bruce                     10     B

# Example: peer net promoter score for Amy, i.e. the score of her group
# (group B, per the ifelse() split above) with Amy's own rows excluded.
# I need this done for all employees.
Amy_peer_score <- df %>%
  filter(employee != "Amy", group == "B") %>%
  # pull() hands the scores over as a plain vector rather than a data frame.
  pull(customer_service_score) %>%
  net_promoter_score()
Amy_peer_score
[1] -11.11111

1 个答案:

答案 0 :(得分:1)

一种做法是先按 group 分组,然后遍历组内 unique(employee) 得到的每一位员工,取出 employee 不等于该员工的那些行的 customer_service_score,再对这些分数应用 net_promoter_score。

library(tidyverse)
# For every group, drop one employee at a time and score the remaining rows,
# giving one (group, employee_excluded, score) row per employee in the group.
df %>%
  group_by(group) %>%
  summarise(score = list(bind_rows(map(unique(employee), function(excluded) {
    tibble(
      employee_excluded = excluded,
      score = net_promoter_score(customer_service_score[employee != excluded])
    )
  })))) %>%
  # Name the list-column explicitly: a bare unnest() is deprecated in tidyr.
  unnest(score)
# A tibble: 8 x 3
#  group employee_excluded score
#  <fct> <fct>             <dbl>
#1 A     Amy                58.1
#2 A     Vera               46.2
#3 A     Bruce              59.1
#4 A     John               45.2
#5 B     Vera               22.2
#6 B     Bruce              26.5
#7 B     John               37.8
#8 B     Amy                45.9

或使用data.table

library(data.table)
# Same leave-one-out scores with data.table: within each group, score the
# rows that remain after removing each distinct employee in turn.
# Note: setDT() converts `df` to a data.table by reference.
setDT(df)[, {
  staff <- unique(employee)
  list(
    employee_excluded = staff,
    score = vapply(
      staff,
      function(excluded) {
        net_promoter_score(customer_service_score[employee != excluded])
      },
      numeric(1),
      USE.NAMES = FALSE
    )
  )
}, by = group]

如果我们改用下面这个修改后的函数(接收数据框和列名作为参数)

# Data-frame flavour of the net promoter score. `d` is indexed with
# `d[[col_name]]` to obtain the ratings; the result is the share of ratings
# >= 9 minus the share of ratings <= 6, as a percentage of the non-missing
# ratings.
net_promoter_score <- function(d, col_name) {
  ratings <- d[[col_name]]
  n_promoters <- sum(ratings >= 9, na.rm = TRUE)
  n_detractors <- sum(ratings <= 6, na.rm = TRUE)
  n_answered <- sum(!is.na(ratings))
  (n_promoters - n_detractors) / n_answered * 100
}

那么代码可以相应地改写为

# Leave-one-out scores via nest(): each group's rows are nested into `data`,
# then, for every distinct employee in that group, the remaining rows are
# scored with the data-frame flavoured net_promoter_score(d, col_name).
df %>%
  group_by(group) %>%
  nest() %>%
  mutate(data = map(data, function(group_rows) {
    group_rows %>%
      distinct(employee) %>%
      pull(employee) %>%
      map_df(function(excluded) {
        peer_rows <- filter(group_rows, employee != excluded)
        tibble(
          employee_excluded = excluded,
          score = net_promoter_score(peer_rows, "customer_service_score")
        )
      })
  })) %>%
  # Drop any grouping left over from nest() and name the list-column
  # explicitly: a bare unnest() is deprecated in tidyr.
  ungroup() %>%
  unnest(data)
# A tibble: 8 x 3
#  group employee_excluded score
#  <fct> <fct>             <dbl>
#1 A     Amy                58.1
#2 A     Vera               46.2
#3 A     Bruce              59.1
#4 A     John               45.2
#5 B     Vera               22.2
#6 B     Bruce              26.5
#7 B     John               37.8
#8 B     Amy                45.9