按公司规模和月份计算

时间:2018-04-17 12:40:08

标签: sql postgresql

我有一个名为 employees 的员工表,包含以下列:

  • company_id
  • employee_id
  • opt_out_on(日期)
  • opt_out_window_starts_on(日期)

这两个日期都是针对单个员工的,因此同一公司的不同员工之间可能不同。我希望统计 opt_out_on 所在月份与 opt_out_window_starts_on 所在月份相同的员工人数,并按公司规模分组。我写了下面的查询,但得到的数字不对。

源数据如下:

company_id | employee_id | opt_out_on | opt_out_window_starts_on
-----------+-------------+------------+-------------------------
     23101 |   920190500 | 20/01/2017 | 09/01/2017
     12264 |   920190508 | 02/03/2017 | 04/02/2017
     12039 |   920190513 | 07/11/2017 | 31/10/2017
     12039 |   920190516 | 13/02/2017 | 11/02/2017
     23109 |   920190520 | 06/01/2017 | 06/01/2017
     21875 |   920190529 | 22/05/2017 | 15/05/2017
     21875 |   920190532 | 19/05/2017 | 15/05/2017
     21875 |   920190533 | 22/05/2017 | 15/05/2017
     21875 |   920190534 | 15/05/2017 | 15/05/2017
     21875 |   920190535 | 22/05/2017 | 15/05/2017
     21875 |   920190536 | 19/05/2017 | 15/05/2017
     18112 |   920190545 | 11/01/2017 | 05/01/2017
      9764 |   920190546 | 02/05/2017 | 02/05/2017
     18365 |   920190665 | 30/03/2017 | 28/03/2017
     18365 |   920190668 | 28/03/2017 | 28/03/2017
     18365 |   920190678 | 28/03/2017 | 28/03/2017
     20928 |   920190699 | 14/06/2017 | 13/06/2017
     20928 |   920190705 | 20/06/2017 | 13/06/2017
     16860 |   920190717 | 17/01/2017 | 16/01/2017
     16860 |   920190718 | 16/01/2017 | 16/01/2017

此数据可在SQL Fiddle上找到。

我的代码是:

-- Asker's original query (reported as producing wrong totals).
--
-- CTE "size": groups employees by (company_id, id) and emits, per group,
-- the first day of the month of opt_out_window_starts_on plus eight
-- company-size bucket columns ("one", "two", "3-4", ... "250+").
--
-- Every bucket column follows the same pattern:
--   * count(id) over (partition by company_id) is a window function, so it
--     is evaluated over the grouped rows and counts the (company_id, id)
--     groups with a non-null id inside the company;
--   * when that count falls inside the bucket's range, the column holds
--     count(id) filter (...) for the current group, i.e. the number of rows
--     in the group with opt_state = 'opted_out' whose opt_out_on falls in
--     the same month as opt_out_window_starts_on; otherwise it holds 0.
--
-- NOTE(review): "select distinct" removes rows that are identical in
-- company_id, months and all bucket columns. If id is unique per employee,
-- every qualifying employee of one company in one month yields the same row
-- (a 1 in the same bucket), so those rows collapse into one before the outer
-- sum. This looks like the cause of the undercount; confirm against the data.
with size as 
(
    select distinct company_id
    , date_trunc('month', opt_out_window_starts_on) :: date months
    -- companies with exactly 1 employee
    , case 
        when count(id) over (partition by company_id ) = 1 then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "one"
    -- companies with exactly 2 employees
    , case 
        when count(id) over (partition by company_id ) = 2 then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "two"
    -- companies with 3 to 4 employees
    , case 
        when 
            count(id) over (partition by company_id ) >= 3 
            and count(id) over (partition by company_id ) <= 4
        then 
            count(id) filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "3-4"
    -- companies with 5 to 9 employees
    , case 
        when 
            count(id) over (partition by company_id ) >= 5 
            and count(id) over (partition by company_id ) <= 9
        then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "5-9"
    -- companies with 10 to 29 employees
    , case 
        when 
            count(id) over (partition by company_id ) >= 10 
            and count(id) over (partition by company_id ) <= 29
        then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "10-29"
    -- companies with 30 to 49 employees
    , case 
        when 
            count(id) over (partition by company_id ) >= 30 
            and count(id) over (partition by company_id ) <= 49
        then 
            count(id)
            filter (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "30-49"
    -- companies with 50 to 249 employees
    , case 
        when 
            count(id) over (partition by company_id) >= 50 
            and count(id) over (partition by company_id) <= 249
        then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "50-249"
    -- companies with 250 or more employees
    , case 
        when 
            count(id) over (partition by company_id ) >= 250
        then 
            count(id)
            filter 
            (
                where opt_state = 'opted_out' 
                and 
                (
                    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
                )
            )
        else 0 
    end as "250+"
    from employees
    group by company_id
    , id
) 
-- Outer query: one row per month, summing each bucket column of "size".
-- Rows whose month is null are excluded by the where clause.
select to_char(date_trunc('month', months),'YYYY MON')
, sum(one)
, sum(two)
, sum("3-4")
, sum("5-9")
, sum("10-29")
, sum("30-49")
, sum("50-249")
    , sum("250+")
from size
where to_char(date_trunc('month', months),'YYYY MON') is not null
group by date_trunc('month', months)
order by date_trunc('month', months) asc
;

错误结果的样本:

  to_char  | sum | sum | sum | sum | sum | sum | sum | sum
  ---------+-----+-----+-----+-----+-----+-----+-----+----
  2017 JAN |  35 |  37 |  85 | 119 | 131 |  39 |  19 |   3
  2017 FEB |  49 |  53 | 112 | 165 | 170 |  41 |  23 |   2
  2017 MAR |  29 |  33 |  62 |  85 | 104 |  24 |  23 |   1
  2017 APR |  37 |  50 | 117 | 150 | 159 |  33 |  28 |   3
  2017 MAY |  71 |  84 | 150 | 182 | 208 |  51 |  44 |   4
  2017 JUN |  35 |  51 |  80 |  90 | 128 |  26 |  43 |   7
  2017 JUL |  69 |  84 | 146 | 177 | 173 |  51 |  45 |   5
  2017 AUG |  93 | 137 | 168 | 194 | 234 |  63 |  51 |   5
  2017 SEP |  75 |  60 |  97 | 127 | 136 |  44 |  44 |   6
  2017 OCT |  91 | 113 | 132 | 157 | 126 |  38 |  49 |   6
  2017 NOV | 125 | 158 | 168 | 198 | 162 |  36 |  58 |  11
  2017 DEC |  49 |  81 |  83 | 103 |  91 |  27 |  40 |  10
  2018 JAN | 134 | 132 | 119 | 158 | 132 |  37 |  41 |  12
  2018 FEB | 127 | 111 | 155 | 142 | 121 |  37 |  45 |  15
  2018 MAR | 112 |  92 | 119 | 115 | 116 |  35 |  37 |  15
  2018 APR |  37 |  51 |  64 |  57 |  56 |  14 |  26 |  10
  2018 MAY |   0 |   0 |   0 |   0 |   0 |   0 |   0 |   0

如果以2017年1月为例,选择退出的员工总数为1404,而我的查询显示为468(35 + 37 + 85 + 119 + 131 + 39 + 19 + 3)。

我检查了数字:

-- Sanity check: employees per opt-out-window month who opted out in the
-- same calendar month their window started, with no company-size split.
select
    count(id),
    date_trunc('month', opt_out_window_starts_on)
from
    employees
where
    date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
    and opt_state = 'opted_out'
group by
    date_trunc('month', opt_out_window_starts_on)

1 个答案:

答案 0 :(得分:1)

如果我对你的问题理解正确,你想要的是:

  • 代表公司规模范围(即员工总数)的列
  • 给出年份和月份的行
  • 每个单元格的值,表示对应规模的公司中,在其可选择退出窗口开始的同一个月内选择退出的员工人数。

下面的代码解决了问题的第一部分:获取公司列表、各公司的规模以及选择退出的人数:

-- One row per company and opt-out-window month.
--   optOutMonth           : month in which the employees' opt-out window starts
--   companySize           : head count of the whole company (all months)
--   leaversInOptOutWindow : employees who opted out in the same calendar
--                           month their opt-out window started
select company_id
, date_trunc('month', opt_out_window_starts_on) optOutMonth
-- The query groups by company AND month, so a plain count(employee_id) would
-- only give the head count of one month's slice. The window sum over the
-- grouped rows adds the slices back up to the company total. sum() of a
-- bigint is numeric, hence the cast back to bigint.
, cast(sum(count(employee_id)) over (partition by company_id) as bigint) as companySize
, count(case 
    when opt_state = 'opted_out'
    and date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
    then employee_id
    else null
end)
 as leaversInOptOutWindow
from employee
group by company_id, date_trunc('month', opt_out_window_starts_on)

SQL Fiddle

然后,您可以将此结果与另一张定义了规模区间的表连接(下面的查询假设该表名为 companySizeRange,包含 name、minSize、maxSize 列),以便按规模对公司分组,再对结果做透视(pivot)。

遗憾的是,Postgres 没有原生的 PIVOT 语法,但似乎可以通过 crosstab 函数实现相同的效果:PostgreSQL Crosstab Query

我没能让它在 SQL Fiddle 中运行起来;不确定是我的代码有问题,还是 SQL Fiddle 不支持此函数(我对 Postgres 不够熟悉)……但我相信这应该就是你想要的:

-- Pivot of leavers per opt-out-window month (rows) by company-size range
-- (columns), using crosstab(source_sql, category_sql) from the tablefunc
-- extension.
--
-- Source query, per (company, month) group:
--   * optOutMonth is cast to date so that it matches the "optOutMonth date"
--     column declared in the output list below;
--   * companySize is the head count of the whole company: the window sum
--     adds up the per-month slices created by the group by;
--   * leaversInOptOutWindow counts employees who opted out in the same
--     calendar month their window started.
-- Groups without an opt-out window month are dropped in the outer query so
-- that no row with a NULL row name is produced; they still count towards
-- companySize because the filter is applied after the window sum.
SELECT * FROM crosstab(
'
  select e.optOutMonth
    , s.name
    , sum(e.leaversInOptOutWindow) LeaverCount
    from
    (
        select company_id
        , date_trunc(''month'', opt_out_window_starts_on)::date optOutMonth
        , sum(count(employee_id)) over (partition by company_id) as companySize
        , count(case 
            when opt_state = ''opted_out''
            and date_trunc(''month'', opt_out_on) = date_trunc(''month'', opt_out_window_starts_on)
            then employee_id
            else null
        end) as leaversInOptOutWindow
        from employee 
        group by company_id
        , date_trunc(''month'', opt_out_window_starts_on)
    ) e
    left outer join companySizeRange s
    on s.minSize <= e.companySize
    and (s.maxSize is null or s.maxSize >= e.companySize)
    where e.optOutMonth is not null
    group by e.optOutMonth, s.name
    order by e.optOutMonth, s.name
'
  , 'select name from companySizeRange order by minSize'
 ) as x(
   optOutMonth date, "1" int, "2" int, "3-4" int, "5-9" int, "10-29" int, "30-49" int, "50-249" int, "250+" int
 );

SQL Fiddle

更新

这是一个不需要crosstab / pivot的版本:

-- Leavers per opt-out-window month, split into company-size buckets with
-- conditional sums instead of crosstab / pivot.
with company_headcount as (
    -- head count of each company over the whole employee table
    select company_id
    , count(employee_id) as headcount
    from employee
    group by company_id
)
, monthly_leavers as (
    -- per company and window month: employees who opted out in the same
    -- calendar month their opt-out window started
    select company_id
    , date_trunc('month', opt_out_window_starts_on) as window_month
    , count(employee_id) as leavers
    from employee
    where opt_state = 'opted_out'
    and date_trunc('month', opt_out_on) = date_trunc('month', opt_out_window_starts_on)
    group by company_id
    , date_trunc('month', opt_out_window_starts_on)
)
select m.window_month as optOutMonth
, sum(case when h.headcount = 1 then m.leavers else 0 end) as "1"
, sum(case when h.headcount = 2 then m.leavers else 0 end) as "2"
, sum(case when h.headcount >= 3 and h.headcount <= 4 then m.leavers else 0 end) as "3-4"
, sum(case when h.headcount >= 5 and h.headcount <= 9 then m.leavers else 0 end) as "5-9"
, sum(case when h.headcount >= 10 and h.headcount <= 29 then m.leavers else 0 end) as "10-29"
, sum(case when h.headcount >= 30 and h.headcount <= 49 then m.leavers else 0 end) as "30-49"
, sum(case when h.headcount >= 50 and h.headcount <= 249 then m.leavers else 0 end) as "50-249"
, sum(case when h.headcount >= 250 then m.leavers else 0 end) as "250+"
from company_headcount h
inner join monthly_leavers m
on m.company_id = h.company_id
group by m.window_month
order by m.window_month

SQL Fiddle