对sql中的日期范围进行计数和求和

时间:2017-11-30 10:21:24

标签: sql hana

当员工更换所属部门时,我遇到了一个不知如何处理的情况。

以下是具体细节: 我在SAP HANA数据库中有2个表 - 部门和员工。

Dept_ID|Start_Date |End_Date

1      |15-Jan-2017|31-Dec-9999

Emp_ID|Dept_ID|Start_Date  |End_Date

123   |1      |1-Jan-2017  |31-Dec-9999

456   |1      |1-Jan-2017  |31-Dec-9999

789   |1      |1-Jan-2017  |25-Jan-2017

789   |2      |26-Jan-2017 |31-Dec-9999

666   |1      |23-Jan-2017 |31-Dec-9999

我期望的输出是每个部门在各个日期区间内的员工数量:

Dept_ID|Emp_Count|Start_Date |End_Date

1      |3        |15-Jan-2017|23-Jan-2017

1      |4        |23-Jan-2017|25-Jan-2017

1      |3        |25-Jan-2017|31-Dec-9999

我尝试过使用 SUM() OVER (PARTITION BY ...) 窗口函数和 CTE,但没能得到想要的结果。

请帮我解决这个问题。

编辑: 添加创建定义和插入语句

-- Department validity periods: one row per (DEPT_ID, START_DATE).
-- The statement is terminated with ';' so it can be executed together with
-- the statements that follow it.
CREATE COLUMN TABLE DEPT ("DEPT_ID" NVARCHAR(400) NOT NULL ,
 "START_DATE" LONGDATE CS_LONGDATE NOT NULL ,
 "END_DATE" LONGDATE CS_LONGDATE NOT NULL ,
 PRIMARY KEY INVERTED VALUE ("DEPT_ID",
 "START_DATE")) UNLOAD PRIORITY 5 AUTO MERGE;

-- Employee-to-department assignments: one row per (EMP_ID, START_DATE).
-- NOTE(review): DEPT_ID is NVARCHAR(4000) here but NVARCHAR(400) in DEPT, and
-- it is nullable with no foreign key to DEPT — confirm whether that is intended.
-- The statement is terminated with ';' so it can be executed together with
-- the statements that follow it.
CREATE COLUMN TABLE EMP ("EMP_ID" NVARCHAR(400) NOT NULL ,
 "DEPT_ID" NVARCHAR(4000),
 "START_DATE" LONGDATE CS_LONGDATE NOT NULL ,
 "END_DATE" LONGDATE CS_LONGDATE NOT NULL ,
 PRIMARY KEY INVERTED VALUE ("EMP_ID",
 "START_DATE")) UNLOAD PRIORITY 5 AUTO MERGE;

-- Sample data for the question. Column lists are spelled out so the inserts
-- do not depend on the physical column order, and every statement is
-- terminated so the script can be run as a whole.
-- Date literals are strings in DD.MM.YYYY HH:MI:SS.F form, converted implicitly.
insert into DEPT (DEPT_ID, START_DATE, END_DATE) values('1','15.01.2017 22:58:09.0','31.12.9999 00:00:00.0');


-- Employee 789 has two rows: DEPT_ID '1' until 25.01.2017 10:00, then DEPT_ID '2'.
insert into EMP (EMP_ID, DEPT_ID, START_DATE, END_DATE) values('123','1','01.01.2017 22:58:09.0','31.12.9999 00:00:00.0');
insert into EMP (EMP_ID, DEPT_ID, START_DATE, END_DATE) values('456','1','01.01.2017 22:58:09.0','31.12.9999 00:00:00.0');
insert into EMP (EMP_ID, DEPT_ID, START_DATE, END_DATE) values('789','1','01.01.2017 22:58:09.0','25.01.2017 10:00:00.0');
insert into EMP (EMP_ID, DEPT_ID, START_DATE, END_DATE) values('789','2','25.01.2017 10:00:00.0','31.12.9999 00:00:00.0');
insert into EMP (EMP_ID, DEPT_ID, START_DATE, END_DATE) values('666','1','23.01.2017 22:58:09.0','31.12.9999 00:00:00.0');

3 个答案:

答案 0 :(得分:1)

不幸的是,我无法在SAP HANA上测试它,因此,我发布了一个SQL Server解决方案。我尝试使用我发现对SAP HANA有效的语法。

-- Counts employees per department for each consecutive date range.
-- Range boundaries are the department's own start/end dates plus every
-- employee start/end date that falls strictly inside the department's period.
with dates as
(
  -- Number the boundary dates per department so that consecutive boundaries
  -- (rn, rn + 1) can be paired into ranges below.
  select t.dept_id, t.dat, row_number() over (partition by t.dept_id order by t.dat) as rn
  from
  (
    -- UNION (not UNION ALL) removes duplicate boundary dates; otherwise two
    -- employees starting or ending at the same instant would yield a
    -- zero-length range (startd = endd) that still gets counted.
    select emp.dept_id, emp.start_date as dat
    from emp
    where emp.start_date > (select dept.start_date from dept where dept.dept_id = emp.dept_id)
    union
    select emp.dept_id, emp.end_date as dat
    from emp
    where emp.end_date < (select dept.end_date from dept where dept.dept_id = emp.dept_id)
    union
    select dept.dept_id, dept.start_date as dat from dept
    union
    select dept.dept_id, dept.end_date as dat from dept
  ) t
)
select e.dept_id, count(*), t.startd, t.endd
from emp e
inner join
(
  -- Pair each boundary with the next one to form the range [startd, endd).
  select d1.dept_id, d1.dat as startd, d2.dat as endd
  from dates d1
  inner join dates d2 on d1.dept_id = d2.dept_id and d1.rn + 1 = d2.rn
) t on t.dept_id = e.dept_id and e.start_date < t.endd and e.end_date > t.startd  -- employee period overlaps the range
group by e.dept_id, t.startd, t.endd
order by e.dept_id, t.startd;

demo

**RESULT**

dept_id count   startd     endd
1       3       15/01/2017 23/01/2017
1       4       23/01/2017 25/01/2017
1       3       25/01/2017 31/12/9999

答案 1 :(得分:1)

我昨天没能完成这个回答,不过既然已经做了一些准备工作,这里就在 Radim Bača 的解决方案基础上做了一些小改动。不同之处是:

  • 使用联接将部门从/到日期添加到员工行
  • 使用lead()函数代替row_number()(避免自联接)
  • 在 department 表中增加了一行部门 2 的记录

Demo at SQL Fiddle

-- Demo schema (SQL Server syntax): department validity periods.
CREATE TABLE Department (
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Departments 1 and 2, both starting 2017-01-15 and ending 9999-12-31.
INSERT INTO Department ([Dept_ID], [Start_Date], [End_Date])
VALUES
    (1, '2017-01-15 00:00:00', '9999-12-31 00:00:00'),
    (2, '2017-01-15 00:00:00', '9999-12-31 00:00:00');


-- Demo schema (SQL Server syntax): employee-to-department assignment periods.
CREATE TABLE Employee (
    [Emp_ID]     int,
    [Dept_ID]    int,
    [Start_Date] datetime,
    [End_Date]   datetime
);

-- Employee 789 has two rows: department 1 until 2017-01-25, department 2 from 2017-01-26.
INSERT INTO Employee ([Emp_ID], [Dept_ID], [Start_Date], [End_Date])
VALUES
    (123, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (456, 1, '2017-01-01 00:00:00', '9999-12-31 00:00:00'),
    (789, 1, '2017-01-01 00:00:00', '2017-01-25 00:00:00'),
    (789, 2, '2017-01-26 00:00:00', '9999-12-31 00:00:00'),
    (666, 1, '2017-01-23 00:00:00', '9999-12-31 00:00:00');

查询1

-- Counts employees per department for each consecutive date range.
-- CTE names are chosen so they do not collide with the table alias `e`
-- used for Employee in the final SELECT.
WITH
      -- Employee periods paired with the start/end of their department.
      emp_dept AS (
                  SELECT
                        e.dept_id
                      , e.start_date
                      , e.end_date
                      , d.start_date AS stdt
                      , d.end_date AS endt
                  FROM Employee e
                  INNER JOIN Department d ON e.dept_id = d.dept_id
            ),
      -- One row per boundary date; LEAD() supplies the next boundary as the
      -- end of the range (NULL for the last boundary of a department).
      date_range AS (
                  SELECT
                        r.dept_id
                      , r.start_date AS from_date
                      , LEAD(r.start_date) OVER (PARTITION BY r.dept_id
                                                 ORDER BY r.start_date) AS to_date
                  FROM (
                        -- UNION (not UNION ALL) removes duplicate boundary dates;
                        -- otherwise two employees starting or ending at the same
                        -- instant would yield a zero-length range that is counted.
                        SELECT dept_id , start_date FROM emp_dept WHERE start_date > stdt
                        UNION
                        SELECT dept_id , end_date   FROM emp_dept WHERE end_date < endt
                        UNION
                        SELECT dept_id , start_date FROM Department
                        UNION
                        SELECT dept_id , end_date   FROM Department
                  ) r
            )
SELECT
      e.dept_id
    , r.from_date
    , r.to_date
    , COUNT(*) AS num_employees
FROM Employee e
INNER JOIN date_range r ON e.dept_id = r.dept_id
      AND e.start_date < r.to_date      -- employee period overlaps the range
      AND e.end_date > r.from_date
      AND r.to_date IS NOT NULL         -- skip the last boundary, which has no following date
GROUP BY
      e.dept_id
    , r.from_date
    , r.to_date
ORDER BY
      e.dept_id
    , r.from_date

**Results**

| dept_id |            from_date |              to_date | num_employees |
|---------|----------------------|----------------------|---------------|
|       1 | 2017-01-15T00:00:00Z | 2017-01-23T00:00:00Z |             3 |
|       1 | 2017-01-23T00:00:00Z | 2017-01-25T00:00:00Z |             4 |
|       1 | 2017-01-25T00:00:00Z | 9999-12-31T00:00:00Z |             3 |
|       2 | 2017-01-26T00:00:00Z | 9999-12-31T00:00:00Z |             1 |

答案 2 :(得分:0)

请参阅下面用 SQL Server 编写的内容。(注:下方的代码片段实际上是 Python/NumPy 代码,并非 SQL,可能是转载时粘贴有误。)

# NOTE(review): this is Python/NumPy code, not SQL, and looks unrelated to the
# question above; `np`, `data` and `labels` are defined outside this snippet.
# Seed NumPy's global random generator so the sample below is repeatable.
np.random.seed(0)
# Draw 50000 random integers in [0, len(data)) to use as indices
# (independent draws, so the same index can appear more than once).
ind = np.random.randint(len(data), size=(50000,))
# Pick the sampled entries along the first axis of `data`, and the entries of
# `labels` at the same indices.
reduced_data = data[ind, :]
reduced_labels = labels[ind]