Question

我需要一些帮助来修改我的查询。我发现我的查询无法识别所有重叠的记录。以下面的记录列表为例：查询可以识别重叠的日期，而按照我的要求，每条记录的 RowEffectiveDate 应该是上一条记录 RowEndDate 的后一天，但对于这组特定的记录来说有点困难。查询只会从结果中去掉 8903 和 8904 这 2 条记录，其余记录全部保留。任何帮助都将不胜感激。下面是我正在使用的查询：

    -- Sample dimension table: one row per validity period of a product code.
    CREATE TABLE #dimP (
        pcode            CHAR(4),
        pkey             INT,
        RowEffectiveDate DATETIME,  -- start of the period
        rowenddate       DATETIME   -- end of the period
    );

    -- 19 sample periods for product 'PL56'; several of them overlap.
    INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
    VALUES
        ('PL56',  8855, '1900-01-01 00:00:00.000', '2018-02-13 23:59:59.997'),
        ('PL56',  8856, '2018-02-09 15:09:52.000', '2018-02-14 23:59:59.997'),
        ('PL56',  8903, '2018-02-09 15:09:52.000', '2018-02-15 23:59:59.997'),
        ('PL56',  8904, '2018-02-09 15:09:52.000', '2018-02-16 23:59:59.997'),
        ('PL56',  8935, '2018-02-14 00:00:00.000', '2018-02-17 23:59:59.997'),
        ('PL56',  8944, '2018-02-15 00:00:00.000', '2018-02-18 23:59:59.997'),
        ('PL56',  8955, '2018-02-16 00:00:00.000', '2018-03-02 23:59:59.997'),
        ('PL56',  8965, '2018-02-17 00:00:00.000', '2018-03-03 23:59:59.997'),
        ('PL56',  8972, '2018-02-18 00:00:00.000', '2018-03-04 23:59:59.997'),
        ('PL56',  8973, '2018-02-19 00:00:00.000', '2018-03-05 23:59:59.997'),
        ('PL56',  9114, '2018-03-03 00:00:00.000', '2018-04-24 23:59:59.997'),
        ('PL56',  9115, '2018-03-04 00:00:00.000', '2018-04-25 23:59:59.997'),
        ('PL56',  9136, '2018-03-05 00:00:00.000', '2018-04-26 23:59:59.997'),
        ('PL56',  9140, '2018-03-06 00:00:00.000', '2018-04-29 23:59:59.997'),
        ('PL56', 11597, '2018-04-25 00:00:00.000', '2018-04-25 23:59:59.997'),
        ('PL56', 11820, '2018-04-26 00:00:00.000', '2018-04-26 23:59:59.997'),
        ('PL56', 11837, '2018-04-27 00:00:00.000', '2018-04-29 23:59:59.997'),
        ('PL56', 11839, '2018-04-30 00:00:00.000', '2018-05-22 23:59:59.997'),
        ('PL56', 12372, '2018-05-23 00:00:00.000', '9999-12-31 00:00:00.000');

-- Preview: number the rows that share the same RowEffectiveDate, earliest
-- rowenddate first (ties broken by pkey).
SELECT
    pcode,
    pkey,
    RowEffectiveDate,
    rowenddate,
    ROW_NUMBER() OVER (
        PARTITION BY RowEffectiveDate
        ORDER BY rowenddate, pkey
    ) AS rn
FROM #dimP;

-- For every distinct RowEffectiveDate of product 'PL56', keep only the row
-- that ends first (ties broken by pkey).
-- NOTE(review): this only collapses rows that share the exact same start
-- timestamp. Rows whose ranges overlap but start at different times are all
-- kept, so it does not produce the gap-free chain listed as expected output.
WITH CTE AS (
    SELECT
        pcode,
        pkey,
        RowEffectiveDate,
        rowenddate,
        ROW_NUMBER() OVER (
            PARTITION BY RowEffectiveDate
            ORDER BY rowenddate, pkey
        ) AS rn
    FROM #dimP
    -- The sample rows inserted into #dimP all carry pcode 'PL56'; the
    -- previous 'KO18' filter matched none of them and returned no rows.
    WHERE pcode = 'PL56'
)
SELECT
    rn,
    pcode,
    pkey,
    RowEffectiveDate,
    rowenddate
FROM CTE
WHERE rn = 1
-- rn is always 1 at this point, so sorting by it left the row order
-- undefined; sort by the period start instead.
ORDER BY RowEffectiveDate;

DROP TABLE #dimP;

当前输出：

pcode  pkey          RowEffectiveDate         rowenddate
PL56    8855    1900-01-01 00:00:00.000 2018-02-13 23:59:59.997
PL56    8856    2018-02-09 15:09:52.000 2018-02-14 23:59:59.997
PL56    8903    2018-02-09 15:09:52.000 2018-02-15 23:59:59.997
PL56    8904    2018-02-09 15:09:52.000 2018-02-16 23:59:59.997
PL56    8935    2018-02-14 00:00:00.000 2018-02-17 23:59:59.997
PL56    8944    2018-02-15 00:00:00.000 2018-02-18 23:59:59.997
PL56    8955    2018-02-16 00:00:00.000 2018-03-02 23:59:59.997
PL56    8965    2018-02-17 00:00:00.000 2018-03-03 23:59:59.997
PL56    8972    2018-02-18 00:00:00.000 2018-03-04 23:59:59.997
PL56    8973    2018-02-19 00:00:00.000 2018-03-05 23:59:59.997
PL56    9114    2018-03-03 00:00:00.000 2018-04-24 23:59:59.997
PL56    9115    2018-03-04 00:00:00.000 2018-04-25 23:59:59.997
PL56    9136    2018-03-05 00:00:00.000 2018-04-26 23:59:59.997
PL56    9140    2018-03-06 00:00:00.000 2018-04-29 23:59:59.997
PL56    11597   2018-04-25 00:00:00.000 2018-04-25 23:59:59.997
PL56    11820   2018-04-26 00:00:00.000 2018-04-26 23:59:59.997
PL56    11837   2018-04-27 00:00:00.000 2018-04-29 23:59:59.997
PL56    11839   2018-04-30 00:00:00.000 2018-05-22 23:59:59.997
PL56    12372   2018-05-23 00:00:00.000 9999-12-31 00:00:00.000

预期输出应为：

pcode   pkey    RowEffectiveDate           rowenddate
PL56    8855    1900-01-01 00:00:00.000 2018-02-13 23:59:59.997
PL56    8935    2018-02-14 00:00:00.000 2018-02-17 23:59:59.997
PL56    8972    2018-02-18 00:00:00.000 2018-03-04 23:59:59.997
PL56    9136    2018-03-05 00:00:00.000 2018-04-26 23:59:59.997
PL56    11837   2018-04-27 00:00:00.000 2018-04-29 23:59:59.997
PL56    11839   2018-04-30 00:00:00.000 2018-05-22 23:59:59.997
PL56    12372   2018-05-23 00:00:00.000 9999-12-31 00:00:00.000

Answer 1

这实际上比我想象的要容易。

但是请注意，我并不认为这是“正确的”解决方案（正确的做法应该是压缩或合并日期范围），而且我隐约怀疑，真正需要的结果集其实应该是原始记录。

事实证明，我们最需要的只是一个递归查询：

-- Walk the periods of each pcode as a chain: begin at the earliest row, then
-- repeatedly step to the row that starts exactly where the previous one ends.
WITH chain AS (
    -- Anchor: rows for which no other row of the same pcode starts earlier.
    SELECT
        head.pcode,
        head.pkey,
        head.rowEffectiveDate,
        head.rowEndDate
    FROM dimP AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM dimP AS earlier
        WHERE earlier.pcode = head.pcode
          AND earlier.rowEffectiveDate < head.rowEffectiveDate
    )

    UNION ALL

    -- Recursive step: the row whose start equals the previous end plus 3 ms.
    -- This arithmetic is fragile: it only works while end values are stored
    -- as 23:59:59.997. Storing an exclusive end equal to the next start would
    -- make the join a plain equality.
    SELECT
        succ.pcode,
        succ.pkey,
        succ.rowEffectiveDate,
        succ.rowEndDate
    FROM dimP AS succ
    INNER JOIN chain AS prev
        ON prev.pcode = succ.pcode
       AND DATEADD(ms, 3, prev.rowEndDate) = succ.rowEffectiveDate
)
SELECT
    pcode,
    pkey,
    rowEffectiveDate,
    rowEndDate
FROM chain
ORDER BY pcode, rowEffectiveDate

SQL Fiddle Example
（请注意，如果存在重复的“下一”行，即多条记录都紧接在同一条记录之后开始，这个查询几乎会完全失效）

产生预期的结果：

pcode   pkey    rowEffectiveDate        rowEndDate
----------------------------------------------------------------
PL56    8855    1900-01-01T00:00:00Z    2018-02-13T23:59:59.997Z
PL56    8935    2018-02-14T00:00:00Z    2018-02-17T23:59:59.997Z
PL56    8972    2018-02-18T00:00:00Z    2018-03-04T23:59:59.997Z
PL56    9136    2018-03-05T00:00:00Z    2018-04-26T23:59:59.997Z
PL56    11837   2018-04-27T00:00:00Z    2018-04-29T23:59:59.997Z
PL56    11839   2018-04-30T00:00:00Z    2018-05-22T23:59:59.997Z
PL56    12372   2018-05-23T00:00:00Z    9999-12-31T00:00:00.000Z

Answer 2

我想我对如何解决这种情况有了一个主意，我是使用递归CTE来实现的。

-- Start from a clean slate so the script can be re-run in the same session.
DROP TABLE IF EXISTS #dimP;
GO

-- Sample dimension table: one row per validity period of a product code.
CREATE TABLE #dimP (
    pcode            CHAR(4),
    pkey             INT,
    RowEffectiveDate DATETIME,  -- start of the period
    rowenddate       DATETIME   -- end of the period
);

-- Product 'PL56': 19 periods, several of them overlapping.
INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
VALUES
    ('PL56',  8855, '1900-01-01 00:00:00.000', '2018-02-13 23:59:59.997'),
    ('PL56',  8856, '2018-02-09 15:09:52.000', '2018-02-14 23:59:59.997'),
    ('PL56',  8903, '2018-02-09 15:09:52.000', '2018-02-15 23:59:59.997'),
    ('PL56',  8904, '2018-02-09 15:09:52.000', '2018-02-16 23:59:59.997'),
    ('PL56',  8935, '2018-02-14 00:00:00.000', '2018-02-17 23:59:59.997'),
    ('PL56',  8944, '2018-02-15 00:00:00.000', '2018-02-18 23:59:59.997'),
    ('PL56',  8955, '2018-02-16 00:00:00.000', '2018-03-02 23:59:59.997'),
    ('PL56',  8965, '2018-02-17 00:00:00.000', '2018-03-03 23:59:59.997'),
    ('PL56',  8972, '2018-02-18 00:00:00.000', '2018-03-04 23:59:59.997'),
    ('PL56',  8973, '2018-02-19 00:00:00.000', '2018-03-05 23:59:59.997'),
    ('PL56',  9114, '2018-03-03 00:00:00.000', '2018-04-24 23:59:59.997'),
    ('PL56',  9115, '2018-03-04 00:00:00.000', '2018-04-25 23:59:59.997'),
    ('PL56',  9136, '2018-03-05 00:00:00.000', '2018-04-26 23:59:59.997'),
    ('PL56',  9140, '2018-03-06 00:00:00.000', '2018-04-29 23:59:59.997'),
    ('PL56', 11597, '2018-04-25 00:00:00.000', '2018-04-25 23:59:59.997'),
    ('PL56', 11820, '2018-04-26 00:00:00.000', '2018-04-26 23:59:59.997'),
    ('PL56', 11837, '2018-04-27 00:00:00.000', '2018-04-29 23:59:59.997'),
    ('PL56', 11839, '2018-04-30 00:00:00.000', '2018-05-22 23:59:59.997'),
    ('PL56', 12372, '2018-05-23 00:00:00.000', '9999-12-31 00:00:00.000');

-- Product 'KO18': 7 periods; 8861 and 8862 start on the same day.
INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
VALUES
    ('KO18',   502, '1900-01-01 00:00:00.000', '2017-12-06 23:59:59.997'),
    ('KO18',  5431, '2017-12-07 00:00:00.000', '2018-01-29 23:59:59.997'),
    ('KO18',  6852, '2018-01-30 00:00:00.000', '2018-02-09 23:59:59.997'),
    ('KO18',  8861, '2018-02-10 00:00:00.000', '2018-04-24 23:59:59.997'),
    ('KO18',  8862, '2018-02-10 00:00:00.000', '2018-04-25 23:59:59.997'),
    ('KO18', 11764, '2018-04-25 00:00:00.000', '2018-04-25 23:59:59.997'),
    ('KO18', 11797, '2018-04-26 00:00:00.000', '9999-12-31 00:00:00.000');

/*
SELECT *, ROW_NUMBER() OVER(PARTITION BY RowEffectiveDate ORDER BY rowenddate, PKey) AS rn
FROM #dimP
order by 1, 2
*/

-- Build, per pcode, the chain of periods in which each row starts on the
-- calendar day after the previous row ends.
-- The leading semicolon guards against an unterminated preceding statement.
;WITH CTE AS (
    -- Anchor: the first row of each pcode, ordered by RowEffectiveDate,
    -- then rowenddate, then pkey.
    SELECT
        pcode,
        pkey,
        RowEffectiveDate,
        rowenddate,
        RowEffectiveDateCalc,
        RowEndDateCalc
    FROM (
        SELECT
            pcode,
            pkey,
            RowEffectiveDate,
            rowenddate,
            CAST(RowEffectiveDate AS date) AS RowEffectiveDateCalc,
            CAST(rowenddate AS date)       AS RowEndDateCalc,
            ROW_NUMBER() OVER (
                PARTITION BY pcode
                ORDER BY RowEffectiveDate, rowenddate, pkey
            ) AS rn
        FROM #dimP
        -- To inspect a single product, add: WHERE pcode = 'PL56'
    ) AS X
    WHERE rn = 1

    UNION ALL

    -- Recursive step: every row of the same pcode whose start date is
    -- exactly one calendar day after the current row's end date. Comparing
    -- dates rather than datetimes avoids depending on the .997 end time.
    SELECT
        NextRow.pcode,
        NextRow.pkey,
        NextRow.RowEffectiveDate,
        NextRow.rowenddate,
        CAST(NextRow.RowEffectiveDate AS date) AS RowEffectiveDateCalc,
        CAST(NextRow.rowenddate AS date)       AS RowEndDateCalc
    FROM CTE AS CurRow
    INNER JOIN #dimP AS NextRow
        ON CurRow.pcode = NextRow.pcode
       AND DATEDIFF(day, CurRow.RowEndDateCalc, CAST(NextRow.RowEffectiveDate AS date)) = 1
)
-- DISTINCT: when several rows start on the same day the chain forks, and a
-- later row can be reached through more than one branch (in the KO18 sample,
-- 11797 follows both 8862 and 11764). Without DISTINCT it is listed twice.
-- NOTE(review): rows that start on the same day (e.g. 8861 and 8862) are
-- still both returned; choosing one of them needs a business rule.
SELECT DISTINCT
    pcode,
    pkey,
    RowEffectiveDate,
    rowenddate,
    RowEffectiveDateCalc,
    RowEndDateCalc
FROM CTE
ORDER BY pcode, pkey;

重叠日期-开始日期应为结束日期的第二天

2 个答案: