我需要一些帮助来修改我的查询。我观察到我的查询无法识别所有重叠的记录,例如在上面的记录列表中它可以识别重叠的日期,并且根据我的要求,RowEffectiveDate应该是下一个RowEndDate的一天,但对于此特定记录来说有点困难。它只会从表中删除2条记录8903和8904,并将保留所有其他记录。 任何帮助,将不胜感激。 下面提到了我正在使用的查询:
CREATE TABLE #dimP (
pcode char(4),
pkey int,
RowEffectiveDate datetime,
rowenddate datetime
);
INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
VALUES
('PL56',8855,'1900-01-01 00:00:00.000','2018-02-13 23:59:59.997'),
('PL56',8856,'2018-02-09 15:09:52.000','2018-02-14 23:59:59.997'),
('PL56',8903,'2018-02-09 15:09:52.000','2018-02-15 23:59:59.997'),
('PL56',8904,'2018-02-09 15:09:52.000','2018-02-16 23:59:59.997'),
('PL56',8935,'2018-02-14 00:00:00.000','2018-02-17 23:59:59.997'),
('PL56',8944,'2018-02-15 00:00:00.000','2018-02-18 23:59:59.997'),
('PL56',8955,'2018-02-16 00:00:00.000','2018-03-02 23:59:59.997'),
('PL56',8965,'2018-02-17 00:00:00.000','2018-03-03 23:59:59.997'),
('PL56',8972,'2018-02-18 00:00:00.000','2018-03-04 23:59:59.997'),
('PL56',8973,'2018-02-19 00:00:00.000','2018-03-05 23:59:59.997'),
('PL56',9114,'2018-03-03 00:00:00.000','2018-04-24 23:59:59.997'),
('PL56',9115,'2018-03-04 00:00:00.000','2018-04-25 23:59:59.997'),
('PL56',9136,'2018-03-05 00:00:00.000','2018-04-26 23:59:59.997'),
('PL56',9140,'2018-03-06 00:00:00.000','2018-04-29 23:59:59.997'),
('PL56',11597,'2018-04-25 00:00:00.000','2018-04-25 23:59:59.997'),
('PL56',11820,'2018-04-26 00:00:00.000','2018-04-26 23:59:59.997'),
('PL56',11837,'2018-04-27 00:00:00.000','2018-04-29 23:59:59.997'),
('PL56',11839,'2018-04-30 00:00:00.000','2018-05-22 23:59:59.997'),
('PL56',12372,'2018-05-23 00:00:00.000','9999-12-31 00:00:00.000');
SELECT *, ROW_NUMBER() OVER(PARTITION BY RowEffectiveDate ORDER BY rowenddate, PKey) AS rn
FROM #dimP;
WITH CTE AS (
SELECT pcode,pkey, RowEffectiveDate, rowenddate,
ROW_NUMBER() OVER(PARTITION BY RowEffectiveDate ORDER BY rowenddate, PKey) AS rn
FROM #dimP
WHERE pcode='KO18'
)
SELECT rn, pcode, pkey, RowEffectiveDate, rowenddate
FROM CTE
WHERE rn = 1
ORDER BY rn;
DROP TABLE #dimP;
当前输出:
pcode pkey RowEffectiveDate rowenddate
PL56 8855 1900-01-01 00:00:00.000 2018-02-13 23:59:59.997
PL56 8856 2018-02-09 15:09:52.000 2018-02-14 23:59:59.997
PL56 8903 2018-02-09 15:09:52.000 2018-02-15 23:59:59.997
PL56 8904 2018-02-09 15:09:52.000 2018-02-16 23:59:59.997
PL56 8935 2018-02-14 00:00:00.000 2018-02-17 23:59:59.997
PL56 8944 2018-02-15 00:00:00.000 2018-02-18 23:59:59.997
PL56 8955 2018-02-16 00:00:00.000 2018-03-02 23:59:59.997
PL56 8965 2018-02-17 00:00:00.000 2018-03-03 23:59:59.997
PL56 8972 2018-02-18 00:00:00.000 2018-03-04 23:59:59.997
PL56 8973 2018-02-19 00:00:00.000 2018-03-05 23:59:59.997
PL56 9114 2018-03-03 00:00:00.000 2018-04-24 23:59:59.997
PL56 9115 2018-03-04 00:00:00.000 2018-04-25 23:59:59.997
PL56 9136 2018-03-05 00:00:00.000 2018-04-26 23:59:59.997
PL56 9140 2018-03-06 00:00:00.000 2018-04-29 23:59:59.997
PL56 11597 2018-04-25 00:00:00.000 2018-04-25 23:59:59.997
PL56 11820 2018-04-26 00:00:00.000 2018-04-26 23:59:59.997
PL56 11837 2018-04-27 00:00:00.000 2018-04-29 23:59:59.997
PL56 11839 2018-04-30 00:00:00.000 2018-05-22 23:59:59.997
PL56 12372 2018-05-23 00:00:00.000 9999-12-31 00:00:00.000
预期输出应为:
pcode pkey RowEffectiveDate rowenddate
PL56 8855 1900-01-01 00:00:00.000 2018-02-13 23:59:59.997
PL56 8935 2018-02-14 00:00:00.000 2018-02-17 23:59:59.997
PL56 8972 2018-02-18 00:00:00.000 2018-03-04 23:59:59.997
PL56 9136 2018-03-05 00:00:00.000 2018-04-26 23:59:59.997
PL56 11837 2018-04-27 00:00:00.000 2018-04-29 23:59:59.997
PL56 11839 2018-04-30 00:00:00.000 2018-05-22 23:59:59.997
PL56 12372 2018-05-23 00:00:00.000 9999-12-31 00:00:00.000
答案 0 :(得分:0)
这实际上比我想象的要容易。
但是,请注意,我不认为这是“正确的”解决方案(而不是 condensing 或合并日期范围),并且我偷偷地怀疑所需的结果集应该实际上是原始的。
事实证明,我们最需要的只是一个递归查询:
WITH R AS (SELECT pcode, pkey, rowEffectiveDate, rowEndDate
FROM dimP AS a
-- This makes it get the "first" row, with no other rows starting before it
WHERE NOT EXISTS (SELECT 1
FROM dimP AS b
WHERE b.pcode = a.pcode
AND b.rowEffectiveDate < a.rowEffectiveDate)
UNION ALL
SELECT curr.pcode, curr.pkey, curr.rowEffectiveDate, curr.rowEndDate
FROM dimP AS curr
JOIN R
ON R.pcode = curr.pcode
-- This date math is very fragile, and the data should be changed
-- so that then end-value is exclusive, and equal to the next start
AND DATEADD(ms, 3, R.rowEndDate) = curr.rowEffectiveDate)
SELECT *
FROM R
ORDER BY pcode, rowEffectiveDate
SQL Fiddle Example
(请注意,如果有重复的“第二”行,这几乎完全中断了)
产生预期的结果:
pcode pkey rowEffectiveDate rowEndDate
----------------------------------------------------------------
PL56 8855 1900-01-01T00:00:00Z 2018-02-13T23:59:59.997Z
PL56 8935 2018-02-14T00:00:00Z 2018-02-17T23:59:59.997Z
PL56 8972 2018-02-18T00:00:00Z 2018-03-04T23:59:59.997Z
PL56 9136 2018-03-05T00:00:00Z 2018-04-26T23:59:59.997Z
PL56 11837 2018-04-27T00:00:00Z 2018-04-29T23:59:59.997Z
PL56 11839 2018-04-30T00:00:00Z 2018-05-22T23:59:59.997Z
PL56 12372 2018-05-23T00:00:00Z 9999-12-31T00:00:00.000Z
答案 1 :(得分:0)
我想我对如何解决这种情况有了一个主意,我是使用递归CTE来实现的。
Drop table if exists #dimP
go
CREATE TABLE #dimP (
pcode char(4),
pkey int,
RowEffectiveDate datetime,
rowenddate datetime
);
INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
VALUES
('PL56',8855,'1900-01-01 00:00:00.000','2018-02-13 23:59:59.997'),
('PL56',8856,'2018-02-09 15:09:52.000','2018-02-14 23:59:59.997'),
('PL56',8903,'2018-02-09 15:09:52.000','2018-02-15 23:59:59.997'),
('PL56',8904,'2018-02-09 15:09:52.000','2018-02-16 23:59:59.997'),
('PL56',8935,'2018-02-14 00:00:00.000','2018-02-17 23:59:59.997'),
('PL56',8944,'2018-02-15 00:00:00.000','2018-02-18 23:59:59.997'),
('PL56',8955,'2018-02-16 00:00:00.000','2018-03-02 23:59:59.997'),
('PL56',8965,'2018-02-17 00:00:00.000','2018-03-03 23:59:59.997'),
('PL56',8972,'2018-02-18 00:00:00.000','2018-03-04 23:59:59.997'),
('PL56',8973,'2018-02-19 00:00:00.000','2018-03-05 23:59:59.997'),
('PL56',9114,'2018-03-03 00:00:00.000','2018-04-24 23:59:59.997'),
('PL56',9115,'2018-03-04 00:00:00.000','2018-04-25 23:59:59.997'),
('PL56',9136,'2018-03-05 00:00:00.000','2018-04-26 23:59:59.997'),
('PL56',9140,'2018-03-06 00:00:00.000','2018-04-29 23:59:59.997'),
('PL56',11597,'2018-04-25 00:00:00.000','2018-04-25 23:59:59.997'),
('PL56',11820,'2018-04-26 00:00:00.000','2018-04-26 23:59:59.997'),
('PL56',11837,'2018-04-27 00:00:00.000','2018-04-29 23:59:59.997'),
('PL56',11839,'2018-04-30 00:00:00.000','2018-05-22 23:59:59.997'),
('PL56',12372,'2018-05-23 00:00:00.000','9999-12-31 00:00:00.000');
INSERT INTO #dimP (pcode, pkey, RowEffectiveDate, rowenddate)
VALUES ('KO18', 502 , '1900-01-01 00:00:00.000', '2017-12-06 23:59:59.997'),
('KO18', 5431 , '2017-12-07 00:00:00.000', '2018-01-29 23:59:59.997'),
('KO18', 6852 , '2018-01-30 00:00:00.000', '2018-02-09 23:59:59.997'),
('KO18', 8861 , '2018-02-10 00:00:00.000', '2018-04-24 23:59:59.997'),
('KO18', 8862 , '2018-02-10 00:00:00.000', '2018-04-25 23:59:59.997'),
('KO18', 11764, '2018-04-25 00:00:00.000', '2018-04-25 23:59:59.997'),
('KO18', 11797, '2018-04-26 00:00:00.000', '9999-12-31 00:00:00.000');
/*
SELECT *, ROW_NUMBER() OVER(PARTITION BY RowEffectiveDate ORDER BY rowenddate, PKey) AS rn
FROM #dimP
order by 1, 2
*/
;WITH CTE AS (
select pcode,pkey, RowEffectiveDate, rowenddate,RowEffectiveDateCalc,RowEndDateCalc
from (
SELECT pcode,pkey, RowEffectiveDate, rowenddate
,cast(RowEffectiveDate as date) RowEffectiveDateCalc
,cast(rowenddate as date) RowEndDateCalc
, ROW_NUMBER() OVER(PARTITION BY pcode ORDER BY RowEffectiveDate, rowenddate, PKey) AS rn
FROM #dimP
-- where PCode = 'PL56'
) X
where rn = 1
union all
SELECT NextRow.PCode, NextRow.Pkey, NextRow.RowEffectiveDate, NextRow.rowenddate
,cast(NextRow.RowEffectiveDate as date) RowEffectiveDateCalc
,cast(NextRow.rowenddate as date) RowEndDateCalc
FROM CTE CurRow
join #dimP NextRow
on CurRow.PCode = NextRow.PCode
and DateDiff(day, CurRow.RowEndDateCalc, cast(NextRow.RowEffectiveDate as date)) = 1
)
select * from CTE
order by 1, 2