SQL Server 2008:我很难把伪SQL转换成实际可执行的查询。简而言之,我有一个结构如下的数据集:
id, startdate, enddate
所以,例如:
1,1/1/2010,2/1/2010
1,3/1/2010,3/15/2010
2,4/1/2010,6/1/2010
2,5/1/2010,5/15/2010
2,7/1/2010,7/15/2010
每个ID可以多次列出,日期可能会重叠。
我需要:
a)获取每个ID的连续日期。所以,例如:
1 = 1/1/2010 thru 2/1/2010 + 3/1/2010 through 3/15/2010 = 43
2 = 4/1/2010 thru 6/1/2010 + 7/1/2010 through 7/15/2010 (note: the 5/1/10 thru 5/15 was omitted because it overlapped)
我最初的想法是编写一个执行此操作的SQL查询:
Date, ID, Active
我要提取的是一整年的数据,因此表中的每个ID在每个日期都会有一条“日期/ID”记录。“Active”列的值为1或0,取决于该ID在该日期是否处于“开启”状态。
然后我可以按ID分组,得到每个ID在这一年中累计的不重复“Active”天数。
问题是,这将创建一个包含超过3亿条记录的表格,而我无法想象没有更好的方法可以做到这一点。
非常感谢任何建议。
答案 0 :(得分:1)
如果我没有理解错你的问题,下面的查询可以得到你想要的结果:
/*
  Setup: the sample periods from the question, one row per
  (id, startdate, enddate). An id can appear several times and its
  periods can overlap (see id 2).

  Dates are written as 'YYYYMMDD'. SQL Server reads that form the same way
  under every SET LANGUAGE / SET DATEFORMAT setting, whereas an 'm/d/yyyy'
  string is read as d/m/yyyy in a dmy session ('2/1/2010' becomes 2 January
  and '3/15/2010' fails to convert).
*/
CREATE TABLE #dates
(
    id        INT,
    startdate DATETIME,
    enddate   DATETIME
);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO #dates (id, startdate, enddate)
VALUES
    (1, '20100101', '20100201'),
    (1, '20100301', '20100315'),
    (2, '20100401', '20100601'),
    (2, '20100501', '20100515'),
    (2, '20100701', '20100715');
/*
  Tally table: the integers 1 through 500, one per row.
  It could be replaced by a permanent numbers table.
  Any query that expands rows by joining on NUM can produce at most
  500 rows per source row.
*/
CREATE TABLE #numbers
(
    NUM INT PRIMARY KEY CLUSTERED
);

-- Build 1..500 as (hundreds, tens, ones) digit combinations plus one:
-- 5 x 10 x 10 = 500 distinct values.
INSERT INTO #numbers (NUM)
SELECT 100 * hundreds.d + 10 * tens.d + ones.d + 1
FROM (VALUES (0), (1), (2), (3), (4)) AS hundreds (d)
CROSS JOIN (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS tens (d)
CROSS JOIN (VALUES (0), (1), (2), (3), (4), (5), (6), (7), (8), (9)) AS ones (d);
/*
  Count the distinct calendar days covered by each id.

  Inner query: expand every #dates row into one row per day by joining to
  the tally table. A row yields DATEDIFF(DAY, startdate, enddate) dates:
  startdate, startdate + 1, ... up to the day BEFORE enddate (the end date
  itself is not produced). A row whose startdate equals its enddate yields
  nothing, and expansion is limited by the largest NUM in #numbers.

  Outer query: COUNT(DISTINCT ...) collapses days that several overlapping
  periods of the same id share, so each day is counted once per id.
*/
SELECT expanded.id,
       COUNT(DISTINCT expanded.dte) AS active
FROM (
    SELECT period.id,
           DATEADD(DAY, tally.NUM - 1, period.startdate) AS dte
    FROM #numbers AS tally
    INNER JOIN #dates AS period
        ON DATEDIFF(DAY, period.startdate, period.enddate) >= tally.NUM
) AS expanded
GROUP BY expanded.id;
答案 1 :(得分:1)
http://data.stackexchange.com/stackoverflow/q/109335/
-- Sample periods from the question, one row per (id, startdate, enddate).
-- An id can appear several times and its periods can overlap (see id 2).
-- Dates use 'YYYYMMDD', which SQL Server parses identically under every
-- SET LANGUAGE / SET DATEFORMAT setting; 'm/d/yyyy' strings are read as
-- d/m/yyyy in a dmy session ('3/15/2010' would fail to convert).
DECLARE @tbl AS TABLE (id INT, startdate DATETIME, enddate DATETIME);

-- Explicit column list so the insert does not depend on column order.
INSERT INTO @tbl (id, startdate, enddate)
VALUES
    (1, '20100101', '20100201'),
    (1, '20100301', '20100315'),
    (2, '20100401', '20100601'),
    (2, '20100501', '20100515'),
    (2, '20100701', '20100715');
-- Count the distinct calendar days covered by each id.
--
-- "calendar" is a generated list of consecutive days: row number r maps to
-- '1/1/2010' + (r - 1) days. Adjust the start date and the TOP row count to
-- fit the data; days before the start date are never counted. The row
-- source is a cross join of master.sys.all_columns with itself, used only
-- to get enough rows to number.
-- NOTE(review): TOP has no ORDER BY, so taking exactly row numbers
-- 1..100000 relies on rows being numbered as they are produced - confirm.
--
-- A day counts for a period when startdate <= day <= enddate (both ends
-- inclusive). COUNT(DISTINCT ...) counts a day once per id even when
-- several of that id's periods overlap on it.
SELECT period.id,
       COUNT(DISTINCT calendar.dt)
FROM @tbl AS period
INNER JOIN (
    SELECT TOP (100000)
           DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY cols_a.object_id) - 1, '1/1/2010') AS dt
    FROM master.sys.all_columns AS cols_a
    CROSS JOIN master.sys.all_columns AS cols_b
) AS calendar
    ON calendar.dt >= period.startdate
   AND calendar.dt <= period.enddate
GROUP BY period.id;