SQL 优化 - 从历史记录表中获取两个不同日期对应的值

时间:2011-08-20 07:19:12

标签: sql sql-server sql-server-2005 tsql optimization

不知道从哪里开始......但基本上我有一个报告表、一个帐户表和一个帐户历史记录表。帐户历史记录表中每个帐户有零条或多条记录,每条记录保存的是帐户"已取消"标记在某次更改之后的状态。还有其他一些内容,但基本上我希望返回帐户详细信息数据,并把帐户在报告开始日期和结束日期时的取消标记分别放在两个不同的列中。

这样做的最佳方式是什么?

我在下面有以下工作查询

想法:我是否应该针对每个日期分别连接(JOIN)一次历史记录表?

我想我可以在三个单独的查询中执行此操作(获取开始快照,结束快照,正常报告查询以及每个快照的连接)

其他什么?

预期输出:

AccountID, OtherData, StartDateCancelled, EndDateCancelled

测试表:

-- Test fixtures. These are table variables, so they must be declared in the
-- same batch as the queries that read them.
-- NOTE(review): the date strings are parsed according to the session's
-- DATEFORMAT / language setting; presumably month/day/year is intended -- confirm.

-- One row per report; StartDate and EndDate are the two points in time at
-- which each account's Cancelled flag is reported.
DECLARE @Report TABLE (ReportID INT, StartDate DATETIME, EndDate DATETIME);
-- The accounts that belong to each report.
DECLARE @ReportAccountDetail TABLE( ReportID INT, Accountid INT, Cancelled BIT );
-- Zero or more rows per account; each row holds the Cancelled flag after a change.
DECLARE @AccountHistory TABLE( AccountID INT, ModifiedDate DATETIME, Cancelled BIT );

INSERT INTO @Report (ReportID, StartDate, EndDate)
SELECT 1, '1/1/2011', '2/1/2011';

-- Explicit column lists keep the inserts correct if the table definitions change.
-- UNION ALL is used because the rows are already distinct; no dedup is needed.
INSERT INTO @ReportAccountDetail (ReportID, Accountid, Cancelled)
SELECT 1 AS ReportID, 1 AS AccountID, 0 AS Cancelled
UNION ALL
SELECT 1, 2, 0
UNION ALL
SELECT 1, 3, 1
UNION ALL
SELECT 1, 4, 1;

-- Account 1 has no history rows at all.
-- Account 3 has a single history row dated with the same literal as the report EndDate.
-- Account 4 has three history rows.
INSERT INTO @AccountHistory (AccountID, ModifiedDate, Cancelled)
SELECT 2 AS AccountID, '1/2/2010' AS ModifiedDate, 1 AS Cancelled
UNION ALL
SELECT 3, '2/1/2011', 1
UNION ALL
SELECT 4, '1/1/2010', 1
UNION ALL
SELECT 4, '2/1/2010', 0
UNION ALL
SELECT 4, '2/1/2011', 1;

当前查询:

-- Returns one row per account of report 1 with the Cancelled flag as of the
-- report's StartDate and as of its EndDate.
-- The derived table ranks each account's history rows twice: rows dated on or
-- before the reference date sort first, newest first, so rank 1 is the latest
-- qualifying row when one exists. When rank 1 does not qualify (or the account
-- has no history), DefaultCancel is used instead.
SELECT
    ranked.Accountid,
    ranked.OtherData,
    MAX(CASE
            WHEN ranked.BeginRank = 1 AND ranked.BeginHistoryExists = 1 THEN ranked.HistoryCancelled
            WHEN ranked.BeginRank = 1 THEN ranked.DefaultCancel
        END) AS StartDateCancelled,
    MAX(CASE
            WHEN ranked.EndRank = 1 AND ranked.EndHistoryExists = 1 THEN ranked.HistoryCancelled
            WHEN ranked.EndRank = 1 THEN ranked.DefaultCancel
        END) AS EndDateCancelled
FROM
(
    SELECT
        detail.Accountid,
        'OtherData' AS OtherData,
        --lots of other data
        ROW_NUMBER() OVER (
            PARTITION BY detail.AccountID
            ORDER BY
                CASE WHEN hist.ModifiedDate <= rpt.StartDate THEN 1 ELSE 0 END DESC,
                hist.ModifiedDate DESC
        ) AS BeginRank,
        CASE WHEN hist.ModifiedDate <= rpt.StartDate THEN 1 ELSE 0 END AS BeginHistoryExists,
        ROW_NUMBER() OVER (
            PARTITION BY detail.AccountID
            ORDER BY
                CASE WHEN hist.ModifiedDate <= rpt.EndDate THEN 1 ELSE 0 END DESC,
                hist.ModifiedDate DESC
        ) AS EndRank,
        CASE WHEN hist.ModifiedDate <= rpt.EndDate THEN 1 ELSE 0 END AS EndHistoryExists,
        -- The flag is cast to INT so the outer query can aggregate it with MAX.
        CAST(hist.Cancelled AS INT) AS HistoryCancelled,
        0 AS DefaultCancel
    FROM @Report AS rpt
    INNER JOIN @ReportAccountDetail AS detail
        ON rpt.ReportID = detail.ReportID
    --Others joins related for data to return
    LEFT JOIN @AccountHistory AS hist
        ON hist.AccountID = detail.AccountID
    WHERE rpt.ReportID = 1
) AS ranked
GROUP BY
    ranked.AccountID,
    ranked.OtherData

也欢迎就如何撰写 Stack Overflow 问题提出意见。谢谢!

1 个答案:

答案 0 :(得分:4)

ROW_NUMBER()经常让我感到惊讶并超出我的期望。但是,在这种情况下,我很想使用相关的子查询。至少,我会根据替代方案对它们进行测试。

注意:我还会使用真实的表、真实的索引以及接近实际规模的模拟数据量来测试。(既然这个问题值得发帖提问,我认为就值得在贴近现实的条件下测试一下。)

-- For every report (and its accounts, if any) look up the Cancelled flag of the
-- most recent history row dated on or before StartDate, and likewise for EndDate,
-- using one correlated sub-query per date.
-- ISNULL turns a NULL lookup result (for example, no qualifying history row) into 0.
SELECT
    rpt.ReportID,
    acct.AccountID,
    acct.OtherData,
    ISNULL(
        (
            SELECT TOP 1 hist.Cancelled
            FROM AccountHistory AS hist
            WHERE hist.AccountID = acct.AccountID
              AND hist.ModifiedDate <= rpt.StartDate
            ORDER BY hist.ModifiedDate DESC
        ),
        0
    ) AS StartDateCancelled,
    ISNULL(
        (
            SELECT TOP 1 hist.Cancelled
            FROM AccountHistory AS hist
            WHERE hist.AccountID = acct.AccountID
              AND hist.ModifiedDate <= rpt.EndDate
            ORDER BY hist.ModifiedDate DESC
        ),
        0
    ) AS EndDateCancelled
FROM Report AS rpt
LEFT JOIN ReportAccountDetail AS acct
    ON acct.ReportID = rpt.ReportID
ORDER BY
    rpt.ReportID,
    acct.AccountID

注意:不知出于何种原因,我发现 TOP 1 配合 ORDER BY 比 MAX() 更快。


根据您建议的答案,我会略微修改它以使用ISNULL,而不是尝试让Exists列工作。

我还要在完成所有工作后加入“其他数据”,而不是在最里面的查询中,以避免必须按所有“其他数据”进行分组。

-- NOTE: a statement that precedes WITH in the same batch must be terminated
-- with a semicolon.
WITH
  HistoricData AS
(
  -- One row per (report, account, history row dated on or before EndDate).
  -- An account with no such history row still yields one row, with NULL history columns.
  SELECT
    Report.ReportID,
    c.Accountid,
    c.OtherData,  -- placeholder for the other account columns to return
    -- Rows dated on or before StartDate sort first, newest first. If no row
    -- qualifies, rank 1 falls on a row dated AFTER StartDate.
    ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate DESC) AS BeginRank,
    -- The join already limits history to rows on or before EndDate, so newest first is enough.
    ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY ch.ModifiedDate DESC) AS EndRank,
    -- Only a row dated on or before StartDate may supply the start-date flag;
    -- otherwise a later change would be reported as the state at StartDate.
    -- The flag is cast to INT because MAX() does not accept BIT.
    CASE WHEN ch.ModifiedDate <= Report.StartDate THEN CAST(ch.Cancelled AS INT) END AS StartCancelled,
    CAST(ch.Cancelled AS INT) AS EndCancelled
  FROM
    @Report AS Report
  INNER JOIN
    @ReportAccountDetail AS C
      ON Report.ReportID = C.ReportID
  LEFT JOIN
    @AccountHistory AS CH
      ON  CH.AccountID     = C.AccountID
      AND CH.ModifiedDate <= Report.EndDate
)
,
  FlattenedData AS
(
  -- Collapse to one row per report/account; a missing value defaults to 0 (not cancelled).
  SELECT
    ReportID,
    Accountid,
    OtherData,
    ISNULL(MAX(CASE WHEN BeginRank = 1 THEN StartCancelled END), 0) AS StartDateCancelled,
    ISNULL(MAX(CASE WHEN EndRank   = 1 THEN EndCancelled   END), 0) AS EndDateCancelled
  FROM
    [HistoricData]
  GROUP BY
    ReportID,
    AccountID,
    OtherData
)
SELECT
  *
FROM
  [FlattenedData]
-- Placeholder join: attach the remaining report data here, after the flattening,
-- so that it does not have to appear in the GROUP BY above.
LEFT JOIN
  [OtherData]
    ON Whatever = YouLike
WHERE
  [FlattenedData].ReportID = 1


最终可能的版本......

-- NOTE: a statement that precedes WITH in the same batch must be terminated
-- with a semicolon.
WITH
  ReportStartHistory AS
(
  -- Latest history row per (report, account) dated on or before the report's StartDate.
  SELECT
    ReportID,
    AccountID,
    Cancelled
  FROM
  (
    SELECT
      [Report].ReportID,
      -- DESC so that SequenceID = 1 is the most recent qualifying row, not the oldest.
      ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
      [History].AccountID,
      [History].Cancelled
    FROM
      Report                 AS [Report]
    INNER JOIN
      AccountHistory         AS [History]
        ON [History].ModifiedDate <= [Report].StartDate
  )
    AS [data]
  WHERE
    SequenceID = 1
)
,
  ReportEndHistory AS
(
  -- Latest history row per (report, account) dated on or before the report's EndDate.
  SELECT
    ReportID,
    AccountID,
    Cancelled
  FROM
  (
    SELECT
      [Report].ReportID,
      -- DESC so that SequenceID = 1 is the most recent qualifying row, not the oldest.
      ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
      [History].AccountID,
      [History].Cancelled
    FROM
      Report                 AS [Report]
    INNER JOIN
      AccountHistory         AS [History]
        ON [History].ModifiedDate <= [Report].EndDate
  )
    AS [data]
  WHERE
    SequenceID = 1
)
SELECT
  [Report].ReportID,
  [Account].*,
  -- No history row on or before the date means the flag defaults to 0.
  ISNULL([ReportStartHistory].Cancelled, 0) AS StartDateCancelled,
  ISNULL([ReportEndHistory].Cancelled,   0) AS EndDateCancelled
FROM
  Report                     AS [Report]
-- The original join had no ON clause, which does not parse.
-- NOTE(review): the per-report account list is assumed to be ReportAccountDetail
-- joined on ReportID -- confirm against the real schema.
INNER JOIN
  ReportAccountDetail        AS [Account]
    ON [Account].ReportID = [Report].ReportID
LEFT JOIN
  [ReportStartHistory]
    ON  [ReportStartHistory].ReportID  = [Report].ReportID
    AND [ReportStartHistory].AccountID = [Account].AccountID
LEFT JOIN
  [ReportEndHistory]
    ON  [ReportEndHistory].ReportID    = [Report].ReportID
    AND [ReportEndHistory].AccountID   = [Account].AccountID