这个查询能否简化,并由此得到优化?

时间:2012-12-28 17:17:27

标签: sql sql-server-2005

我们有这个数据集:

-- Sample dataset: one row per change document.
-- ApprovedDate / IssuedDate are stored as text and DocumentNumber is text as well,
-- so every comparison on these columns is a string comparison.
CREATE TABLE #Changes
(
    [GUID]         varchar(250),
    Value          numeric(36,6),
    DocumentNumber varchar(250),
    Approved       bit,
    ApprovedDate   varchar(250),
    IssuedDate     varchar(250),
    Category       varchar(250)
);
-- Load the sample rows into #Changes.
-- The explicit column list is not in table-definition order (Approved precedes Value),
-- so each tuple below reads: GUID, DocumentNumber, Approved, Value, ApprovedDate, IssuedDate, Category.
-- Unapproved rows (Approved = 0) carry an empty string, not NULL, in ApprovedDate.
-- NOTE(review): the multi-row VALUES form needs SQL Server 2008 or later, while the
-- post is tagged sql-server-2005 -- confirm the target version.
INSERT INTO #Changes 
    (
    [GUID], 
    DocumentNumber, 
    Approved, 
    Value, 
    ApprovedDate, 
    IssuedDate, 
    Category
    ) 
 values
 ('4F7253A4E1B3D841B84D4A82B4F0E7A2', 11, 0, 18526.7, '', '2009-03-31T05:00:00Z', 'UNKNOWN'),
 ('D97537852E927B499C21C14F3D13CF06', 1, 0, 0, '', '2008-11-10T05:00:00Z', 'UNKNOWN'),
 ('857DADB463807345918729B33399B36F', 2, 0, 0, '', '2008-11-10T05:00:00Z', 'UNKNOWN'),
 ('7989D242E05AFF4FB5EE99114822BF80', 21, 0, 50112, '', '2009-07-22T05:00:00Z', 'UNKNOWN'),
 ('16A0AB27FD3A784D9E0A14406C7683E0', 3, 0, 0, '', '2009-01-15T05:00:00Z', 'UNKNOWN'),
 ('D3D7B1C306D38C438FC3DEDFCB57D411', 131, 0, 17204, '', '2010-12-14T05:00:00Z', 'UNKNOWN'),
 ('2C89D974DDF86743A0D7D62B385FBDEF', 147, 0, 0, '', '2010-12-01T05:00:00Z', 'UNKNOWN'),
 ('F371D4237C837D448824697EB0162905', 198, 0, 0, '', '2011-01-10T05:00:00Z', 'UNKNOWN'),
  ('433D64C871AE4E46A0E1BFCE2BB69BA7', 364, 0, 0, '', '2011-11-14T05:00:00Z', 'UNKNOWN'),
  ('808496DBDE76CB4F911396BB817724F3', 352, 0, 0, '', '2011-10-17T05:00:00Z', 'UNKNOWN'),
  ('9545DEF1666B5F4D8626F19F8E9E9333', 418, 0, 10948, '', '2012-03-07T22:19:18Z', 'UNKNOWN'),
  ('244D7D89B79E0F4E91100E4ADB300656', 439, 0, 50945, '', '2012-04-27T20:33:26Z', 'UNKNOWN'),
  ('115A427BBB1D2C43BA11D9E5875FAA2C', 465, 0, 480049, '', '2012-07-20T16:17:54Z', 'UNKNOWN'),
  ('3A2271EFCC767E4CA40017E68802F10C', 478, 0, 54298, '', '2012-08-01T17:26:38Z', 'UNKNOWN'),
  ('99D0EFC5A9F1AA498DB1A4CDF294129B', 490, 0, 11500, '', '2012-09-18T14:23:13Z', 'ALTER'),
  ('38B2E3A379C5084998E6A84D496AC555', 491, 0, 26088, '', '2012-09-25T06:00:00Z', 'ALTER'),
  ('8902831C8FAD4941841EE2847656BDAF', 494, 0, -825, '', '2012-10-16T14:20:06Z', 'ALTER'),
  ('7AFDB08A002AE54A8DE7699855AEBE30', 495, 0, 221, '', '2012-10-16T14:21:27Z', 'ALTER'),
  ('38A2CCEF5F0B294AA8B8752F461D121D', 496, 0, 0, '', '2012-12-24T01:11:15Z', 'ALTER'),
  ('24CCD5CE409E674593108CBD816DBCCE', 486, 1, -825, '2012-10-01T21:42:52Z', '2012-09-17T20:42:12Z', 'ALTER'),
  ('C7458704E36C8F448C1F3A485EB08304', 485, 1, 10000, '2012-10-01T21:25:56Z', '2012-09-11T21:29:44Z', 'ALTER'),
  ('B511953AE6FB6446A63AA83C159057BE', 487, 1, 82170, '2012-10-01T21:42:51Z', '2012-09-17T20:46:41Z', 'ALTER'),
  ('EC977BC304A971439D04BB9DF4D8188A',488, 1, 15500, '2012-10-01T20:58:15Z', '2012-09-18T06:00:00Z', 'ALTER'),
  ('D9B1F0C0A8E490448697B783639E09E0', 489, 1, 11503, '2012-10-01T21:42:50Z', '2012-09-18T13:56:18Z', 'ALTER'),
  ('698BB6D65832D146A49727C717A591A1', 492, 1, 2787, '2012-10-01T21:10:06Z', '2012-09-25T15:55:02Z', 'ALTER'),
  ('155D4F2B1854B34FABCDE8CF20F1E44C', 493, 1, 12162, '2012-10-01T21:10:06Z', '2012-09-25T16:04:40Z', 'ALTER'),
  ('137C9BF2B1EFD34B8831ADA70C5F9431', 1, 1, 369543, '2011-12-08T13:41:04Z', '1899-12-30T05:00:00Z', 'DRAW'),
  ('7F29FC7114BD10468AE92A047345B5DB', 2, 1, 7258, '2011-12-08T13:41:04Z', '2011-10-20T05:00:00Z', 'DRAW'),
  ('6B66D8EAD88E6E4FA29401CD524B978A', 3, 1, 979321, '2011-12-08T13:41:04Z', '2011-11-08T05:00:00Z', 'DRAW'),
  ('7F393B712B213041A6DD211E04F6DCA6', 4, 1, 14998, '2012-04-20T15:16:21Z', '2012-04-18T21:07:07Z', 'DRAW'),
  ('2255F84E7C7DA04389765724872D6413', 5, 1, 58926, '2012-04-20T15:16:23Z', '2012-04-18T21:13:15Z', 'DRAW'),
  ('DB4A5588DEB9F34C868F7AD1CB13ACC3', 6, 1, 13232, '2012-04-20T15:16:05Z', '2012-04-18T21:17:00Z', 'DRAW'),
  ('B5231AE40F8E7D41BA0A4D09614CBDF9', 7, 1, 10176, '2012-04-20T15:16:25Z', '2012-04-18T21:19:41Z', 'DRAW'),
  ('2362D54FCC53E447AC7D8289EA89FD05', 8, 1, 17556, '2012-04-20T15:16:04Z', '2012-04-18T21:21:20Z', 'DRAW'),
  ('6ED4565CA041704B8D006EDA4A1E4CF9', 9, 1, 399639, '2012-05-30T16:32:43Z', '2012-05-17T06:00:00Z', 'DRAW'),
  ('B21BE07E3E42C2418C70AD17862D3AE1', 10, 1, 6231, '2012-08-16T16:55:00Z', '2012-08-02T16:02:03Z', 'DRAW'),
  ('8FD252A50137754A98698F93AC9B01A7', 11, 1, 629, '2012-08-16T16:54:58Z', '2012-08-02T16:07:57Z', 'DRAW'),
  ('1B9AFD2C20362F48A486E8A535B29AF5', 20, 1, -113810, '2011-12-13T17:15:53Z', '2010-02-10T05:00:00Z', 'UNKNOWN');

以下是查询:

-- For every change row, total the positive and the negative Values of the rows
-- that come before it among rows with the same Approved flag and Category.
-- A row comes "before" another when its effective date is earlier, or the
-- effective dates are equal and its DocumentNumber compares lower (DocumentNumber
-- is varchar, so that tie-break is a string comparison).
-- Effective date = first 10 characters of ApprovedDate when Approved = 1,
-- otherwise of IssuedDate; NULL is replaced by '0000-00-00'.
-- Rows with nothing before them still appear, with both totals equal to 0.
WITH dated AS
(
    SELECT
        c.[GUID],
        c.Value,
        c.DocumentNumber,
        c.Approved,
        c.Category,
        ISNULL(
            SUBSTRING(CASE WHEN c.Approved = 1 THEN c.ApprovedDate ELSE c.IssuedDate END, 1, 10),
            '0000-00-00'
        ) AS EffectiveDate
    FROM #Changes AS c
)
SELECT
    cur.[GUID],
    SUM(CASE WHEN prev.Value > 0 THEN prev.Value ELSE 0 END) AS [positive_previous_total],
    SUM(CASE WHEN prev.Value < 0 THEN prev.Value ELSE 0 END) AS [negative_previous_total]
FROM dated AS cur
LEFT OUTER JOIN dated AS prev
    ON  prev.[GUID]   <> cur.[GUID]
    AND prev.Approved  = cur.Approved
    AND prev.Category  = cur.Category
    AND (
            prev.EffectiveDate < cur.EffectiveDate
         OR (
                prev.EffectiveDate  = cur.EffectiveDate
            AND prev.DocumentNumber < cur.DocumentNumber
            )
        )
GROUP BY cur.[GUID]

在目前这个记录数量下,查询很快;但当扩展到 700 条记录时,需要花费几秒钟,而我们需要把它降低到 0.5 秒。

HERE IS A LIVE EXAMPLE OF THE SCHEMA AND SCRIPT ON SQL FIDDLE

4 个答案:

答案 0 :(得分:4)

就个人而言,我会利用Computed Columns来消除查询中的复杂性。

例如:你的表定义可能会变成(注意我也在这里更正了数据类型):

-- #Changes with real datetime columns plus persisted computed columns that
-- strip the time portion, so the query can compare plain dates.
-- A NULL date maps to 1753-01-01 (the smallest datetime value) so that it sorts
-- before every real date; the original '0000-00-00' placeholder cannot be
-- converted to datetime.
CREATE TABLE #Changes 
(
    [GUID] varchar(250), 
    Value numeric(36,6), 
    DocumentNumber varchar(250), 
    Approved numeric(36,6), -- Is there any reason this is not a BIT field?
    ApprovedDate datetime,
    -- Style 112 (yyyymmdd) keeps the string-to-datetime conversion deterministic,
    -- which PERSISTED requires.
    ApprovedDate_NoTime AS (CASE WHEN ApprovedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, ApprovedDate), 0) END) PERSISTED,
    IssuedDate datetime, 
    IssuedDate_NoTime AS (CASE WHEN IssuedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, IssuedDate), 0) END) PERSISTED,
    -- A computed column may not reference another computed column, so the two
    -- *_NoTime expressions are repeated here instead of being referenced by name.
    ApprovedOrIssuedDate AS (
        CASE
            WHEN Approved = 1 THEN
                CASE WHEN ApprovedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, ApprovedDate), 0) END
            ELSE
                CASE WHEN IssuedDate IS NULL THEN CONVERT(DATETIME, '17530101', 112) ELSE DATEADD(DAY, DATEDIFF(DAY, 0, IssuedDate), 0) END
        END) PERSISTED,
    Category varchar(250)
);

然后您可以将查询重新编写为:

-- Same running totals as the original query, but the per-row date logic now
-- lives in the ApprovedOrIssuedDate column, so the join compares that column
-- directly: earlier date first, DocumentNumber as the tie-break.
SELECT
    cur.[GUID],
    SUM(CASE WHEN prev.Value > 0 THEN prev.Value ELSE 0 END) AS [positive_previous_total],
    SUM(CASE WHEN prev.Value < 0 THEN prev.Value ELSE 0 END) AS [negative_previous_total]
FROM #Changes AS cur
LEFT OUTER JOIN #Changes AS prev
    ON  prev.[GUID]   <> cur.[GUID]
    AND prev.Approved  = cur.Approved
    AND prev.Category  = cur.Category
    AND (
            prev.ApprovedOrIssuedDate < cur.ApprovedOrIssuedDate
         OR (
                prev.ApprovedOrIssuedDate = cur.ApprovedOrIssuedDate
            AND prev.DocumentNumber       < cur.DocumentNumber
            )
        )
GROUP BY cur.[GUID]

为什么 Approved 不是 BIT 字段?我已将日期列的数据类型更改为 datetime,并为您提供了一个把日期的时间部分清零的列。另外,以上代码未经测试,但思路应该很清楚。

我还建议阅读这两篇文章(this 和 this;假设你想在比较中忽略 datetime 的时间部分)。

答案 1 :(得分:3)

好的,一些事情:

1. 永远、永远不要将日期存储为字符串。

除非这确实是数据库中的 #temporary 表(那样的话还有另外一些疑问/问题),否则绝对不能将真实日期存储为字符串。这样做只会给你带来麻烦。

在30多年的咨询过程中,我看到数百个数据库的日期存储为字符串,其中每一个都有无效的日期字符串。

2. 始终为表添加适当的键和索引。

这甚至适用于#Temp表,除非它们很小或者您确定它们不会有帮助。

在您的情况下,您可能应该在GUID上有唯一/主键。对于性能,您应该在{GUID,Approved,Category}(可能是Clustered)上有一个索引。

答案 2 :(得分:3)

我做了一些尝试,下面是具体情况:

  • 我在临时表中新增了一个 INT IDENTITY 主键列,并在其上建立了聚集索引。是的,这看起来违反直觉,但在很多情况下并非如此;它确实能加速许多操作,甚至包括插入和删除!请参阅 Kimberly Tripp 撰写的 The Clustered Index Debate Continues...,了解其中的原因

  • 我还把 ApprovedDate 和 IssuedDate 改成了真正的 DATETIME 数据类型,而不是 varchar。如果它用起来像日期、看起来像日期、叫起来也像日期,那它就是日期,就应该按日期存储!

    请参阅Bad habits to kick : choosing the wrong data type - 您应该始终使用最合适的数据类型 - 毕竟这就是它们的用途!

  • 我在JOIN中用作外键的列添加了索引以加快查询速度

  • 我添加了一个计算列,把"如果已批准则使用 ApprovedDate,否则使用 IssuedDate"的整套逻辑封装在一个地方,使查询容易阅读得多!由于它是一个伪 DATE(时间部分被清零),这基本上就替你处理了原先在查询中(反复出现的)那些丑陋的 CONVERT / ISNULL / SUBSTRING 语句。

所以这是我更改的脚本来创建临时表

-- #Changes with a surrogate clustered key, real DATETIME columns and a persisted
-- computed column holding the date the query orders by.
CREATE TABLE #Changes 
    (
    ID INT IDENTITY(1,1) NOT NULL PRIMARY KEY CLUSTERED,
    [GUID] varchar(250), 
    Value numeric(36,6), 
    DocumentNumber varchar(250), 
    Approved numeric(36,6), 
    ApprovedDate DATETIME,
    IssuedDate DATETIME, 
    Category varchar(250),

    -- ApprovedDate when Approved = 1, otherwise IssuedDate, with the time portion
    -- zeroed. DATEADD takes (datepart, number, date): the day count goes second
    -- and the base date 0 goes last.
    -- NOTE(review): a NULL source date yields NULL here, whereas the original
    -- query substituted '0000-00-00' -- confirm NULL dates cannot occur.
    ApprovedOrIssuedDate AS CASE 
                               WHEN Approved = 1 
                               THEN DATEADD(DAY, DATEDIFF(DAY, 0, ApprovedDate), 0)
                               ELSE DATEADD(DAY, DATEDIFF(DAY, 0, IssuedDate), 0)
                            END PERSISTED
    );

-- Key order follows the join predicate: the equality columns (Approved, Category)
-- come first, then the range columns (ApprovedOrIssuedDate, DocumentNumber).
-- [GUID] is compared only with <> and Value is only aggregated, so both are
-- INCLUDEd to make the index cover the query rather than being key columns.
CREATE NONCLUSTERED INDEX IX_Index01 ON #Changes(Approved, Category, ApprovedOrIssuedDate, DocumentNumber) 
                                     INCLUDE([GUID], Value)

然后您的查询变得更加容易:

-- For each row, sum the positive and the negative Values of the rows that come
-- before it within the same Approved flag and Category. "Before" means an
-- earlier ApprovedOrIssuedDate, or the same date and a lower DocumentNumber.
-- The column name is spelled with identical casing everywhere so the query also
-- resolves when the database uses a case-sensitive collation.
SELECT 
    a.[GUID], 
    [positive_previous_total] = SUM(CASE WHEN b.Value > 0 THEN b.Value ELSE 0 END), 
    [negative_previous_total] = SUM(CASE WHEN b.Value < 0 THEN b.Value ELSE 0 END) 
FROM 
    #Changes a 
LEFT OUTER JOIN 
    #Changes b ON b.[GUID] <> a.[GUID] 
               AND b.Approved  = a.Approved 
               AND b.Category  = a.Category 
               AND 
                  (b.ApprovedOrIssuedDate < a.ApprovedOrIssuedDate
                   OR 
                   (b.ApprovedOrIssuedDate = a.ApprovedOrIssuedDate
                    AND b.DocumentNumber < a.DocumentNumber)
                  ) 
GROUP BY 
     a.[GUID]

在我的测量中,我的查询成本得到了很大的改善(从0.022降到0.0146)

答案 3 :(得分:2)

从根本上说,你是在计算累计和(running sum)。在 SQL Server 2012 之前的版本中,只能通过联接(或相关子查询,其执行计划应该类似)来实现。我将您的查询简化为:

-- Running totals of earlier rows without the CASE/ISNULL wrappers.
-- The join already forces b.Approved = a.Approved, so testing a.Approved alone
-- selects which date column applies to both sides.
-- The whole date/document test sits inside ONE parenthesised group: otherwise
-- the top-level OR would bypass the GUID / Approved / Category conditions.
-- NOTE(review): this compares the full date values, not only their first 10
-- characters as the original query did -- confirm that is intended.
SELECT a.[GUID], 
       [positive_previous_total] = SUM(CASE WHEN b.Value>0 THEN b.Value ELSE 0 END), 
       [negative_previous_total] = SUM(CASE WHEN b.Value<0 THEN b.Value ELSE 0 END) 
FROM #Changes a LEFT OUTER JOIN
     #Changes b 
     ON b.[GUID]    <> a.[GUID] AND
        b.Approved  = a.Approved AND
        b.Category  = a.Category AND
        (
          (a.Approved = 1 AND
           (b.ApprovedDate < a.ApprovedDate OR
            (b.ApprovedDate = a.ApprovedDate AND b.DocumentNumber < a.DocumentNumber)
           )
          ) OR
          (a.Approved <> 1 AND
           (b.IssuedDate < a.IssuedDate OR
            (b.IssuedDate = a.IssuedDate AND b.DocumentNumber < a.DocumentNumber)
           )
          )
        ) 
GROUP BY a.[GUID]

ISNULL 是不必要的:在定义 #Changes 时,就应该直接把 NULL 替换成您想要的值。另外,由于连接条件已经保证 a.Approved = b.Approved,因此不需要 CASE 语句。

既然 #Changes 表是由您自己创建的,就应该给它添加一个 DateDoc 键。该键由日期(视情况取 ApprovedDate 或 IssuedDate,为空时取一个合适的占位日期)后面拼接文档编号构成。文档编号应在左侧用 0 补齐,日期格式应为 YYYYMMDD。

现在,您可以将from子句写为:

-- Replacement FROM clause for the query above (aliases a and b are kept so the
-- SELECT list and GROUP BY still apply).
-- datedoc is the proposed key column: the date as YYYYMMDD followed by the
-- zero-padded document number, so one string comparison orders rows by date
-- and then by document number.
FROM #Changes a LEFT OUTER JOIN
     #Changes b 
     ON b.[GUID]    <> a.[GUID] AND
         b.Approved  = a.Approved AND
         b.Category  = a.Category and
         b.datedoc < a.datedoc

使用这种结构,我认为在 GUID、Approved、Category 和 Datedoc 上建立索引可以帮助查询。我不确定在索引末尾再加上 Value 列是否有帮助。不过,由于行数不多,您的数据应该可以完全放入内存。