大数据集聚合查询的优化

时间:2017-09-26 12:26:01

标签: sql-server tsql optimization sql-server-2014

我有以下表格[这里是SQL Fiddle]

-- Sample activity table: the large side of the cost roll-up join (joined to TmpA on VolNum).
CREATE TABLE TmpB (
    [EpiSer]               INT,
    [SINum]                INT,
    [VolNum]               INT,
    [CTPQty]               INT,
    [VolAmt]               INT,
    [CTPActivityGroupCode] VARCHAR(6)
);

-- Sample data for TmpB: 65 rows, 49 with VolNum = 1 followed by 16 with VolNum = 2.
-- Every row uses SINum 21, CTPQty 1, VolAmt 1 and activity group 'SUP001'; only EpiSer varies.
INSERT INTO TmpB
    ([EpiSer], [SINum], [VolNum], [CTPQty], [VolAmt], [CTPActivityGroupCode])
VALUES
    (104009, 21, 1, 1, 1, 'SUP001'),
    (42698, 21, 1, 1, 1, 'SUP001'),
    (82878, 21, 1, 1, 1, 'SUP001'),
    (94081, 21, 1, 1, 1, 'SUP001'),
    (80234, 21, 1, 1, 1, 'SUP001'),
    (81856, 21, 1, 1, 1, 'SUP001'),
    (19752, 21, 1, 1, 1, 'SUP001'),
    (17830, 21, 1, 1, 1, 'SUP001'),
    (73218, 21, 1, 1, 1, 'SUP001'),
    (32713, 21, 1, 1, 1, 'SUP001'),
    (90162, 21, 1, 1, 1, 'SUP001'),
    (59414, 21, 1, 1, 1, 'SUP001'),
    (63765, 21, 1, 1, 1, 'SUP001'),
    (26554, 21, 1, 1, 1, 'SUP001'),
    (72489, 21, 1, 1, 1, 'SUP001'),
    (94324, 21, 1, 1, 1, 'SUP001'),
    (34453, 21, 1, 1, 1, 'SUP001'),
    (101487, 21, 1, 1, 1, 'SUP001'),
    (21117, 21, 1, 1, 1, 'SUP001'),
    (4169, 21, 1, 1, 1, 'SUP001'),
    (27258, 21, 1, 1, 1, 'SUP001'),
    (85282, 21, 1, 1, 1, 'SUP001'),
    (98243, 21, 1, 1, 1, 'SUP001'),
    (98436, 21, 1, 1, 1, 'SUP001'),
    (15969, 21, 1, 1, 1, 'SUP001'),
    (70324, 21, 1, 1, 1, 'SUP001'),
    (9828, 21, 1, 1, 1, 'SUP001'),
    (61929, 21, 1, 1, 1, 'SUP001'),
    (3508, 21, 1, 1, 1, 'SUP001'),
    (65344, 21, 1, 1, 1, 'SUP001'),
    (19377, 21, 1, 1, 1, 'SUP001'),
    (104502, 21, 1, 1, 1, 'SUP001'),
    (57413, 21, 1, 1, 1, 'SUP001'),
    (66841, 21, 1, 1, 1, 'SUP001'),
    (39590, 21, 1, 1, 1, 'SUP001'),
    (50640, 21, 1, 1, 1, 'SUP001'),
    (66934, 21, 1, 1, 1, 'SUP001'),
    (36675, 21, 1, 1, 1, 'SUP001'),
    (65101, 21, 1, 1, 1, 'SUP001'),
    (84464, 21, 1, 1, 1, 'SUP001'),
    (74118, 21, 1, 1, 1, 'SUP001'),
    (40101, 21, 1, 1, 1, 'SUP001'),
    (18816, 21, 1, 1, 1, 'SUP001'),
    (61936, 21, 1, 1, 1, 'SUP001'),
    (23153, 21, 1, 1, 1, 'SUP001'),
    (50947, 21, 1, 1, 1, 'SUP001'),
    (39776, 21, 1, 1, 1, 'SUP001'),
    (31377, 21, 1, 1, 1, 'SUP001'),
    (106624, 21, 1, 1, 1, 'SUP001'),
    (41255, 21, 2, 1, 1, 'SUP001'),
    (82878, 21, 2, 1, 1, 'SUP001'),
    (94081, 21, 2, 1, 1, 'SUP001'),
    (80234, 21, 2, 1, 1, 'SUP001'),
    (81856, 21, 2, 1, 1, 'SUP001'),
    (19752, 21, 2, 1, 1, 'SUP001'),
    (17830, 21, 2, 1, 1, 'SUP001'),
    (73218, 21, 2, 1, 1, 'SUP001'),
    (32713, 21, 2, 1, 1, 'SUP001'),
    (90162, 21, 2, 1, 1, 'SUP001'),
    (59414, 21, 2, 1, 1, 'SUP001'),
    (63765, 21, 2, 1, 1, 'SUP001'),
    (26554, 21, 2, 1, 1, 'SUP001'),
    (72489, 21, 2, 1, 1, 'SUP001'),
    (94324, 21, 2, 1, 1, 'SUP001'),
    (34453, 21, 2, 1, 1, 'SUP001')
;


-- Cost-rate table: the roll-up query joins it to TmpB on VolNum and sums
-- VolAmt * OCostUnit / VolAmt * FCostUnit per ResCstID.
-- The two rate columns declare precision and scale explicitly. A bare NUMERIC in
-- SQL Server means NUMERIC(18, 0), which would round every fractional rate in the
-- sample data to a whole number on insert.
CREATE TABLE TmpA (
    [VolNum]    int,
    [CostItem]  varchar(15),
    [OCostUnit] numeric(28, 20),
    [FCostUnit] numeric(28, 20),
    [ResCstID]  varchar(6)
);

-- Sample data for TmpA: 34 rows, the same 17 cost items listed for VolNum 1 and again for VolNum 2.
-- Some literals use E-notation (e.g. -7.85518770596873E-05).
INSERT INTO TmpA
    ([VolNum], [CostItem], [OCostUnit], [FCostUnit], [ResCstID])
VALUES
    (1, 'AgencyOtherOH_V', 0.000155982435905091, 0.000155982435905091, 'CSC004'),
    (1, 'CNST_F', 0.0166158829887076, 0.0166158829887076, 'CSC001'),
    (1, 'Depreciation_F', 0.00044046833444276, 0.00338301131105729, 'CSC004'),
    (1, 'GSOH_F', 0.002285123323198, 0.002285123323198, 'CSC004'),
    (1, 'GSOH_S', 0.00291555947552205, 0.00291555947552205, 'CSC004'),
    (1, 'GSOH_V', -7.85518770596873E-05, -7.85518770596873E-05, 'CSC004'),
    (1, 'GSPayrollOH_S', 1.95806910092635E-05, 1.95806910092635E-05, 'CSC004'),
    (1, 'INC001ET005', 0.000124265914489854, 0.000124265914489854, 'REC001'),
    (1, 'PDC_F', 0.000897633427026484, 0.000897633427026484, 'CSC004'),
    (1, 'R&D_F', 0, 0.0911004017527717, 'REC002'),
    (1, 'R&D_S', 0, 0.184788054915322, 'REC002'),
    (1, 'R&D_V', 0, -1.04534066995235, 'REC002'),
    (1, 'R&DSW_S', 0, 1.7173336734415, 'REC002'),
    (1, 'R&DSW_V', 0, 0.159935865317166, 'REC002'),
    (1, 'SWOtherOH_F', 0.000101979332044079, 0.000101979332044079, 'CSC004'),
    (1, 'SWOtherOH_S', 0.00997174733301197, 0.00997174733301197, 'CSC004'),
    (1, 'SWOtherOH_V', 0.000224400291272709, 0.000224400291272709, 'CSC004'),
    (2, 'AgencyOtherOH_V', 0.000155982435905091, 0.000155982435905091, 'CSC004'),
    (2, 'CNST_F', 0.0166158829887076, 0.0166158829887076, 'CSC001'),
    (2, 'Depreciation_F', 0.00044046833444276, 0.00338301131105729, 'CSC004'),
    (2, 'GSOH_F', 0.002285123323198, 0.002285123323198, 'CSC004'),
    (2, 'GSOH_S', 0.00291555947552205, 0.00291555947552205, 'CSC004'),
    (2, 'GSOH_V', -7.85518770596873E-05, -7.85518770596873E-05, 'CSC004'),
    (2, 'GSPayrollOH_S', 1.95806910092635E-05, 1.95806910092635E-05, 'CSC004'),
    (2, 'INC001ET005', 0.000124265914489854, 0.000124265914489854, 'REC001'),
    (2, 'PDC_F', 0.000897633427026484, 0.000897633427026484, 'CSC004'),
    (2, 'R&D_F', 0, 0.0911004017527717, 'REC002'),
    (2, 'R&D_S', 0, 0.184788054915322, 'REC002'),
    (2, 'R&D_V', 0, -1.04534066995235, 'REC002'),
    (2, 'R&DSW_S', 0, 1.7173336734415, 'REC002'),
    (2, 'R&DSW_V', 0, 0.159935865317166, 'REC002'),
    (2, 'SWOtherOH_F', 0.000101979332044079, 0.000101979332044079, 'CSC004'),
    (2, 'SWOtherOH_S', 0.00997174733301197, 0.00997174733301197, 'CSC004'),
    (2, 'SWOtherOH_V', 0.000224400291272709, 0.000224400291272709, 'CSC004')
;

我需要使用TmpA汇总TmpB中记录的成本。为此,我使用以下查询[对于“普通”大小的表格一直正常工作]

-- Cost roll-up: each TmpB activity row is matched to every TmpA cost row that shares its
-- VolNum, and the extended costs (VolAmt * unit cost) are summed per activity record,
-- activity group, resource-cost id, VolAmt and CTPQty.
SELECT
    act.EpiSer                       AS ActivityRecordID,
    act.CTPActivityGroupCode         AS ActCstID,
    rate.ResCstID,
    act.VolAmt,
    act.CTPQty                       AS ActCnt,
    SUM(act.VolAmt * rate.OCostUnit) AS TotOCst,
    SUM(act.VolAmt * rate.FCostUnit) AS TotFCst
FROM TmpB AS act
INNER JOIN TmpA AS rate
    ON act.VolNum = rate.VolNum
GROUP BY
    act.EpiSer,
    act.CTPActivityGroupCode,
    rate.ResCstID,
    act.VolAmt,
    act.CTPQty;

现在的问题是,在我们目前的场景下,表TmpB多达2600万条记录,而TmpA有14万条记录。这种情况对我们来说很不寻常,但我们无能为力。由于存在聚合,并且连接使用的是非唯一列,上述查询会使tempdb不断膨胀,直到查询因以下错误[应用程序日志文件中的条目]而中止:

  

2017-09-26 06:11:07.027 ERROR :: r.d():由于'ACTIVE_TRANSACTION',数据库'tempdb'的事务日志已满。      在System.Data.SqlClient.SqlConnection.OnError(SqlException异常,Boolean breakConnection,Action`1 wrapCloseInAction)      ...

我们如何修改/优化查询以避免此问题 - 我尝试过索引,但这是我能做的全部吗?

-- Nonclustered index on TmpB keyed by EpiSer, then VolNum.
-- (CREATE INDEX builds a nonclustered index unless CLUSTERED is specified.)
CREATE INDEX IX_TmpB
    ON TmpB (EpiSer ASC, VolNum ASC);

-- Nonclustered index on TmpA's join column.
CREATE INDEX IX_TmpA
    ON TmpA (VolNum ASC);

尝试解决#1:

我使用索引

-- Index on the join column (VolNum) of ServDataCtp_Stage1.
-- INCLUDE carries the other columns the roll-up query reads from this table.
CREATE INDEX IX_ServDataCtp_Stage1
    ON ServDataCtp_Stage1 (VolNum ASC)
    INCLUDE (EpiSer, CTPQty, VolAmt, CTPActivityGroupCode);
GO

我需要对执行进行全面尝试,看看这是否可以解决问题。

尝试解决#2:

然后我尝试使用Alan对INDEXED VIEWS的建议

-- Session options set ahead of the indexed-view DDL that follows, one option per statement.
SET NUMERIC_ROUNDABORT OFF;
SET ANSI_PADDING ON;
SET ANSI_WARNINGS ON;
SET CONCAT_NULL_YIELDS_NULL ON;
SET ARITHABORT ON;
SET QUOTED_IDENTIFIER ON;
SET ANSI_NULLS ON;
GO

-- Attempted schema-bound view for the cost roll-up: joins ServDataCtp_Stage1 to TmpA on
-- VolNum and sums VolAmt * unit cost per activity record / activity group / resource-cost id.
-- COUNT_BIG(*) is carried alongside the SUMs because the view uses GROUP BY and is meant
-- to be indexed.
-- NOTE(review): both tables are referenced by one-part names. The error reported for this
-- statement (Msg 4512) says schema binding requires two-part (schema.object) names.
CREATE VIEW vB   
WITH SCHEMABINDING  
AS  
    SELECT [ServDataCtp_Stage1].[EpiSer] as ActivityRecordID, 
           [ServDataCtp_Stage1].[CTPActivityGroupCode] as ActCstID, 
           [TmpA].[ResCstID], 
           [ServDataCtp_Stage1].[VolAmt], 
           [ServDataCtp_Stage1].[CTPQty] AS ActCnt, 
           SUM([ServDataCtp_Stage1].[VolAmt] * [TmpA].[OCostUnit]) AS TotOCst, 
           SUM([ServDataCtp_Stage1].[VolAmt] * [TmpA].[FCostUnit]) AS TotFCst, 
           COUNT_BIG(*) AS _COUNT 
    FROM [ServDataCtp_Stage1] INNER JOIN 
            [TmpA] ON [ServDataCtp_Stage1].[VolNum] = [TmpA].[VolNum] 
    GROUP BY [ServDataCtp_Stage1].[EpiSer], 
             [ServDataCtp_Stage1].[CTPActivityGroupCode], 
             [TmpA].[ResCstID], 
             [ServDataCtp_Stage1].[VolAmt], 
             [ServDataCtp_Stage1].[CTPQty];
GO

但是第二个查询会出现以下错误:

  

Msg 4512,Level 16,State 3,Procedure vB,Line 4 [Batch Start Line 360]   无法对视图'vB'进行架构绑定,因为名称'ServDataCtp_Stage1'对于架构绑定无效。名称必须采用两部分格式,并且对象不能引用自身。

你能帮忙解决这个问题吗?

1 个答案:

答案 0 :(得分:2)

ResCstId让事情变得更复杂一些。GROUP BY会导致排序,而对来自多个表的列进行分组,会使查询更难从任何索引中获益。我打赌你的执行计划中有一个或多个开销很大的排序(Sort)操作。

如果你不需要ResCstId,可以在EpiSer、CTPActivityGroupCode、VolAmt、CTPQty上创建一个非聚集索引,这会大大加快速度。

如果需要ResCstId,那么您可以将查询转换为索引视图;索引视图正是为这类场景而设计的。您需要在视图中包含一个COUNT_BIG列(这是SQL Server索引视图的一个特殊要求)。有关详细信息,请参阅此链接:https://docs.microsoft.com/en-us/sql/relational-databases/views/create-indexed-views

-----更新-----

创建一个索引视图比我想象的要复杂一点,但我做到了。 旁注:这就是发布DDL和示例数据非常有用的原因。

首先让我们来看看我们试图解决的问题。使用提供的DDL,查询会生成如下执行计划: enter image description here 其中的排序(Sort)操作就是罪魁祸首。如果您将鼠标悬停在它上面,可以看到更多细节: enter image description here

创建索引视图的问题在于,我们需要一组唯一的列,而根据您发布的DDL,您的表并不具备这样的列。为了解决这个问题,我在每个表中添加了一个代理键列,并将其命名为"someId"。这一改动可能会让该方案无法被采用,但至少我可以在这里展示这个解决方案将如何运作。

这里是更新后的DDL:

-- Reset step so the demo script can be re-run.
-- The schema-bound view is dropped first: SQL Server refuses to drop a table that a
-- WITH SCHEMABINDING view still references. The DROPs are schema-qualified so they target
-- the same objects the OBJECT_ID checks test for.
-- OBJECT_ID checks are used rather than DROP ... IF EXISTS, which requires SQL Server 2016+.
if object_id('dbo.vwTmpAB') is not null drop view dbo.vwTmpAB;
if object_id('dbo.tmpA') is not null drop table dbo.tmpA;
if object_id('dbo.tmpB') is not null drop table dbo.tmpB;

-- TmpB with an added surrogate key (someid), used to make the indexed view's key unique.
CREATE TABLE TmpB (
    someid                 INT IDENTITY(1, 1),
    [EpiSer]               INT,
    [SINum]                INT,
    [VolNum]               INT,
    [CTPQty]               INT,
    [VolAmt]               INT,
    [CTPActivityGroupCode] VARCHAR(6)
);
-- Sample data for TmpB: 65 rows, 49 with VolNum = 1 followed by 16 with VolNum = 2.
-- someid is left out of the column list so the IDENTITY column generates it.
INSERT INTO TmpB
    ([EpiSer], [SINum], [VolNum], [CTPQty], [VolAmt], [CTPActivityGroupCode])
VALUES
    (104009, 21, 1, 1, 1, 'SUP001'),
    (42698, 21, 1, 1, 1, 'SUP001'),
    (82878, 21, 1, 1, 1, 'SUP001'),
    (94081, 21, 1, 1, 1, 'SUP001'),
    (80234, 21, 1, 1, 1, 'SUP001'),
    (81856, 21, 1, 1, 1, 'SUP001'),
    (19752, 21, 1, 1, 1, 'SUP001'),
    (17830, 21, 1, 1, 1, 'SUP001'),
    (73218, 21, 1, 1, 1, 'SUP001'),
    (32713, 21, 1, 1, 1, 'SUP001'),
    (90162, 21, 1, 1, 1, 'SUP001'),
    (59414, 21, 1, 1, 1, 'SUP001'),
    (63765, 21, 1, 1, 1, 'SUP001'),
    (26554, 21, 1, 1, 1, 'SUP001'),
    (72489, 21, 1, 1, 1, 'SUP001'),
    (94324, 21, 1, 1, 1, 'SUP001'),
    (34453, 21, 1, 1, 1, 'SUP001'),
    (101487, 21, 1, 1, 1, 'SUP001'),
    (21117, 21, 1, 1, 1, 'SUP001'),
    (4169, 21, 1, 1, 1, 'SUP001'),
    (27258, 21, 1, 1, 1, 'SUP001'),
    (85282, 21, 1, 1, 1, 'SUP001'),
    (98243, 21, 1, 1, 1, 'SUP001'),
    (98436, 21, 1, 1, 1, 'SUP001'),
    (15969, 21, 1, 1, 1, 'SUP001'),
    (70324, 21, 1, 1, 1, 'SUP001'),
    (9828, 21, 1, 1, 1, 'SUP001'),
    (61929, 21, 1, 1, 1, 'SUP001'),
    (3508, 21, 1, 1, 1, 'SUP001'),
    (65344, 21, 1, 1, 1, 'SUP001'),
    (19377, 21, 1, 1, 1, 'SUP001'),
    (104502, 21, 1, 1, 1, 'SUP001'),
    (57413, 21, 1, 1, 1, 'SUP001'),
    (66841, 21, 1, 1, 1, 'SUP001'),
    (39590, 21, 1, 1, 1, 'SUP001'),
    (50640, 21, 1, 1, 1, 'SUP001'),
    (66934, 21, 1, 1, 1, 'SUP001'),
    (36675, 21, 1, 1, 1, 'SUP001'),
    (65101, 21, 1, 1, 1, 'SUP001'),
    (84464, 21, 1, 1, 1, 'SUP001'),
    (74118, 21, 1, 1, 1, 'SUP001'),
    (40101, 21, 1, 1, 1, 'SUP001'),
    (18816, 21, 1, 1, 1, 'SUP001'),
    (61936, 21, 1, 1, 1, 'SUP001'),
    (23153, 21, 1, 1, 1, 'SUP001'),
    (50947, 21, 1, 1, 1, 'SUP001'),
    (39776, 21, 1, 1, 1, 'SUP001'),
    (31377, 21, 1, 1, 1, 'SUP001'),
    (106624, 21, 1, 1, 1, 'SUP001'),
    (41255, 21, 2, 1, 1, 'SUP001'),
    (82878, 21, 2, 1, 1, 'SUP001'),
    (94081, 21, 2, 1, 1, 'SUP001'),
    (80234, 21, 2, 1, 1, 'SUP001'),
    (81856, 21, 2, 1, 1, 'SUP001'),
    (19752, 21, 2, 1, 1, 'SUP001'),
    (17830, 21, 2, 1, 1, 'SUP001'),
    (73218, 21, 2, 1, 1, 'SUP001'),
    (32713, 21, 2, 1, 1, 'SUP001'),
    (90162, 21, 2, 1, 1, 'SUP001'),
    (59414, 21, 2, 1, 1, 'SUP001'),
    (63765, 21, 2, 1, 1, 'SUP001'),
    (26554, 21, 2, 1, 1, 'SUP001'),
    (72489, 21, 2, 1, 1, 'SUP001'),
    (94324, 21, 2, 1, 1, 'SUP001'),
    (34453, 21, 2, 1, 1, 'SUP001')
;
-- TmpA with an added surrogate key (someid), used to make the indexed view's key unique.
-- The two rate columns declare precision and scale explicitly. A bare NUMERIC in
-- SQL Server means NUMERIC(18, 0), which would round every fractional rate in the
-- sample data to a whole number on insert.
CREATE TABLE TmpA (
    someid      int identity,
    [VolNum]    int,
    [CostItem]  varchar(15),
    [OCostUnit] numeric(28, 20),
    [FCostUnit] numeric(28, 20),
    [ResCstID]  varchar(6)
);
-- Sample data for TmpA: 34 rows, the same 17 cost items listed for VolNum 1 and again for VolNum 2.
-- someid is left out of the column list so the IDENTITY column generates it.
INSERT INTO TmpA
    ([VolNum], [CostItem], [OCostUnit], [FCostUnit], [ResCstID])
VALUES
    (1, 'AgencyOtherOH_V', 0.000155982435905091, 0.000155982435905091, 'CSC004'),
    (1, 'CNST_F', 0.0166158829887076, 0.0166158829887076, 'CSC001'),
    (1, 'Depreciation_F', 0.00044046833444276, 0.00338301131105729, 'CSC004'),
    (1, 'GSOH_F', 0.002285123323198, 0.002285123323198, 'CSC004'),
    (1, 'GSOH_S', 0.00291555947552205, 0.00291555947552205, 'CSC004'),
    (1, 'GSOH_V', -7.85518770596873E-05, -7.85518770596873E-05, 'CSC004'),
    (1, 'GSPayrollOH_S', 1.95806910092635E-05, 1.95806910092635E-05, 'CSC004'),
    (1, 'INC001ET005', 0.000124265914489854, 0.000124265914489854, 'REC001'),
    (1, 'PDC_F', 0.000897633427026484, 0.000897633427026484, 'CSC004'),
    (1, 'R&D_F', 0, 0.0911004017527717, 'REC002'),
    (1, 'R&D_S', 0, 0.184788054915322, 'REC002'),
    (1, 'R&D_V', 0, -1.04534066995235, 'REC002'),
    (1, 'R&DSW_S', 0, 1.7173336734415, 'REC002'),
    (1, 'R&DSW_V', 0, 0.159935865317166, 'REC002'),
    (1, 'SWOtherOH_F', 0.000101979332044079, 0.000101979332044079, 'CSC004'),
    (1, 'SWOtherOH_S', 0.00997174733301197, 0.00997174733301197, 'CSC004'),
    (1, 'SWOtherOH_V', 0.000224400291272709, 0.000224400291272709, 'CSC004'),
    (2, 'AgencyOtherOH_V', 0.000155982435905091, 0.000155982435905091, 'CSC004'),
    (2, 'CNST_F', 0.0166158829887076, 0.0166158829887076, 'CSC001'),
    (2, 'Depreciation_F', 0.00044046833444276, 0.00338301131105729, 'CSC004'),
    (2, 'GSOH_F', 0.002285123323198, 0.002285123323198, 'CSC004'),
    (2, 'GSOH_S', 0.00291555947552205, 0.00291555947552205, 'CSC004'),
    (2, 'GSOH_V', -7.85518770596873E-05, -7.85518770596873E-05, 'CSC004'),
    (2, 'GSPayrollOH_S', 1.95806910092635E-05, 1.95806910092635E-05, 'CSC004'),
    (2, 'INC001ET005', 0.000124265914489854, 0.000124265914489854, 'REC001'),
    (2, 'PDC_F', 0.000897633427026484, 0.000897633427026484, 'CSC004'),
    (2, 'R&D_F', 0, 0.0911004017527717, 'REC002'),
    (2, 'R&D_S', 0, 0.184788054915322, 'REC002'),
    (2, 'R&D_V', 0, -1.04534066995235, 'REC002'),
    (2, 'R&DSW_S', 0, 1.7173336734415, 'REC002'),
    (2, 'R&DSW_V', 0, 0.159935865317166, 'REC002'),
    (2, 'SWOtherOH_F', 0.000101979332044079, 0.000101979332044079, 'CSC004'),
    (2, 'SWOtherOH_S', 0.00997174733301197, 0.00997174733301197, 'CSC004'),
    (2, 'SWOtherOH_V', 0.000224400291272709, 0.000224400291272709, 'CSC004')
;
GO

接下来是视图和索引:

-- Schema-bound, un-aggregated join of TmpB to TmpA on VolNum.
-- id1/id2 expose the two surrogate keys so each joined row can be identified uniquely;
-- the cost columns are passed through for aggregation by the caller.
CREATE VIEW dbo.vwTmpAB
WITH SCHEMABINDING
AS
SELECT
    act.someid               AS id1,
    rate.someid              AS id2,
    act.EpiSer               AS ActivityRecordID,
    act.CTPActivityGroupCode AS ActCstID,
    rate.ResCstID,
    act.VolAmt,
    act.CTPQty               AS ActCnt,
    rate.OCostUnit,
    rate.FCostUnit
FROM dbo.TmpB AS act
INNER JOIN dbo.TmpA AS rate
    ON act.VolNum = rate.VolNum;
GO
-- Unique clustered index on the view. The five grouping columns lead the key;
-- id1 and id2 (the surrogate keys of TmpB and TmpA) are appended to make it unique.
CREATE UNIQUE CLUSTERED INDEX uq_cl_vwTempAB
    ON dbo.vwTmpAB (
        ActivityRecordID,
        ActCstID,
        ResCstID,
        VolAmt,
        ActCnt,
        id1,
        id2
    );
GO

现在我们对视图进行聚合,如下所示。这里真正的神奇之处在于,当我运行您的原始查询时,即使查询中从未引用该视图,优化器也足够聪明,能够使用视图上的索引(注意:在SQL Server 2014中,这种自动匹配仅企业版支持;其他版本需要在查询中直接引用视图并使用WITH (NOEXPAND)提示):

-- Cost roll-up read from the indexed view.
-- WITH (NOEXPAND) tells the optimizer to use the view's own index rather than expanding
-- the view back into the TmpB/TmpA join. Without the hint, only Enterprise edition
-- considers the indexed view automatically.
SELECT
  ActivityRecordID, 
  ActCstID, 
  ResCstID, 
  VolAmt, 
  ActCnt,
  SUM(VolAmt * OCostUnit) AS TotOCst, 
  SUM(VolAmt * FCostUnit) AS TotFCst  
FROM dbo.vwTmpAB WITH (NOEXPAND)
GROUP BY ActivityRecordID, ActCstID, ResCstID, VolAmt, ActCnt;

-- The roll-up written directly against the base tables (no reference to the view):
-- each TmpB row is matched to every TmpA row with the same VolNum and the extended costs
-- are summed per activity record, activity group, resource-cost id, VolAmt and CTPQty.
SELECT
    act.EpiSer                       AS ActivityRecordID,
    act.CTPActivityGroupCode         AS ActCstID,
    rate.ResCstID,
    act.VolAmt,
    act.CTPQty                       AS ActCnt,
    SUM(act.VolAmt * rate.OCostUnit) AS TotOCst,
    SUM(act.VolAmt * rate.FCostUnit) AS TotFCst
FROM TmpB AS act
INNER JOIN TmpA AS rate
    ON act.VolNum = rate.VolNum
GROUP BY
    act.EpiSer,
    act.CTPActivityGroupCode,
    rate.ResCstID,
    act.VolAmt,
    act.CTPQty;

两个查询都返回相同的结果,并使用更好的查询计划。排序消失了。我包含了您的原始查询,以演示在某些情况下,您甚至不需要引用视图来利用索引。

enter image description here

如果这有帮助,请告诉我。