优化使用子查询、SUM 和 TOP 的查询

时间:2013-06-19 04:29:13

标签: sql subquery

这是我的查询

-- Account numbers of tenants whose billing-detail amounts, taken over the
-- tenant's two most recent billing summaries (ordered by bill_date), sum to 0.
SELECT t.accountno
FROM tenant_info AS t
WHERE (
        -- Total of all detail lines attached to the selected summaries.
        SELECT SUM(d.Bill_Amount)
        FROM billing_summary AS s
        INNER JOIN Billing_Detail AS d
            ON s.Id = d.Billing_Summary_ID
        WHERE s.id IN (
                -- The two latest summaries of the current outer tenant.
                SELECT TOP 2 Id
                FROM Billing_Summary
                WHERE Tenant_Info_ID = t.TenantId
                ORDER BY bill_date DESC
              )
      ) = 0

表中的记录数以百万计,因此这个查询大约需要 3 分钟才能返回结果,导致应用程序超时。我认为第三个(最内层的)子查询需要改写得更好,但我想不出任何解决方案。

有没有更好的解决方案?

1 个答案:

答案 0 :(得分:0)

@Mustafa 在上面给出的 SQL 查询显然更优雅,但下面这种写法运行得更快。

设置测试数据:

    --/*
    -- Test-data setup: (re)create the three tables read by the benchmark queries.
    -- Every table is dropped first when it already exists, so the script can be re-run.
    SET NOCOUNT ON;

    IF OBJECT_ID('Tenant_Info', 'U') IS NOT NULL
        DROP TABLE Tenant_Info;
    GO
    -- One row per tenant.
    CREATE TABLE dbo.Tenant_Info (
        TenantId   int IDENTITY(1, 1),
        AccountNo  varchar(10)
    );
    GO

    IF OBJECT_ID('Billing_Summary', 'U') IS NOT NULL
        DROP TABLE Billing_Summary;
    GO
    -- One row per tenant per bill date; Tenant_Info_ID holds a Tenant_Info.TenantId value.
    CREATE TABLE dbo.Billing_Summary (
        Id              int IDENTITY(1, 1),
        Tenant_Info_ID  int,
        Bill_Date       date
    );
    GO

    IF OBJECT_ID('Billing_Detail', 'U') IS NOT NULL
        DROP TABLE Billing_Detail;
    GO
    -- Line items of a summary; Billing_Summary_ID holds a Billing_Summary.Id value.
    CREATE TABLE dbo.Billing_Detail (
        Id                  int IDENTITY(1, 1),
        Billing_Summary_ID  int,
        Bill_Amount         decimal(28, 10)
    );
    GO

    -- ====================================================================
    -- Tenant_Info test data.
    -- Seed a single row, then insert the table into itself 20 times:
    -- each pass doubles the row count, ending at 2^20 = 1,048,576 rows.
    INSERT INTO Tenant_Info (AccountNo)
    VALUES ('');

    DECLARE @Count int = 0;
    WHILE @Count < 20 -- 20 passes: 1,048,576 rows; 10 passes: 1,024 rows
    BEGIN
        INSERT INTO Tenant_Info (AccountNo)
        SELECT existing.AccountNo
          FROM Tenant_Info AS existing;

        SET @Count = @Count + 1;
    END;

    -- Replace the empty placeholder with an account number derived from the
    -- tenant id (1000000000 + TenantId, rendered as text). Applies to every row.
    UPDATE tenant
       SET tenant.AccountNo = CONVERT(varchar(10), 1000000000 + tenant.TenantId)
      FROM Tenant_Info AS tenant;

    CREATE NONCLUSTERED INDEX idx_Tenant_Info
        ON Tenant_Info (TenantId);
    -- ====================================================================



    -- ====================================================================
    -- Billing_Summary test data: 12 monthly summaries for every tenant.
    --
    -- Tenant 1 is built first and then serves as the template that is copied
    -- to all other tenants.
    DECLARE @Count2 int; SELECT @Count2 = 0;
    WHILE @Count2 < 12
      BEGIN
        -- '20120801' (yyyymmdd) is read as 1 August 2012 regardless of the
        -- session's DATEFORMAT / language; a literal such as '08/01/2012'
        -- would be read as 8 January under a dmy setting.
        INSERT Billing_Summary (Tenant_Info_ID, Bill_Date)
        SELECT 1, DATEADD(MONTH, @Count2, '20120801')

        SELECT @Count2 = (@Count2 + 1)
    END

    -- Insert each remaining tenant's latest bill date FIRST so that it receives
    -- a lower Billing_Summary.Id than that tenant's older dates. This keeps the
    -- test data from accidentally satisfying "highest Id = latest date".
    DECLARE @MaxBillDate date;
     SELECT @MaxBillDate = MAX(Bill_Date) FROM Billing_Summary

    INSERT Billing_Summary (Tenant_Info_ID, Bill_Date)
    SELECT TI.TenantId, BS.Bill_Date
      FROM Tenant_Info TI
      CROSS JOIN Billing_Summary BS
     WHERE TI.TenantId <> 1
       AND BS.Tenant_Info_ID = 1        -- copy from the tenant-1 template rows only
       AND BS.Bill_Date = @MaxBillDate

    -- Then the remaining 11 (older) dates for every tenant other than tenant 1.
    INSERT Billing_Summary (Tenant_Info_ID, Bill_Date)
    SELECT TI.TenantId, BS.Bill_Date
      FROM Tenant_Info TI
      CROSS JOIN Billing_Summary BS
     WHERE TI.TenantId <> 1
       AND BS.Tenant_Info_ID = 1        -- copy from the tenant-1 template rows only
       AND BS.Bill_Date <> @MaxBillDate

    CREATE NONCLUSTERED INDEX idx_Billing_Summary_1 ON Billing_Summary (Id, Tenant_Info_ID);
    CREATE NONCLUSTERED INDEX idx_Billing_Summary_2 ON Billing_Summary (Tenant_Info_ID, Bill_Date);
    -- Optional sanity checks (uncomment to run):
    --SELECT COUNT(*) FROM Billing_Summary
    --SELECT COUNT(DISTINCT Tenant_Info_ID) FROM Billing_Summary
    --SELECT COUNT(DISTINCT Bill_Date) FROM Billing_Summary
    --SELECT * FROM Billing_Summary WHERE Tenant_Info_ID = 1
    -- ====================================================================



    -- ====================================================================
    -- Billing_Detail test data: two detail lines per summary row.
    -- Each line's amount is seeded with the owning tenant's id.
    INSERT INTO Billing_Detail (Billing_Summary_ID, Bill_Amount)
    SELECT summary.Id, summary.Tenant_Info_ID
      FROM Billing_Summary AS summary;

    INSERT INTO Billing_Detail (Billing_Summary_ID, Bill_Amount)
    SELECT summary.Id, summary.Tenant_Info_ID
      FROM Billing_Summary AS summary;

    -- The two latest bill dates present in Billing_Summary (across all tenants).
    DECLARE @MaxBillDateB date, @2ndMaxBillDateB date;

    SELECT @MaxBillDateB = MAX(Bill_Date)
      FROM Billing_Summary;

    SELECT @2ndMaxBillDateB = MAX(Bill_Date)
      FROM Billing_Summary
     WHERE Bill_Date <> @MaxBillDateB;

    -- Zero out the detail lines on those two dates for a random half of the
    -- tenants (ORDER BY NEWID() shuffles the tenant list before TOP 50 PERCENT).
    UPDATE detail
       SET detail.Bill_Amount = 0
      FROM Billing_Detail AS detail
     INNER JOIN Billing_Summary AS summary
        ON detail.Billing_Summary_ID = summary.Id
     INNER JOIN (
            SELECT TOP 50 PERCENT tenants.Tenant_Info_ID
              FROM (
                    SELECT DISTINCT s.Tenant_Info_ID
                      FROM Billing_Detail AS d
                     INNER JOIN Billing_Summary AS s
                        ON d.Billing_Summary_ID = s.Id
                   ) AS tenants
             ORDER BY NEWID()
           ) AS zeroed
        ON summary.Tenant_Info_ID = zeroed.Tenant_Info_ID
     WHERE summary.Bill_Date IN (@2ndMaxBillDateB, @MaxBillDateB);

    CREATE NONCLUSTERED INDEX idx_Billing_Detail_1
        ON Billing_Detail (Id);
    CREATE NONCLUSTERED INDEX idx_Billing_Detail_2
        ON Billing_Detail (Billing_Summary_ID);
    -- Optional check of the zeroed rows per tenant (uncomment to run):
    -- SELECT BS.Tenant_Info_ID, SUM(BD.Bill_Amount) FROM Billing_Detail BD JOIN Billing_Summary BS ON BD.Billing_Summary_ID = BS.Id WHERE BD.Bill_Amount = 0 GROUP BY BS.Tenant_Info_ID
    -- ====================================================================

    SET NOCOUNT OFF;
    --*/

运行查询脚本:

    -- Baseline for comparison: the question's original query wrapped in COUNT(*).
    -- It is disabled by the block comment below; change the opening "/*" to "--/*"
    -- to run it (the closing "--*/" then becomes an ordinary line comment).
    /*        
    -- ORIGINAL QUERY TAKING 3 MINUTES BY THE QUESTION ASKER
    SELECT COUNT(*) -- On my system this runs in about ~25 seconds for a 524,288 row result
      FROM (
                select   t.accountno  from tenant_info t where  
                (
                    select  sum(d.Bill_Amount) from billing_summary s , Billing_Detail d 
                    where s.Id=d.Billing_Summary_ID and  s.id in 
                      (select top 2 Id from Billing_Summary where Tenant_Info_ID = t.TenantId 
                         order by bill_date desc)
                ) = 0
           ) A
    --*/


    -- Step 1: the latest bill date of every tenant.
    IF OBJECT_ID('tempdb..#Tenant_MaxBillDate', 'U') IS NOT NULL
        DROP TABLE #Tenant_MaxBillDate;

    CREATE TABLE #Tenant_MaxBillDate (
        Tenant_Info_ID  int,
        Bill_Date       date
    );

    INSERT INTO #Tenant_MaxBillDate (Tenant_Info_ID, Bill_Date)
    SELECT summary.Tenant_Info_ID, MAX(summary.Bill_Date)
      FROM Billing_Summary AS summary
     GROUP BY summary.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_MaxBillDate
        ON #Tenant_MaxBillDate (Tenant_Info_ID, Bill_Date);


    -- Step 2: the second-latest bill date of every tenant, i.e. the maximum
    -- bill date once the rows on the tenant's latest date are excluded.
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillDate', 'U') IS NOT NULL
        DROP TABLE #Tenant_2ndToMaxBillDate;

    CREATE TABLE #Tenant_2ndToMaxBillDate (
        Tenant_Info_ID  int,
        Bill_Date       date
    );

    INSERT INTO #Tenant_2ndToMaxBillDate (Tenant_Info_ID, Bill_Date)
    SELECT summary.Tenant_Info_ID, MAX(summary.Bill_Date)
      FROM Billing_Summary AS summary
      LEFT JOIN #Tenant_MaxBillDate AS latest
        ON summary.Tenant_Info_ID = latest.Tenant_Info_ID
       AND summary.Bill_Date      = latest.Bill_Date
     WHERE latest.Tenant_Info_ID IS NULL   -- anti-join: keep rows NOT on the latest date
     GROUP BY summary.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillDate
        ON #Tenant_2ndToMaxBillDate (Tenant_Info_ID, Bill_Date);


    -- Step 3: one summary row per tenant on the tenant's latest bill date
    -- (the highest Id among the rows on that date). Resolving it through the
    -- date avoids the incorrect assumption that the highest summary Id overall
    -- is also the one with the latest bill date.
    IF OBJECT_ID('tempdb..#Tenant_MaxBillSummary', 'U') IS NOT NULL
        DROP TABLE #Tenant_MaxBillSummary;

    CREATE TABLE #Tenant_MaxBillSummary (
        Tenant_Info_ID      int,
        Billing_Summary_ID  int
    );

    INSERT INTO #Tenant_MaxBillSummary (Tenant_Info_ID, Billing_Summary_ID)
    SELECT summary.Tenant_Info_ID, MAX(summary.Id)
      FROM Billing_Summary AS summary
     INNER JOIN #Tenant_MaxBillDate AS latest
        ON summary.Tenant_Info_ID = latest.Tenant_Info_ID
       AND summary.Bill_Date      = latest.Bill_Date
     GROUP BY summary.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_MaxBillSummary
        ON #Tenant_MaxBillSummary (Tenant_Info_ID, Billing_Summary_ID);


    -- Step 4: tenants that have more than one summary row on their latest bill
    -- date. For those, the second row to use is also on that date: the highest
    -- Id on the latest date other than the one already picked in
    -- #Tenant_MaxBillSummary.
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillSummary_SameDate', 'U') IS NOT NULL
        DROP TABLE #Tenant_2ndToMaxBillSummary_SameDate;

    CREATE TABLE #Tenant_2ndToMaxBillSummary_SameDate (
        Tenant_Info_ID      int,
        Billing_Summary_ID  int
    );

    INSERT INTO #Tenant_2ndToMaxBillSummary_SameDate (Tenant_Info_ID, Billing_Summary_ID)
    SELECT summary.Tenant_Info_ID, MAX(summary.Id)
      FROM Billing_Summary AS summary
     INNER JOIN #Tenant_MaxBillDate AS latest
        ON summary.Tenant_Info_ID = latest.Tenant_Info_ID
       AND summary.Bill_Date      = latest.Bill_Date
      LEFT JOIN #Tenant_MaxBillSummary AS picked
        ON summary.Id             = picked.Billing_Summary_ID
       AND summary.Tenant_Info_ID = picked.Tenant_Info_ID
     WHERE picked.Billing_Summary_ID IS NULL   -- anti-join: skip the row already picked
     GROUP BY summary.Tenant_Info_ID;

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillSummary_SameDate
        ON #Tenant_2ndToMaxBillSummary_SameDate (Tenant_Info_ID, Billing_Summary_ID);


    -- Step 5: the expected common case, where the second summary row to use
    -- lies on the tenant's second-latest bill date. Picks the highest Id on
    -- that date, but only for tenants that did not already get a second row
    -- on their latest date (#Tenant_2ndToMaxBillSummary_SameDate).
    IF OBJECT_ID('tempdb..#Tenant_2ndToMaxBillSummary_2ndDate', 'U') IS NOT NULL DROP TABLE #Tenant_2ndToMaxBillSummary_2ndDate;
    CREATE TABLE #Tenant_2ndToMaxBillSummary_2ndDate (Tenant_Info_ID int, Billing_Summary_ID int);

    INSERT #Tenant_2ndToMaxBillSummary_2ndDate (Tenant_Info_ID, Billing_Summary_ID)
    SELECT BS.Tenant_Info_ID, MAX(BS.Id)
      FROM Billing_Summary BS
      JOIN #Tenant_2ndToMaxBillDate TM
        ON BS.Tenant_Info_ID     = TM.Tenant_Info_ID
       AND BS.Bill_Date          = TM.Bill_Date
      -- Anti-join on (Id, Tenant_Info_ID), the same key pair the same-date step
      -- uses against #Tenant_MaxBillSummary, so the exclusion is scoped to the
      -- row's own tenant and matches that table's index column order.
      LEFT JOIN #Tenant_MaxBillSummary TMS
        ON BS.Id                 = TMS.Billing_Summary_ID
       AND BS.Tenant_Info_ID     = TMS.Tenant_Info_ID
      LEFT JOIN #Tenant_2ndToMaxBillSummary_SameDate TMS2
        ON BS.Tenant_Info_ID     = TMS2.Tenant_Info_ID
     WHERE TMS.Billing_Summary_ID IS NULL -- Do not choose the same summary ID
       AND TMS2.Tenant_Info_ID    IS NULL -- Do not choose a tenant that has two summary entries for their max date
     GROUP BY BS.Tenant_Info_ID

    CREATE NONCLUSTERED INDEX idx_#Tenant_2ndToMaxBillSummary_2ndDate ON #Tenant_2ndToMaxBillSummary_2ndDate (Tenant_Info_ID, Billing_Summary_ID);


    -- Step 6: combine the picked summary rows into one list: the latest row per
    -- tenant plus its second row, taken from either the same-date table or the
    -- second-date table.
    IF OBJECT_ID('tempdb..#Tenant_Top2BillSummaryRecords', 'U') IS NOT NULL
        DROP TABLE #Tenant_Top2BillSummaryRecords;

    CREATE TABLE #Tenant_Top2BillSummaryRecords (
        Tenant_Info_ID      int,
        Billing_Summary_ID  int
    );

    INSERT INTO #Tenant_Top2BillSummaryRecords (Tenant_Info_ID, Billing_Summary_ID)
    SELECT latest.Tenant_Info_ID, latest.Billing_Summary_ID
      FROM #Tenant_MaxBillSummary AS latest
    UNION ALL
    SELECT same_date.Tenant_Info_ID, same_date.Billing_Summary_ID
      FROM #Tenant_2ndToMaxBillSummary_SameDate AS same_date
    UNION ALL
    SELECT second_date.Tenant_Info_ID, second_date.Billing_Summary_ID
      FROM #Tenant_2ndToMaxBillSummary_2ndDate AS second_date;

    -- Indexed in both column orders (tenant-first and summary-first).
    CREATE NONCLUSTERED INDEX idx_#Tenant_Top2BillSummaryRecords
        ON #Tenant_Top2BillSummaryRecords (Tenant_Info_ID, Billing_Summary_ID);
    CREATE NONCLUSTERED INDEX idx_#Tenant_Top2BillSummaryRecords2
        ON #Tenant_Top2BillSummaryRecords (Billing_Summary_ID, Tenant_Info_ID);


    -- Final step: count the tenants whose detail amounts over the picked
    -- summary rows sum to 0.
    -- (The answer's author reports about ~14 seconds on their system for a 524,288 row result.)
    SELECT COUNT(*)
      FROM (
            SELECT tenant.TenantId, tenant.AccountNo
              FROM #Tenant_Top2BillSummaryRecords AS top2
             INNER JOIN Tenant_Info AS tenant
                ON top2.Tenant_Info_ID = tenant.TenantId
             INNER JOIN Billing_Summary AS summary
                ON top2.Billing_Summary_ID = summary.Id
             INNER JOIN Billing_Detail AS detail
                ON summary.Id = detail.Billing_Summary_ID
             GROUP BY tenant.TenantId, tenant.AccountNo
            HAVING SUM(detail.Bill_Amount) = 0
           ) AS zero_sum_tenants;