改进检查多个表中是否存在对应记录的查询

时间:2018-03-08 20:19:01

标签: sql-server tsql query-performance

鉴于以下表格定义:

-- Table probed by the existence queries: each row ties a session
-- (SessionKey) to one record (RecordID) of a named entity (EntityName).
-- A table may carry only one PRIMARY KEY, so the key is declared once, as
-- the composite clustered key; CLUSTERED must precede the column list.
CREATE TABLE [dbo].[ConsolidatedRecords]
(
    [SessionKey]   UNIQUEIDENTIFIER NOT NULL
  , [EntityID]     UNIQUEIDENTIFIER NOT NULL
        CONSTRAINT [FK_ConsolidatedRecords_EntityList]
        FOREIGN KEY REFERENCES [dbo].[EntityList] ( [EntityID] )
  , [EntityName]   NVARCHAR(128)    NOT NULL
  , [SurrogateKey] UNIQUEIDENTIFIER NOT NULL
  , [RecordID]     UNIQUEIDENTIFIER NOT NULL
  , CONSTRAINT [PK_ConsolidatedRecords]
        PRIMARY KEY CLUSTERED ( [SessionKey], [EntityName], [RecordID], [SurrogateKey] )
);
GO

-- Temp table holding the key generated for the session being processed.
-- The table name follows the TABLE keyword (CREATE TABLE #name).
CREATE TABLE #CurrentSession ([SessionKey] UNIQUEIDENTIFIER NOT NULL);

-- Seed it with a freshly generated session key.
INSERT INTO #CurrentSession ([SessionKey]) VALUES (NEWID());

-- ... long, involved process to populate ConsolidatedRecords

我有一个针对ConsolidatedRecords表的查询,用于检查对应的实体记录是否存在,但它难以维护,而且坦率地说很丑陋。我试图解决这些问题,但没有成功,所以来向各位求助:

-- Return the current session's consolidated records whose RecordID exists
-- in the entity table named by EntityName. Each EXISTS probes one entity
-- table by its ID column and is gated on the matching EntityName literal.
-- Every subquery must correlate through its OWN alias (one, two, three, ...).
SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
  FROM [dbo].[ConsolidatedRecords] records
  JOIN #CurrentSession             session ON records.[SessionKey] = session.[SessionKey]
 WHERE ( EXISTS( SELECT 1 FROM [dbo].[Entity1] one   WHERE records.[RecordID] = one.[Entity1ID]   AND records.[EntityName] = N'Entity1' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity2] two   WHERE records.[RecordID] = two.[Entity2ID]   AND records.[EntityName] = N'Entity2' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity3] three WHERE records.[RecordID] = three.[Entity3ID] AND records.[EntityName] = N'Entity3' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity4] four  WHERE records.[RecordID] = four.[Entity4ID]  AND records.[EntityName] = N'Entity4' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity5] five  WHERE records.[RecordID] = five.[Entity5ID]  AND records.[EntityName] = N'Entity5' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity6] six   WHERE records.[RecordID] = six.[Entity6ID]   AND records.[EntityName] = N'Entity6' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity7] seven WHERE records.[RecordID] = seven.[Entity7ID] AND records.[EntityName] = N'Entity7' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity8] eight WHERE records.[RecordID] = eight.[Entity8ID] AND records.[EntityName] = N'Entity8' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity9] nine  WHERE records.[RecordID] = nine.[Entity9ID]  AND records.[EntityName] = N'Entity9' )
       );

其中一个问题是,在实际情况中,涉及的实体远不止九个。

我尝试了以下方法,但性能更差 - 其中一些实体表非常大 - 超过100,000条记录。在所有情况下,我都在查询主键。

       -- Outer-join variant of the existence check: each entity table is
       -- LEFT JOINed on its ID column, gated on the matching EntityName.
       -- A row qualifies when at least one join found a match, i.e. when
       -- that table's own ID column came back non-NULL.
       SELECT [SessionKey]   = records.[SessionKey]
            , [SurrogateKey] = records.[SurrogateKey]
           FROM [dbo].[ConsolidatedRecords] records
           JOIN #CurrentSession             session ON records.[SessionKey] = session.[SessionKey]
LEFT OUTER JOIN [dbo].[Entity1]             one     ON records.[RecordID]   = one.[Entity1ID]      AND records.[EntityName] = N'Entity1'
LEFT OUTER JOIN [dbo].[Entity2]             two     ON records.[RecordID]   = two.[Entity2ID]      AND records.[EntityName] = N'Entity2'
LEFT OUTER JOIN [dbo].[Entity3]             three   ON records.[RecordID]   = three.[Entity3ID]    AND records.[EntityName] = N'Entity3'
LEFT OUTER JOIN [dbo].[Entity4]             four    ON records.[RecordID]   = four.[Entity4ID]     AND records.[EntityName] = N'Entity4'
LEFT OUTER JOIN [dbo].[Entity5]             five    ON records.[RecordID]   = five.[Entity5ID]     AND records.[EntityName] = N'Entity5'
LEFT OUTER JOIN [dbo].[Entity6]             six     ON records.[RecordID]   = six.[Entity6ID]      AND records.[EntityName] = N'Entity6'
LEFT OUTER JOIN [dbo].[Entity7]             seven   ON records.[RecordID]   = seven.[Entity7ID]    AND records.[EntityName] = N'Entity7'
LEFT OUTER JOIN [dbo].[Entity8]             eight   ON records.[RecordID]   = eight.[Entity8ID]    AND records.[EntityName] = N'Entity8'
LEFT OUTER JOIN [dbo].[Entity9]             nine    ON records.[RecordID]   = nine.[Entity9ID]     AND records.[EntityName] = N'Entity9'
          WHERE one.[Entity1ID]   IS NOT NULL
             OR two.[Entity2ID]   IS NOT NULL
             OR three.[Entity3ID] IS NOT NULL
             OR four.[Entity4ID]  IS NOT NULL
             OR five.[Entity5ID]  IS NOT NULL
             OR six.[Entity6ID]   IS NOT NULL
             OR seven.[Entity7ID] IS NOT NULL
             OR eight.[Entity8ID] IS NOT NULL
             OR nine.[Entity9ID]  IS NOT NULL;

4 个答案:

答案 0 :(得分:1)

您可以使用UNION ALL

-- Single EXISTS whose subquery is a UNION ALL of one probe per entity table;
-- the row qualifies as soon as any branch yields a row.
SELECT
    rec.[SessionKey]   AS [SessionKey],
    rec.[SurrogateKey] AS [SurrogateKey]
FROM [dbo].[ConsolidatedRecords] AS rec
INNER JOIN #CurrentSession AS cur
    ON rec.[SessionKey] = cur.[SessionKey]
WHERE EXISTS (
    SELECT 1
    FROM [dbo].[Entity1] AS e1
    WHERE rec.[EntityName] = N'Entity1'
      AND rec.RecordID = e1.[Entity1ID]

    UNION ALL

    SELECT 1
    FROM [dbo].[Entity2] AS e2
    WHERE rec.[EntityName] = N'Entity2'
      AND rec.RecordID = e2.[Entity2ID]

    UNION ALL
    ...
);

答案 1 :(得分:0)

您可以使用UNION ALL并创建所有实体表的视图。您可以在查询中使用该视图。我不确定它是否有助于性能,但它变得更易于维护。

答案 2 :(得分:0)

在不知道表定义的情况下很难判断。如果Entity1到Entity9具有公共属性(即列),可以将它们合并成一个表,并像ConsolidatedRecords那样增加一个名为“EntityName”的列。然后按EntityName联接,查询就简化为只涉及2个表。如果Entity1到Entity9确实各不相同,那么您可以用UNION把各个子查询合并起来!

HTH, 肖恩

答案 3 :(得分:0)

如果这是一个批处理类型的操作(即不经常执行),我建议创建一个临时表(或物理表),其中包含所有这些实体表的RecordID。给该表加上CLUSTERED INDEX(或PRIMARY KEY)后,查询的执行速度会比原始查询快得多。

-- Build a lookup of every (RecordID, EntityType) pair present in the entity
-- tables, then test the session's records against it with a single EXISTS.
IF OBJECT_ID('tempdb..#ValidRecords') IS NOT NULL
    DROP TABLE #ValidRecords;

-- Column types mirror ConsolidatedRecords.RecordID (UNIQUEIDENTIFIER) and
-- ConsolidatedRecords.EntityName (NVARCHAR(128)) so the comparison below
-- needs no implicit conversion; an INT RecordID cannot be compared with a
-- UNIQUEIDENTIFIER at all.
-- NOTE(review): assumes the [EntityNID] columns are UNIQUEIDENTIFIER, as
-- implied by their comparison with RecordID -- confirm against the schema.
CREATE TABLE #ValidRecords (
    RecordID   UNIQUEIDENTIFIER NOT NULL,
    EntityType NVARCHAR(128)    NOT NULL,
    PRIMARY KEY CLUSTERED (RecordID, EntityType));

INSERT INTO #ValidRecords (RecordID, EntityType)
SELECT [Entity1ID] AS RecordID, N'Entity1' AS EntityType FROM [dbo].[Entity1] UNION ALL
SELECT [Entity2ID] AS RecordID, N'Entity2' AS EntityType FROM [dbo].[Entity2] UNION ALL
SELECT [Entity3ID] AS RecordID, N'Entity3' AS EntityType FROM [dbo].[Entity3] UNION ALL
SELECT [Entity4ID] AS RecordID, N'Entity4' AS EntityType FROM [dbo].[Entity4]
-- ...... (one more UNION ALL branch per remaining entity table)
;

SELECT 
    [SessionKey] = records.[SessionKey],
    [SurrogateKey] = records.[SurrogateKey]
FROM 
    [dbo].[ConsolidatedRecords] records
    INNER JOIN #CurrentSession session ON records.[SessionKey] = session.[SessionKey]
WHERE 
    EXISTS (SELECT 1 FROM #ValidRecords AS V WHERE records.[RecordID] = V.RecordID AND records.[EntityName] = V.EntityType);

另一方面,如果创建该表需要花费较多时间,那么您可以尝试把多个EXISTS合并为一个内部使用UNION ALL的EXISTS,不过性能未必会提高,具体取决于完整查询的复杂程度。