我有一个非常简单的MERGE查询,其中源表只有大约41MB,目标表大约是4GB。我基本上只是将行条目添加到更大的目标表(同时避免重复的主键条目)。在Management Studio中执行时,此查询运行速度非常慢。它已经运行了15分钟而没有完成。只是想知道是否有任何加速执行的选项或者我做错了什么。感谢。
以下是查询:
-- Insert-only MERGE: every source row whose STATION_ID has no matching
-- row in the target is added to the target. Matched rows are left untouched.
MERGE INTO [myschema].[targettable] AS tgt
USING [myschema].[sourcetable] AS src
    ON tgt.STATION_ID = src.STATION_ID
WHEN NOT MATCHED BY TARGET THEN
    INSERT (
        STATION_ID,
        SENSORNAME,
        TIME_TAG,
        ORIG_VALUE,
        ED_VALUE,
        SOURCE
    )
    VALUES (
        src.STATION_ID,
        src.SENSORNAME,
        src.TIME_TAG,
        src.ORIG_VALUE,
        src.ED_VALUE,
        src.SOURCE
    );
估计执行计划 XML 的开头部分:
<?xml version="1.0" encoding="utf-16"?>
<ShowPlanXML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" Version="1.1" Build="10.50.1600.1" xmlns="http://schemas.microsoft.com/sqlserver/2004/07/showplan">
<BatchSequence>
<Batch>
<Statements>
<StmtSimple StatementCompId="1" StatementEstRows="92529300000" StatementId="1" StatementOptmLevel="FULL" StatementSubTreeCost="45530500" StatementText="--sp_configure 'show advanced options', 1;
--RECONFIGURE;
--GO
--sp_configure 'Ad Hoc Distributed Queries', 1;
--RECONFIGURE;
--GO
MERGE XCManager.XC_DATA1 target
USING XCManager.FrkFalls_FORMATTED source
ON target.STATION_ID = source.STATION_ID
WHEN NOT MATCHED BY TARGET THEN
 INSERT (STATION_ID, SENSORNAME, TIME_TAG, ORIG_VALUE, ED_VALUE, SOURCE)
 VALUES (source.STATION_ID, source.SENSORNAME, source.TIME_TAG, source.ORIG_VALUE, source.ED_VALUE, source.SOURCE);" StatementType="MERGE" QueryHash="0x0A5DB292FF222BCD" QueryPlanHash="0x1FBBCC92AE041A3E">
<StatementSetOptions ANSI_NULLS="true" ANSI_PADDING="true" ANSI_WARNINGS="true" ARITHABORT="true" CONCAT_NULL_YIELDS_NULL="true" NUMERIC_ROUNDABORT="false" QUOTED_IDENTIFIER="true" />
<QueryPlan CachedPlanSize="64" CompileTime="31" CompileCPU="15" CompileMemory="736">
<RelOp AvgRowSize="9" EstimateCPU="92529.3" EstimateIO="7070610" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="92529300000" LogicalOp="Merge" NodeId="1" Parallel="false" PhysicalOp="Clustered Index Merge" EstimatedTotalSubtreeCost="45530500">
<OutputList />
<Update WithOrderedPrefetch="true" DMLRequestSort="true">
<Object Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Index="[DAT1_PK]" Alias="[target]" IndexKind="Clustered" />
<SetPredicate>
<ScalarOperator ScalarString="[XConnectDB_NHDES].[XCManager].[XC_DATA1].[SOURCE] as [target].[SOURCE] = RaiseIfNullUpdate([Expr1008]),[XConnectDB_NHDES].[XCManager].[XC_DATA1].[SENSORNAME] as [target].[SENSORNAME] = RaiseIfNullUpdate([Expr1009]),[XConnectDB_NHDES].[XCManager].[XC_DATA1].[ED_VALUE] as [target].[ED_VALUE] = [XConnectDB_NHDES].[XCManager].[FrkFalls_FORMATTED].[ED_VALUE] as [source].[ED_VALUE],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[ORIG_VALUE] as [target].[ORIG_VALUE] = RaiseIfNullUpdate([XConnectDB_NHDES].[XCManager].[FrkFalls_FORMATTED].[ORIG_VALUE] as [source].[ORIG_VALUE]),[XConnectDB_NHDES].[XCManager].[XC_DATA1].[TIME_TAG] as [target].[TIME_TAG] = RaiseIfNullUpdate([XConnectDB_NHDES].[XCManager].[FrkFalls_FORMATTED].[TIME_TAG] as [source].[TIME_TAG]),[XConnectDB_NHDES].[XCManager].[XC_DATA1].[STATION_ID] as [target].[STATION_ID] = RaiseIfNullUpdate([Expr1010]),[XConnectDB_NHDES].[XCManager].[XC_DATA1].[FLAG1] as [target].[FLAG1] = [Expr1011],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[FLAG2] as [target].[FLAG2] = [Expr1012],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[FLAG3] as [target].[FLAG3] = [Expr1013],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[FLAG4] as [target].[FLAG4] = [Expr1014],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[HIGH_HIGH_FLAG] as [target].[HIGH_HIGH_FLAG] = [Expr1015],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[HIGH_FLAG] as [target].[HIGH_FLAG] = [Expr1016],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[LOW_FLAG] as [target].[LOW_FLAG] = [Expr1017],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[LOW_LOW_FLAG] as [target].[LOW_LOW_FLAG] = [Expr1018],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[ROC_FLAG] as [target].[ROC_FLAG] = [Expr1019],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[NO_CHG_FLAG] as [target].[NO_CHG_FLAG] = [Expr1020],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[ALARM_FLAG] as [target].[ALARM_FLAG] = [Expr1021],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[STD_DEV_FLAG] as [target].[STD_DEV_FLAG] = 
[Expr1022],[XConnectDB_NHDES].[XCManager].[XC_DATA1].[AVG_FLAG] as [target].[AVG_FLAG] = [Expr1023]">
<ScalarExpressionList>
<ScalarOperator>
<MultipleAssign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="SOURCE" />
<ScalarOperator>
<Intrinsic FunctionName="RaiseIfNullUpdate">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1008" />
</Identifier>
</ScalarOperator>
</Intrinsic>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="SENSORNAME" />
<ScalarOperator>
<Intrinsic FunctionName="RaiseIfNullUpdate">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1009" />
</Identifier>
</ScalarOperator>
</Intrinsic>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="ED_VALUE" />
<ScalarOperator>
<Identifier>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[FrkFalls_FORMATTED]" Alias="[source]" Column="ED_VALUE" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="ORIG_VALUE" />
<ScalarOperator>
<Intrinsic FunctionName="RaiseIfNullUpdate">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[FrkFalls_FORMATTED]" Alias="[source]" Column="ORIG_VALUE" />
</Identifier>
</ScalarOperator>
</Intrinsic>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="TIME_TAG" />
<ScalarOperator>
<Intrinsic FunctionName="RaiseIfNullUpdate">
<ScalarOperator>
<Identifier>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[FrkFalls_FORMATTED]" Alias="[source]" Column="TIME_TAG" />
</Identifier>
</ScalarOperator>
</Intrinsic>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="STATION_ID" />
<ScalarOperator>
<Intrinsic FunctionName="RaiseIfNullUpdate">
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1010" />
</Identifier>
</ScalarOperator>
</Intrinsic>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="FLAG1" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1011" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="FLAG2" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1012" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="FLAG3" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1013" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="FLAG4" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1014" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="HIGH_HIGH_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1015" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="HIGH_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1016" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="LOW_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1017" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="LOW_LOW_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1018" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="ROC_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1019" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="NO_CHG_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1020" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="ALARM_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1021" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="STD_DEV_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1022" />
</Identifier>
</ScalarOperator>
</Assign>
<Assign>
<ColumnReference Database="[XConnectDB_NHDES]" Schema="[XCManager]" Table="[XC_DATA1]" Alias="[target]" Column="AVG_FLAG" />
<ScalarOperator>
<Identifier>
<ColumnReference Column="Expr1023" />
</Identifier>
</ScalarOperator>
</Assign>
</MultipleAssign>
</ScalarOperator>
</ScalarExpressionList>
</ScalarOperator>
</SetPredicate>
<ActionColumn>
<ColumnReference Column="Action1007" />
</ActionColumn>
<RelOp AvgRowSize="130" EstimateCPU="9252.93" EstimateIO="0" EstimateRebinds="0" EstimateRewinds="0" EstimateRows="92529300000" LogicalOp="Compute Scalar" NodeId="3" Parallel="false" PhysicalOp="Compute Scalar" EstimatedTotalSubtreeCost="38367300">
<OutputList>
答案 0 :(得分:1)
看起来你新添加的索引并没有太大帮助,因为 STATION_ID 在两个表中的数据类型不同,需要隐式转换为 nvarchar(20)。
如果可能,请让两者的数据类型保持一致。
该 MERGE 语句基本上等价于:
-- Anti-join equivalent of the insert-only MERGE: add every source row whose
-- STATION_ID has no match in the target.
-- The MERGE's target is XC_DATA1 and its source is FrkFalls_FORMATTED, so
-- rows flow from FrkFalls_FORMATTED into XC_DATA1.
-- Explicit column lists are required: SELECT * over the join would return
-- the columns of both tables and could not feed the INSERT.
INSERT INTO XCManager.XC_DATA1
    (STATION_ID, SENSORNAME, TIME_TAG, ORIG_VALUE, ED_VALUE, SOURCE)
SELECT
    source.STATION_ID,
    source.SENSORNAME,
    source.TIME_TAG,
    source.ORIG_VALUE,
    source.ED_VALUE,
    source.SOURCE
FROM XCManager.FrkFalls_FORMATTED AS source
LEFT JOIN XCManager.XC_DATA1 AS target
    ON target.STATION_ID = source.STATION_ID
-- Keep only source rows for which the join found no target row.
WHERE target.STATION_ID IS NULL;
由于 STATION_ID 不是唯一的,这可能意味着它最终会先进行联接,然后丢弃大量不影响最终结果的中间记录。您上传的估计计划显示,有 920 亿行进入多对多合并左联接(merge left join)。这些数字可能不准确,但您没有提供信息更充分的实际执行计划。
您可以尝试放弃 MERGE,并将其改写为:
-- NOT EXISTS rewrite of the insert-only MERGE: add every source row whose
-- STATION_ID is not yet present in the target.
-- The MERGE's target is XC_DATA1 and its source is FrkFalls_FORMATTED, so
-- rows flow from FrkFalls_FORMATTED into XC_DATA1.
-- The column lists mirror the MERGE's INSERT clause instead of relying on
-- SELECT * and the physical column order of the two tables.
INSERT INTO XCManager.XC_DATA1
    (STATION_ID, SENSORNAME, TIME_TAG, ORIG_VALUE, ED_VALUE, SOURCE)
SELECT
    source.STATION_ID,
    source.SENSORNAME,
    source.TIME_TAG,
    source.ORIG_VALUE,
    source.ED_VALUE,
    source.SOURCE
FROM XCManager.FrkFalls_FORMATTED AS source
WHERE NOT EXISTS (
    SELECT *
    FROM XCManager.XC_DATA1 AS target
    WHERE target.STATION_ID = source.STATION_ID
);
这可能更有效率。
答案 1 :(得分:1)
实际上,我最初的解决方案(关闭唯一索引)并不太好。我试图导入的遗留数据中有许多非唯一行,这使我无法在导入后重新启用该索引。
我最终不得不借助公用表表达式(CTE)'q' 编写一个更复杂的查询来处理这一点。该查询先从源表中选出所有互不重复的行(基于字段 TIME_TAG、STATION_ID 和 SENSORNAME),然后只把目标表中尚不存在的行(同样基于字段 TIME_TAG、STATION_ID 和 SENSORNAME)插入目标表。
-- q numbers the source rows within each (TIME_TAG, STATION_ID, SENSORNAME)
-- group so that a single row per group can be picked with rn = 1.
;WITH q AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY TIME_TAG, STATION_ID, SENSORNAME
            ORDER BY TIME_TAG
        ) AS rn
    FROM XConnectDB_NHDES.XCManager.Lakeport_FORMATTED
)
INSERT INTO XConnectDB_NHDES.XCManager.XC_DATA1
SELECT
    src.STATION_ID,
    src.SENSORNAME,
    src.TIME_TAG,
    src.SOURCE,
    src.ORIG_VALUE,
    src.ED_VALUE,
    src.FLAG1,
    src.FLAG2,
    src.FLAG3,
    src.FLAG4,
    src.HIGH_HIGH_FLAG,
    src.HIGH_FLAG,
    src.LOW_FLAG,
    src.LOW_LOW_FLAG,
    src.ROC_FLAG,
    src.NO_CHG_FLAG,
    src.ALARM_FLAG,
    src.STD_DEV_FLAG,
    src.AVG_FLAG
FROM q AS src
WHERE src.rn = 1
    -- Skip rows whose (TIME_TAG, STATION_ID, SENSORNAME) already exists in the target.
    AND NOT EXISTS (
        SELECT *
        FROM XConnectDB_NHDES.XCManager.XC_DATA1 AS tgt
        WHERE tgt.TIME_TAG = src.TIME_TAG
            AND tgt.STATION_ID = src.STATION_ID
            AND tgt.SENSORNAME = src.SENSORNAME
    )
答案 2 :(得分:0)
我猜你可能已经完成了数据的插入,但对于未来,我认为这个查询会更快:
-- Insert the Subset rows that have no identical row in Master.
-- EXCEPT compares entire rows and returns distinct rows only.
INSERT INTO Test.CoveredIndexTestMaster
    SELECT *
    FROM Test.CoveredIndexTestSubset
    EXCEPT
    SELECT *
    FROM Test.CoveredIndexTestMaster;
EXCEPT运算符返回左表(Test.CoveredIndexTestSubset)中不在右表(Test.CoveredIndexTestMaster)中的所有行,在这种情况下,它们将被插入到Test.CoveredIndexTestMaster中。 (当然,表格结构相同。)
虽然我没有时间搭建一个完全复现你情况的测试,但可以告诉你我的测试情况:Test.CoveredIndexTestMaster 约有 900K 行,Test.CoveredIndexTestSubset 约有 206K 行。Subset 中有略多于 3K 的行不在 Master 中。
上述查询需要14秒才能运行。
两个表都没有索引。表有 2 列:一个 int 和一个 varchar。两个表都包含这两列。
所以,你的实际结果可能会有所不同,但我很想听听你的结果。
这是基于Martin输入的更新查询:
-- Insert every Subset row whose Col1 value does not occur in Master.
-- The EXCEPT subquery yields the Col1 values present in Subset but not in Master.
INSERT INTO Test.CoveredIndexTestMaster
SELECT subset_rows.*
FROM Test.CoveredIndexTestSubset AS subset_rows
WHERE subset_rows.Col1 IN (
    SELECT col1 FROM Test.CoveredIndexTestSubset
    EXCEPT
    SELECT col1 FROM Test.CoveredIndexTestMaster
);
这需要20秒才能运行。