我正在尝试使用T-SQL中的MERGE语句构建一个经过优化的缓慢变化维(Slowly Changing Dimension,SCD)加载过程。我编写了以下代码,用于处理SCD1和SCD2的更改,以及向数据表Table_2的常规插入;数据来自源表SourceTable,其中Name和Age是SCD1列,Animal和Blood是SCD2列:
-- Load timestamp for this run (hard-coded here). It is not used by the SCD1
-- statement below; the SCD2 statement later in the script stamps it on rows.
DECLARE @LoadingDate DATETIME
SET @LoadingDate = '2012-08-20 14:23:29.827'
--Handle SCD1 Changes
-- Type 1 = overwrite in place: for every Table_2 row whose business key
-- (ID1, ID2) matches a SourceTable row, Name and Age are overwritten with the
-- source values. There is no DateExpires filter, so every row sharing the key
-- (whatever its DateExpires) is updated.
MERGE INTO Table_2 AS DIM
USING SourceTable AS SRC
ON (DIM.ID1 = SRC.ID1
AND DIM.ID2 = SRC.ID2)
WHEN MATCHED
-- Only rows that actually differ are touched. Note that `<>` evaluates to
-- UNKNOWN when either operand is NULL, so a change to or from NULL does not
-- satisfy this condition.
AND (DIM.Name <> SRC.Name
OR DIM.AGE <> SRC.AGE)
THEN
UPDATE
SET DIM.Name = SRC.Name,
DIM.Age = SRC.Age;
--Handle SCD2 Changes
-- Type 2 = keep history, implemented as composable DML: the inner MERGE closes
-- the current version of a changed row, and the outer INSERT consumes the
-- MERGE's OUTPUT rows to add the replacement version.
INSERT INTO Table_2
(ID1, ID2, --Business Key
Name, Age, --SCD1 Columns
Animal, Blood, --SCD2 Columns
DateEffective, DateExpires)
SELECT
ID1, ID2, --Business Key
Name, Age, --SCD1 Columns
Animal, Blood, --SCD2 Columns
DateEffective, DateExpires
FROM (
MERGE Table_2 AS DIM
USING SourceTable AS SRC
ON (DIM.ID1 = SRC.ID1
AND DIM.ID2 = SRC.ID2)
-- Business key not yet present in Table_2: insert it as an open-ended row
-- (DateEffective = @LoadingDate, DateExpires = NULL).
-- NOTE(review): this INSERT has no column list, so the eight values bind to
-- Table_2's columns by position. Table_2's definition is not shown here; if its
-- column order differs from the order of these values, a string such as
-- SRC.Name can land in an INT column, which would fit the reported
-- nvarchar-to-int conversion error. TODO confirm against the table definition.
WHEN NOT MATCHED
THEN INSERT VALUES
(SRC.ID1, SRC.ID2,
SRC.Name, SRC.Age,
SRC.Animal, SRC.Blood,
@LoadingDate, NULL)
-- Current version (DateExpires IS NULL) whose Animal or Blood differs from the
-- source: close it as of @LoadingDate. `!=` is UNKNOWN when either side is NULL,
-- so a change to or from NULL does not satisfy this condition.
WHEN MATCHED
AND DIM.DateExpires IS NULL
AND (DIM.Animal != SRC.Animal
OR DIM.Blood != SRC.Blood)
THEN UPDATE SET DIM.DateExpires = @LoadingDate
-- One output row per affected target row: the MERGE action plus the source
-- values, already shaped as a new open-ended version.
OUTPUT $action Action_Out,
SRC.ID1, SRC.ID2,
SRC.Name, SRC.Age,
SRC.Animal, SRC.Blood,
@LoadingDate AS DateEffective,
NULL AS DateExpires) AS MERGE_OUT
-- Keep only rows produced by the UPDATE branch; rows from the INSERT branch
-- were already written to Table_2 by the MERGE itself.
WHERE MERGE_OUT.Action_Out = 'UPDATE';
代码的第一部分(SCD1更改)可以正常工作,但在尝试插入一个新行(其业务键ID1和ID2与数据表中任何现有行都不匹配)时,第二部分会报错:错误信息指出无法将nvarchar值“Labus”转换为int。“Labus”是Name字段中的值。
两个表的设计如下图所示,区别在于SourceTable没有维护列(housekeeping columns,例如DateEffective和DateExpires):
如能提供帮助,我将不胜感激。谢谢!
答案 0 :(得分:1)
由于错误表明隐式nvarchar到int转换出错,解决此问题的初始步骤是显式地转换所有nvarchar和int值(如下所示),然后检查是否仍然出现错误。
如果没有出现错误,则可以逐个移除这些强制转换,从而定位到出问题的具体int或nvarchar字段。
如果仍然收到错误,它很可能是一条更具体的错误信息,可以帮助您找出出错的代码位置。
-- Diagnostic variant of the SCD1 step: every key and attribute is cast
-- explicitly so that a failing conversion points at one specific column.
-- Once the offending column is found, the casts can be removed again.
DECLARE @LoadingDate DATETIME;
SET @LoadingDate = '2012-08-20 14:23:29.827';

--Handle SCD1 Changes
-- Type 1 = overwrite in place: rows matched on the business key (ID1, ID2)
-- get Name and Age overwritten when either value differs from the source.
MERGE INTO Table_2 AS DIM
USING SourceTable AS SRC
    ON (    CAST(DIM.ID1 AS INT) = CAST(SRC.ID1 AS INT)
        -- SRC is the only alias declared for SourceTable; the second key
        -- comparison must use it as well (an undefined alias fails to compile).
        AND CAST(DIM.ID2 AS INT) = CAST(SRC.ID2 AS INT)
       )
WHEN MATCHED
    -- NOTE(review): Age is compared and assigned as NVARCHAR(255) purely for
    -- diagnosis; the real column types are not shown here -- confirm them
    -- before keeping these casts.
    AND (   CAST(DIM.Name AS NVARCHAR(255)) <> CAST(SRC.Name AS NVARCHAR(255))
         OR CAST(DIM.Age  AS NVARCHAR(255)) <> CAST(SRC.Age  AS NVARCHAR(255))
        )
THEN
    UPDATE
    SET DIM.Name = CAST(SRC.Name AS NVARCHAR(255)),
        DIM.Age  = CAST(SRC.Age  AS NVARCHAR(255));
--Handle SCD2 Changes
-- Diagnostic variant of the SCD2 step (explicit casts everywhere).
-- Composable DML: the inner MERGE inserts business keys that are new to Table_2
-- and closes the current version of rows whose Animal or Blood changed; the
-- outer INSERT reads the MERGE's OUTPUT and adds a new open-ended version for
-- every row the MERGE closed.
INSERT INTO Table_2
        ( ID1,
          ID2,                              --Business Key
          Name,
          Age,                              --SCD1 Columns
          Animal,
          Blood,                            --SCD2 Columns
          DateEffective,
          DateExpires
        )
SELECT  CAST(ID1 AS INT),
        CAST(ID2 AS INT),                   --Business Key
        CAST(Name AS NVARCHAR(255)),
        CAST(Age AS NVARCHAR(255)),         --SCD1 Columns
        CAST(Animal AS NVARCHAR(255)),
        CAST(Blood AS NVARCHAR(255)),       --SCD2 Columns
        DateEffective,
        DateExpires
FROM    (
            MERGE Table_2 AS DIM
            USING SourceTable AS SRC
                ON (    CAST(DIM.ID1 AS INT) = CAST(SRC.ID1 AS INT)
                    AND CAST(DIM.ID2 AS INT) = CAST(SRC.ID2 AS INT)
                   )
            WHEN NOT MATCHED
                -- Explicit column list: without it the values bind to Table_2's
                -- columns by position, so any difference between the table's
                -- physical column order and this value order silently sends a
                -- value (e.g. Name) into the wrong column.
                THEN INSERT
                        ( ID1,
                          ID2,
                          Name,
                          Age,
                          Animal,
                          Blood,
                          DateEffective,
                          DateExpires
                        )
                     VALUES
                        ( CAST(SRC.ID1 AS INT),
                          CAST(SRC.ID2 AS INT),
                          CAST(SRC.Name AS NVARCHAR(255)),
                          CAST(SRC.Age AS NVARCHAR(255)),
                          CAST(SRC.Animal AS NVARCHAR(255)),
                          CAST(SRC.Blood AS NVARCHAR(255)),
                          @LoadingDate,     -- new row becomes effective now
                          NULL              -- and is open-ended
                        )
            WHEN MATCHED
                -- Only the current version (DateExpires IS NULL) is closed.
                AND DIM.DateExpires IS NULL
                AND (   CAST(DIM.Animal AS NVARCHAR(255)) != CAST(SRC.Animal AS NVARCHAR(255))
                     OR CAST(DIM.Blood  AS NVARCHAR(255)) != CAST(SRC.Blood  AS NVARCHAR(255))
                    )
                THEN UPDATE
                     SET DIM.DateExpires = @LoadingDate
            -- One output row per affected target row: the action taken plus the
            -- source values, already shaped as a new open-ended version.
            OUTPUT  $action Action_Out,
                    SRC.ID1,
                    SRC.ID2,
                    SRC.Name,
                    SRC.Age,
                    SRC.Animal,
                    SRC.Blood,
                    @LoadingDate AS DateEffective,
                    NULL AS DateExpires
        ) AS MERGE_OUT
-- Rows from the INSERT branch are already in Table_2; only closed (updated)
-- rows need a replacement version.
WHERE   MERGE_OUT.Action_Out = 'UPDATE';
--...
--...
--...
答案 1 :(得分:0)
尝试修改OUTPUT子句,如下所示:
-- Fragment only: replacement for the OUTPUT clause (and the closing of the
-- MERGE_OUT derived table) of the SCD2 statement. Every OUTPUT column is given
-- an explicit alias so the outer SELECT can reference it by name; the final
-- WHERE keeps only rows whose MERGE action was UPDATE.
OUTPUT
$action Action_Out ,
SRC.ID1 AS ID1,
SRC.ID2 AS ID2,
SRC.Name AS Name,
SRC.Age AS Age,
SRC.Animal AS Animal,
SRC.Blood AS Blood,
@LoadingDate AS DateEffective ,
NULL AS DateExpires) AS MERGE_OUT
WHERE MERGE_OUT.Action_Out = 'UPDATE' ;
答案 2 :(得分:0)
试试下面的写法:基本上,我为OUTPUT的各列以及派生表MERGE_OUT指定了别名,并在SELECT列表中通过该别名引用这些列(这段可组合DML仍有改进的余地)。
另外,当您在子查询中合并到Table_2中时,是否有理由再次将其他记录(用于更新操作)插入Table_2?
-- Load timestamp for this run (hard-coded here). It is not used by the SCD1
-- statement below; the SCD2 statement later in the script stamps it on rows.
DECLARE @LoadingDate DATETIME;
SET @LoadingDate = '2012-08-20 14:23:29.827';

--Handle SCD1 Changes
-- Type 1 = overwrite in place: rows matched on the business key (ID1, ID2)
-- get Name and Age overwritten with the source values when either differs.
-- There is no DateExpires filter, so every row sharing the key is updated.
MERGE INTO Table_2 AS DIM
USING SourceTable AS SRC
    ON (    DIM.ID1 = SRC.ID1
        AND DIM.ID2 = SRC.ID2
       )
WHEN MATCHED
    -- NULL-safe change detection: EXCEPT treats two NULLs as equal and a NULL
    -- as different from any value, so a change to or from NULL is detected.
    -- A plain `<>` comparison evaluates to UNKNOWN in those cases and the
    -- change would never be applied.
    AND EXISTS ( SELECT SRC.Name, SRC.Age
                 EXCEPT
                 SELECT DIM.Name, DIM.Age
               )
THEN
    UPDATE
    SET DIM.Name = SRC.Name,
        DIM.Age  = SRC.Age;
--Handle SCD2 Changes
-- Type 2 = keep history, implemented as composable DML:
--   1. the inner MERGE inserts business keys that are new to Table_2 and closes
--      the current version (DateExpires IS NULL) of rows whose Animal or Blood
--      differs from the source;
--   2. the outer INSERT reads the MERGE's OUTPUT and adds a new open-ended
--      version for every row the MERGE closed.
INSERT INTO Table_2
        ( ID1,
          ID2,                          --Business Key
          Name,
          Age,                          --SCD1 Columns
          Animal,
          Blood,                        --SCD2 Columns
          DateEffective,
          DateExpires
        )
SELECT  MERGE_OUT.ID1,
        MERGE_OUT.ID2,                  --Business Key
        MERGE_OUT.Name,
        MERGE_OUT.Age,                  --SCD1 Columns
        MERGE_OUT.Animal,
        MERGE_OUT.Blood,                --SCD2 Columns
        MERGE_OUT.DateEffective,
        MERGE_OUT.DateExpires
FROM    (
            MERGE INTO Table_2 AS DIM
            USING SourceTable AS SRC
                ON (    DIM.ID1 = SRC.ID1
                    AND DIM.ID2 = SRC.ID2
                   )
            WHEN NOT MATCHED
                -- Explicit column list: without it the values bind to Table_2's
                -- columns by position, so any difference between the table's
                -- physical column order and this value order silently sends a
                -- value (e.g. Name) into the wrong column.
                THEN INSERT
                        ( ID1,
                          ID2,
                          Name,
                          Age,
                          Animal,
                          Blood,
                          DateEffective,
                          DateExpires
                        )
                     VALUES
                        ( SRC.ID1,
                          SRC.ID2,
                          SRC.Name,
                          SRC.Age,
                          SRC.Animal,
                          SRC.Blood,
                          @LoadingDate,  -- new row becomes effective now
                          NULL           -- and is open-ended
                        )
            WHEN MATCHED
                -- Only the current version is closed.
                -- NOTE(review): `!=` is UNKNOWN when either side is NULL, so a
                -- change to or from NULL in Animal/Blood is not detected here;
                -- confirm whether these columns are nullable.
                AND DIM.DateExpires IS NULL
                AND (   DIM.Animal != SRC.Animal
                     OR DIM.Blood  != SRC.Blood
                    )
                THEN UPDATE
                     SET DIM.DateExpires = @LoadingDate
            -- One output row per affected target row: the action taken plus the
            -- source values, already shaped as a new open-ended version.
            OUTPUT  $action       AS Action_Out,
                    SRC.ID1       AS ID1,
                    SRC.ID2       AS ID2,
                    SRC.NAME      AS Name,
                    SRC.Age       AS Age,
                    SRC.Animal    AS Animal,
                    SRC.Blood     AS Blood,
                    @LoadingDate  AS DateEffective,
                    NULL          AS DateExpires
        -- The alias list must name every OUTPUT column, in order, including
        -- Action_Out: the WHERE clause below filters on it, and a list shorter
        -- than the OUTPUT column count is rejected by SQL Server.
        ) AS MERGE_OUT ( Action_Out, ID1, ID2, Name, Age, Animal, Blood,
                         DateEffective, DateExpires )
-- Rows from the INSERT branch are already in Table_2; only closed (updated)
-- rows need a replacement version.
WHERE   MERGE_OUT.Action_Out = 'UPDATE';