我正在尝试在T-SQL中实现2型SCD。我正在遵循此链接中给出的出色方法:https://www.mssqltips.com/sqlservertip/2883/using-the-sql-server-merge-statement-to-process-type-2-slowly-changing-dimensions/
但是,在我的情况下,表中没有内置BinaryCheckSum列。我需要即时计算。我在下面为此实现发布了我的代码(创建了一些示例数据)。
有人可以让我知道这种方法是否行得通吗?还是我需要在列本身中包含DimensionCheckSum?另外,就我而言,我不需要代理ID。 (请原谅任何语法错误)
/*
--CREATING A SAMPLE SCD TABLE WITH NO DATA
CREATE TABLE SCD_ACCOUNT
(
SCD_ACCOUNT_ID INT IDENTITY(1,1) NOT NULL,
SCD_SRC_ID INT NULL,
SCD_SF_SRC_ID INT NULL,
SCD_SF_ATTR1 VARCHAR(100) NULL,
SCD_SF_ATTR2 VARCHAR(100) NULL,
SCD_SF_ATTR3 VARCHAR(100) NULL,
SCD_SF_ATTR4 VARCHAR(100) NULL,
SCD_SF_ATTR5 VARCHAR(100) NULL,
SCD_EFF_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_EFF_DATE DEFAULT GETDATE(),
SCD_END_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_END_DATE DEFAULT '12/31/9999',
SCD_IS_ACTIVE VARCHAR(10) NULL CONSTRAINT DF_SCD_IS_ACTIVE DEFAULT 'Y',
SCD_LAST_UPD_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_LAST_UPD_DATE DEFAULT GETDATE(),
SCD_LAST_UPD_BY VARCHAR(100) NOT NULL CONSTRAINT DF_SCD_LAST_UPD_BY DEFAULT SUSER_SNAME());
SELECT * FROM SCD_ACCOUNT;
--CREATING A SOURCE TABLE WHICH WILL INSERT/UPDATE DATA INTO SCD TABLE
--THIS WILL HAVE THE SAME COLUMNS AS ABOVE TABLE EXCEPT SCD_ACCOUNT_ID, SCD_SRC_ID, SCD_IS_ACTIVE, SCD_EFF_DATE, SCD_END_DATE
CREATE TABLE SF_SRC
(
SF_SRC_ID INT NULL,
SF_ATTR1 VARCHAR(100) NULL,
SF_ATTR2 VARCHAR(100) NULL,
SF_ATTR3 VARCHAR(100) NULL,
SF_ATTR4 VARCHAR(100) NULL,
SF_ATTR5 VARCHAR(100) NULL);
--INSERTING DATA INTO SF_SRC TABLE
INSERT INTO SF_SRC VALUES(100,'Row1Attr1','Row1Attr2','Row1Attr3','Row1Attr4','Row1Attr5');
INSERT INTO SF_SRC VALUES(200,'Row2Attr1','Row2Attr2','Row2Attr3','Row2Attr4','Row2Attr5');
INSERT INTO SF_SRC VALUES(300,'Row3Attr1','Row3Attr2','Row3Attr3','Row3Attr4','Row3Attr5');
SELECT * FROM SF_SRC;
--DROP TABLE SCD_ACCOUNT;
--DROP TABLE SF_SRC;
*/
-- BEGIN OF INSERT USING MERGE
INSERT INTO dbo.SCD_ACCOUNT
( --TABLE AND COLUMNS IN WHICH TO INSERT THE DATA
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
)
-- SELECT THE ROWS/COLUMNS TO INSERT THAT ARE OUTPUT FROM THIS MERGE STATEMENT
-- IN THIS EXAMPLE, THE ROWS TO BE INSERTED ARE THE ROWS THAT HAVE CHANGED (UPDATE).
SELECT
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
FROM
(
-- THIS IS THE BEGINNING OF THE MERGE STATEMENT.
-- THE TARGET MUST BE DEFINED, IN THIS EXAMPLE IT IS OUR SLOWLY CHANGING
-- DIMENSION TABLE
MERGE INTO dbo.SCD_ACCOUNT AS target
-- THE SOURCE MUST BE DEFINED WITH THE USING CLAUSE
USING
(
-- THE SOURCE IS MADE UP OF THE ATTRIBUTE COLUMNS FROM THE STAGING TABLE.
SELECT
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
FROM dbo.SF_SRC
) AS SOURCE
(
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4,
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
) ON --WE ARE MATCHING ON THE SOURCESYSTEMID IN THE TARGET TABLE AND THE SOURCE TABLE.
(
TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID
)
-- IF THE ID'S MATCH BUT THE CHECKSUMS ARE DIFFERENT, THEN THE RECORD HAS CHANGED;
-- THEREFORE, UPDATE THE EXISTING RECORD IN THE TARGET, END DATING THE RECORD
-- AND SET THE CURRENTRECORD FLAG TO N
WHEN MATCHED AND TARGET.BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SF_ATTR5)
<> SOURCE.BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5)
AND TARGET.SCD_IS_ACTIVE = 'Y'
THEN
UPDATE SET
SCD_END_DATE = GETDATE()-1,
SCD_IS_ACTIVE = 'N',
SCD_LAST_UPD_DATE = GETDATE(),
SCD_LAST_UPD_BY = SUSER_SNAME()
-- IF THE ID'S DO NOT MATCH, THEN THE RECORD IS NEW;
-- THEREFORE, INSERT THE NEW RECORD INTO THE TARGET USING THE VALUES FROM THE SOURCE.
WHEN NOT MATCHED THEN
INSERT
(
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5
)
VALUES
(
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5
)
OUTPUT $ACTION,
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5,
GETDATE(),
'12/31/9999'
) -- THE END OF THE MERGE STATEMENT
--THE CHANGES OUTPUT BELOW ARE THE RECORDS THAT HAVE CHANGED AND WILL NEED
--TO BE INSERTED INTO THE SLOWLY CHANGING DIMENSION.
AS CHANGES
(
ACTION,
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE
)
WHERE ACTION='UPDATE';
答案 0 :(得分:0)
我设法自己找到了解决方案。如果有人需要,请在下面共享此内容:
INSERT INTO dbo.SCD_ACCOUNT
(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5, SCD_EFF_DATE, SCD_END_DATE)
SELECT SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5, SCD_EFF_DATE, SCD_END_DATE
FROM
(MERGE INTO dbo.SCD_ACCOUNT AS TARGET USING
(SELECT SF_SRC_ID,SF_ATTR1,SF_ATTR2,SF_ATTR3,SF_ATTR4,SF_ATTR5 FROM dbo.SF_SRC) AS SOURCE
(SF_SRC_ID,SF_ATTR1,SF_ATTR2,SF_ATTR3,SF_ATTR4, SF_ATTR5) ON
(TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID)
WHEN MATCHED AND BINARY_CHECKSUM(TARGET.SCD_SF_SRC_ID, TARGET.SCD_SF_ATTR1, TARGET.SCD_SF_ATTR2, TARGET.SCD_SF_ATTR3, TARGET.SCD_SF_ATTR4, TARGET.SCD_SF_ATTR5)
<> BINARY_CHECKSUM(SOURCE.SF_SRC_ID, SOURCE.SF_ATTR1, SOURCE.SF_ATTR2, SOURCE.SF_ATTR3, SOURCE.SF_ATTR4, SOURCE.SF_ATTR5)
AND TARGET.SCD_IS_ACTIVE = 'Y'
THEN UPDATE SET SCD_END_DATE = GETDATE()-1, SCD_IS_ACTIVE = 'N', SCD_LAST_UPD_DATE = GETDATE(), SCD_LAST_UPD_BY = SUSER_SNAME()
WHEN NOT MATCHED THEN INSERT (SCD_SF_SRC_ID,SCD_SF_ATTR1,SCD_SF_ATTR2,SCD_SF_ATTR3,SCD_SF_ATTR4,SCD_SF_ATTR5) VALUES
(SOURCE.SF_SRC_ID,SOURCE.SF_ATTR1,SOURCE.SF_ATTR2,SOURCE.SF_ATTR3,SOURCE.SF_ATTR4,SOURCE.SF_ATTR5)
OUTPUT $ACTION, SOURCE.SF_SRC_ID,SOURCE.SF_ATTR1,SOURCE.SF_ATTR2,SOURCE.SF_ATTR3,SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5,GETDATE(),'12/31/9999')AS CHANGES
(ACTION,SCD_SF_SRC_ID,SCD_SF_ATTR1,SCD_SF_ATTR2,SCD_SF_ATTR3,SCD_SF_ATTR4,SCD_SF_ATTR5,SCD_EFF_DATE,SCD_END_DATE)
WHERE ACTION='UPDATE';