请参阅下面的DDL:
-- Sample data before the upgrade: rows sharing a GroupID are records
-- (identified by SystemID + RecordID) that have been linked as one person.
CREATE TABLE #Grouping1 (
    GroupID     int,
    SystemID    int,
    RecordID    int,
    Name        varchar(100),
    DateOfBirth datetime
);

INSERT INTO #Grouping1 (GroupID, SystemID, RecordID, Name, DateOfBirth)
VALUES
    (1, 1, 1,   'Mark Williams',  '1980-01-01'),
    (1, 2, 128, 'Mark Welliams',  '1980-01-01'),
    (1, 3, 36,  'Marko Williams', '1980-01-01'),
    (2, 1, 18,  'Anne Smith',     '1960-01-23'),
    (2, 2, 64,  'Anna Smyth',     '1960-01-23'),
    (2, 8, 23,  'Annie Smith',    '1960-01-23');
该表显示,一个人在三个系统中有三个记录,另一个人在三个系统中有三个记录(组ID表示人员已链接)。
我正在升级此系统,并且组ID已更改。例如,请参阅下面的DDL:
-- Sample data after the upgrade: same record identifiers (SystemID + RecordID),
-- but the GroupID values have been reassigned.
CREATE TABLE #Grouping2 (
    GroupID     int,
    SystemID    int,
    RecordID    int,
    Name        varchar(100),
    DateOfBirth datetime
);

INSERT INTO #Grouping2 (GroupID, SystemID, RecordID, Name, DateOfBirth)
VALUES
    (187, 1, 1,   'Mark Williams', '1980-01-01'),
    (187, 2, 128, 'Mark Welliams', '1980-01-01'),
    (208, 1, 18,  'Anne Smith',    '1960-01-23'),
    (208, 2, 64,  'Anna Smyth',    '1960-01-23'),
    (208, 8, 23,  'Annie Smith',   '1960-01-23');
我想检查链接在一起的所有人是否仍然链接在一起。我在考虑做这样的事情。
-- Materialise every ordered pair of distinct records that share a GroupID
-- in #grouping1. Both (a, b) and (b, a) are produced.
SELECT
    lhs.groupid,
    lhs.systemid AS systemid1,
    lhs.recordid AS recordid1,
    rhs.systemid AS systemid2,
    rhs.recordid AS recordid2
INTO #OldTable
FROM #grouping1 AS lhs
INNER JOIN #grouping1 AS rhs
    ON lhs.groupid = rhs.groupid
-- Exclude a record paired with itself.
WHERE lhs.systemid <> rhs.systemid
   OR lhs.recordid <> rhs.recordid;
-- Materialise every ordered pair of distinct records that share a GroupID
-- in #grouping2. Both (a, b) and (b, a) are produced.
SELECT
    lhs.groupid,
    lhs.systemid AS systemid1,
    lhs.recordid AS recordid1,
    rhs.systemid AS systemid2,
    rhs.recordid AS recordid2
INTO #NewTable
FROM #grouping2 AS lhs
INNER JOIN #grouping2 AS rhs
    ON lhs.groupid = rhs.groupid
-- Exclude a record paired with itself.
WHERE lhs.systemid <> rhs.systemid
   OR lhs.recordid <> rhs.recordid;
-- List the groups whose linked record pairs differ between the two groupings.
--   groupid     : old group that lost a link (NULL when the pair exists only in #newtable)
--   new_groupid : new group that gained a link (NULL when the pair exists only in #oldtable)
-- Selecting only the old groupid would collapse every newly added link into a
-- single NULL row, so the new group id is returned alongside it.
SELECT DISTINCT
    o.groupid AS groupid,
    n.groupid AS new_groupid
FROM #oldtable AS o
FULL OUTER JOIN #newtable AS n
    ON  o.systemid1 = n.systemid1
    AND o.recordid1 = n.recordid1
    AND o.systemid2 = n.systemid2
    AND o.recordid2 = n.recordid2
-- Keep only pairs that are missing on one side of the join.
WHERE o.systemid1 IS NULL
   OR n.systemid1 IS NULL;
这会告诉我所有有问题的分组。但我无法这样做,因为 #Grouping1 和 #Grouping2 中有超过 100,000,000 行。还有其他方法可以解决这个问题吗?例如使用校验和(CHECKSUM)或 HASHBYTES?
答案 0 :(得分:2)
我认为您要找的是 checksum_agg()(参见官方文档)。如果按名称来计算:
-- One checksum per group, computed over the member names.
-- CHECKSUM_AGG only accepts an integer expression, so each name is first
-- reduced to an int with CHECKSUM().
-- NOTE: checksums can collide; equal values do not prove the groups are identical.
select groupId, checksum_agg(checksum(name)) as name_checksum
from #Grouping2
group by groupId;
或者通过系统/记录:
-- One checksum per group, computed over the (systemId, recordId) pairs.
-- CHECKSUM_AGG only accepts an integer expression, so each pair is hashed to
-- an int with CHECKSUM() instead of being concatenated into a varchar.
-- NOTE: checksums can collide; equal values do not prove the groups are identical.
select groupId, checksum_agg(checksum(systemId, recordId)) as record_checksum
from #Grouping2
group by groupId;
然后,您可以使用 full outer join 查找两个摘要表之间的差异。您似乎已经知道如何编写这部分查询。