我的问题是如何识别SQL表中的重复(重复)'组'数据。我目前正在使用SQL Server 2005,所以更喜欢基于那个或ansi-sql的解决方案。
以下是一个示例表和预期结果(如下):
declare @data table (id nvarchar(10), fund nvarchar(1), xtype nvarchar(1))
insert into @data select 'Switch_1', 'A', 'S'
insert into @data select 'Switch_1', 'X', 'B'
insert into @data select 'Switch_1', 'Y', 'B'
insert into @data select 'Switch_1', 'Z', 'B'
insert into @data select 'Switch_2', 'A', 'S'
insert into @data select 'Switch_2', 'X', 'B'
insert into @data select 'Switch_2', 'Y', 'B'
insert into @data select 'Switch_2', 'Z', 'B'
insert into @data select 'Switch_3', 'C', 'S'
insert into @data select 'Switch_3', 'D', 'B'
insert into @data select 'Switch_4', 'C', 'S'
insert into @data select 'Switch_4', 'F', 'B'
(新数据)
insert into @data select 'Switch_5', 'A', 'S'
insert into @data select 'Switch_5', 'X', 'B'
insert into @data select 'Switch_5', 'Y', 'B'
insert into @data select 'Switch_5', 'Z', 'B'
-- id fund xtype match
-- ---------- ---- ----- ---------
-- Switch_1 A S Match_1
-- Switch_1 X B Match_1
-- Switch_1 Y B Match_1
-- Switch_1 Z B Match_1
-- Switch_2 A S Match_1
-- Switch_2 X B Match_1
-- Switch_2 Y B Match_1
-- Switch_2 Z B Match_1
-- Switch_3 C S
-- Switch_3 D B
-- Switch_4 C S
-- Switch_4 F B
(新结果)
-- Switch_5 A S Match_1
-- Switch_5 X B Match_1
-- Switch_5 Y B Match_1
-- Switch_5 Z B Match_1
我只想在ALL或NOTHING的基础上进行匹配(即组中的所有记录都匹配另一组中的所有记录 - 而不是部分匹配)。可以使用任何匹配ID(我上面使用了Match_1,但可以是数字等)。
感谢您的帮助。
(编辑:我想我应该补充一点,每组可以有任意数量的行,而不仅仅是上面示例中显示的2或4行 - 而且我也试图避免使用游标)
(编辑2:如果找到多个匹配项,我似乎遇到了问题。当找到多个匹配项时,提供的SQL输出会返回Switch_1的重复记录。我已相应更新了示例数据。不确定列文是否仍然遵循这一点 - 我也在寻找解决方案,如果找到的话会发布在这里。)
答案 0 :(得分:2)
q
:使用XML PATH构造将一个ID的所有资金和xtypes合并为一个字符串r
:选择一个ROW_NUMBER以及相应组的相应ID @data
和r
;WITH q AS (
SELECT DISTINCT d.id
, DuplicateData = STUFF((SELECT ', ' + fund + xtype FROM @data WHERE id = d.id FOR XML PATH('')), 1, 2, '')
FROM @data d
)
, r AS (
SELECT id1 = q1.id
, id2 = q2.id
, rn = ROW_NUMBER() OVER (ORDER BY q1.ID)
FROM q q1
INNER JOIN q q2 ON q1.DuplicateData = q2.DuplicateData AND q1.id < q2.id
)
SELECT id
, fund
, xtype
, match = 'Match_' + CAST(r.rn AS VARCHAR(32))
FROM @data d
LEFT OUTER JOIN r ON d.id IN (r.id1, r.id2)
id fund xtype match
---------- ---- ----- --------------------------------------
Switch_1 A S Match_1
Switch_1 X B Match_1
Switch_1 Y B Match_1
Switch_1 Z B Match_1
Switch_2 A S Match_1
Switch_2 X B Match_1
Switch_2 Y B Match_1
Switch_2 Z B Match_1
Switch_3 C S NULL
Switch_3 D B NULL
Switch_4 C S NULL
Switch_4 F B NULL
答案 1 :(得分:1)
这是另一个查询:
create table #temp1 (
id varchar(10),
fund nvarchar(1),
xtype nvarchar(1)
)
insert into #temp1 select 'Switch_1', 'A', 'S'
insert into #temp1 select 'Switch_1', 'X', 'B'
insert into #temp1 select 'Switch_1', 'Y', 'B'
insert into #temp1 select 'Switch_1', 'Z', 'B'
insert into #temp1 select 'Switch_2', 'A', 'S'
insert into #temp1 select 'Switch_2', 'X', 'B'
insert into #temp1 select 'Switch_2', 'Y', 'B'
insert into #temp1 select 'Switch_2', 'Z', 'B'
insert into #temp1 select 'Switch_3', 'C', 'S'
insert into #temp1 select 'Switch_3', 'D', 'B'
insert into #temp1 select 'Switch_4', 'C', 'S'
insert into #temp1 select 'Switch_4', 'F', 'B'
select t1.*, case when t2.equal = t3.total then 'True' else 'False' end as 'Match' from #temp1 t1
left outer join (select m.id, count(m2.id) as 'equal' from #temp1 m
inner join #temp1 m2 on m.Id <> m2.Id and m.fund = m2.fund and m.xtype = m2.xtype
group by m.id) t2 on t1.id = t2.id
inner join (select m3.id, count(m3.fund) as 'total' from #temp1 m3 group by m3.id) t3 on t3.id = t1.id
drop table #temp1