如何查询重复记录的完整明细

时间:2012-07-01 12:20:19

标签: sql-server sql-server-2005 duplicates

我需要检索在某些特定字段上取值重复的记录的全部信息(包括其余字段)。

如果使用 MySQL,我可以用下面的方式解决:

-- Recreate the demo table from scratch so the script can be re-run.
DROP TABLE IF EXISTS test;

-- Sample person records: id is an auto-incremented primary key; every other
-- column is nullable free-form data.
CREATE TABLE test (
    id          INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
    surname     VARCHAR(50),
    firstname   VARCHAR(50),
    sex         CHAR(1),
    dob         DATE,
    pob         VARCHAR(50),
    otherfield1 VARCHAR(50),
    otherfield2 VARCHAR(50)
) ENGINE = MyISAM;


-- Seed data: the two smith/john rows and the two white/mike rows agree on
-- surname, firstname, sex, dob and pob and differ only in the otherfield columns.
INSERT INTO test
    (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES
    ('smith', 'john', 'M', '2000-01-01', 'rome',   'xxx',     'yyy'),
    ('black', 'jack', 'M', '1990-12-30', 'milan',  'aaaaa',   'vvvv'),
    ('smith', 'john', 'M', '2000-01-01', 'rome',   'zzz',     'aaaaa'),
    ('white', 'mike', 'M', '1980-03-01', 'naples', 'zzz',     'other text'),
    ('white', 'mike', 'M', '1980-03-01', 'naples', 'zzz',     'foo bar'),
    ('smith', 'ann',  'F', '1992-03-05', 'turin',  'aaaaaaa', 'other text');


-- Return every column of each row whose (surname, firstname, sex, dob, pob)
-- combination occurs more than once. The inner query finds the repeated
-- combinations; the outer query matches rows against them with a
-- multi-column (row constructor) IN.
SELECT *
FROM test
WHERE (surname, firstname, sex, dob, pob) IN (
    SELECT surname, firstname, sex, dob, pob
    FROM test
    GROUP BY surname, firstname, sex, dob, pob
    HAVING COUNT(*) > 1
)

我会得到如下结果:

"id"    "surname"   "firstname" "sex"   "dob"            "pob"  "otherfield1"   "otherfield2"
"1" "smith"           "john"    "M" "2000-01-01"    "rome"     "xxx"    "yyy"
"3" "smith"           "john"    "M" "2000-01-01"    "rome"     "zzz"    "aaaaa"
"4" "white"           "mike"    "M" "1980-03-01"    "naples"   "zzz"    "other text"
"5" "white"           "mike"    "M" "1980-03-01"    "naples"   "zzz"    "foo bar"

但是这种写法在 SQL Server 2005 中无法运行:

-- Temporary-table counterpart of the MySQL sample table: id is an IDENTITY
-- column and dob is stored as DATETIME.
CREATE TABLE #test (
    id          INT IDENTITY,
    surname     VARCHAR(50),
    firstname   VARCHAR(50),
    sex         CHAR(1),
    dob         DATETIME,
    pob         VARCHAR(50),
    otherfield1 VARCHAR(50),
    otherfield2 VARCHAR(50)
)

-- Seed data: the two smith/john rows and the two white/mike rows agree on
-- surname, firstname, sex, dob and pob and differ only in the otherfield columns.
-- One INSERT per row: multi-row VALUES lists are not available in SQL Server 2005.
-- Dates use the unseparated 'YYYYMMDD' form because, for DATETIME columns,
-- 'YYYY-MM-DD' is interpreted according to the session's DATEFORMAT/language
-- (e.g. '1990-12-30' is rejected under a dmy setting), whereas 'YYYYMMDD'
-- is parsed the same way under every setting.
INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('smith', 'john', 'M', '20000101', 'rome', 'xxx', 'yyy');

INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('black', 'jack', 'M', '19901230', 'milan', 'aaaaa', 'vvvv');

INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('smith', 'john', 'M', '20000101', 'rome', 'zzz', 'aaaaa');

INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('white', 'mike', 'M', '19800301', 'naples', 'zzz', 'other text');

INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('white', 'mike', 'M', '19800301', 'naples', 'zzz', 'foo bar');

INSERT INTO #test (surname, firstname, sex, dob, pob, otherfield1, otherfield2)
VALUES ('smith', 'ann', 'F', '19920305', 'turin', 'aaaaaaa', 'other text');

-- Direct port of the MySQL query, kept as the example that does NOT run on
-- SQL Server 2005: T-SQL's IN predicate takes a single expression on its
-- left-hand side, so the parenthesised multi-column list below is rejected.
-- The inner query (repeated key combinations) is valid on its own.
select * from #test where (surname,firstname,sex,dob,pob) in (
select 
surname,firstname,sex,dob,pob
from #test
group by surname,firstname,sex,dob,pob
having count(*) > 1
) 

提前致谢。

编辑

这是我发现的一种可能的解决方案:

-- Detail rows for every repeated (surname, firstname, sex, dob, pob) key:
-- aggregate to find the keys that occur more than once, then join back to
-- the table to pick up the full rows.
-- NOTE: the "=" join predicates never match NULL, so a row with NULL in any
-- key column is not returned even though GROUP BY puts NULLs in one group.
SELECT detail.*
FROM #test AS detail
INNER JOIN (
    SELECT surname, firstname, sex, dob, pob
    FROM #test
    GROUP BY surname, firstname, sex, dob, pob
    HAVING COUNT(*) > 1
) AS dup
    ON  detail.surname   = dup.surname
    AND detail.firstname = dup.firstname
    AND detail.sex       = dup.sex
    AND detail.dob       = dup.dob
    AND detail.pob       = dup.pob

但我想知道是否有更好的方法。我不喜欢在连接(JOIN)中逐一写出所有这些条件。

2 个答案:

答案 0 :(得分:1)

-- Tag every row with the number of rows sharing its
-- (surname, firstname, sex, dob, pob) values, then keep rows whose group has
-- more than one member. Because the outer query selects *, the helper column
-- Cnt is part of the result as well.
SELECT * /*TODO: Just list desired columns*/
FROM (
    SELECT *,
           COUNT(*) OVER (
               PARTITION BY surname, firstname, sex, dob, pob
           ) AS Cnt
    FROM #test
) AS counted
WHERE counted.Cnt > 1

或者:

-- Keep a row when some other row (different id) carries the same
-- (surname, firstname, sex, dob, pob) values. The two single-row SELECTs are
-- compared with INTERSECT, which yields a row only when all five values
-- match and, unlike "=", treats two NULLs as matching.
SELECT *
FROM #test AS src
WHERE EXISTS (
    SELECT *
    FROM #test AS peer
    WHERE peer.id <> src.id
      AND EXISTS (
            SELECT peer.surname, peer.firstname, peer.sex, peer.dob, peer.pob
            INTERSECT
            SELECT src.surname, src.firstname, src.sex, src.dob, src.pob
          )
)

答案 1 :(得分:0)

-- List each (surname, firstname, sex, dob, pob) combination that occurs more
-- than once. This returns one row per repeated combination (key columns
-- only), not the individual records or their id/otherfield values.
SELECT surname, firstname, sex, dob, pob
FROM #test
GROUP BY surname, firstname, sex, dob, pob
HAVING COUNT(*) > 1

这在 SQL Server 2005 中应该可以正常运行。