我有一张名为 Customer 的表,其中存在基于某些字段的重复记录。
客户表:
CUST_ID
EMAIL_ID
ROLE_ID
DOB
CREATION_DATE
数据如下所示:
cust_id email_id role_id dob creation_date
1 abc@abc.com 5 4/2/1966 17/09/2016
2 abc@abc.com 5 4/2/1966 20/09/2016
3 xyz@xyz.com 5 15/2/1991 18/09/2016
4 xyz@xyz.com 5 15/2/1991 21/09/2016
5 pqr@pqr.com 5 16/2/1985 30/09/2016
6 pqr@pqr.com 5 16/2/1985 05/11/2016
7 pqr@pqr.com 5 16/2/1985 04/11/2016
对于2条或更多条记录,email_id,role_id和dob相同(重复),如上表所示。
我想要两个不同的查询,结果如下:
cust_id email_id role_id dob creation_date
1 abc@abc.com 5 4/2/1966 17/09/2016
3 xyz@xyz.com 5 15/2/1991 18/09/2016
5 pqr@pqr.com 5 16/2/1985 30/09/2016
即:按 email_id、role_id 和 dob 去除重复记录,每组只保留 creation_date 最早的那一条。
cust_id email_id role_id dob creation_date
2 abc@abc.com 5 4/2/1966 20/09/2016
4 xyz@xyz.com 5 15/2/1991 21/09/2016
6 pqr@pqr.com 5 16/2/1985 05/11/2016
即:按 email_id、role_id 和 dob 去除重复记录,每组只保留 creation_date 最晚的那一条。
编辑:针对上述问题的追加提问。
现在,当我连接 Customer 和 Individual 这两张表时,应该如何得到与上面相同的结果?
客户表:
CUST_ID
EMAIL_ID
ROLE_ID
individaul_id(外键)
creation_date
个人表(Individual):
individaul_id
dob
使用以下查询:
-- List every (email_id, role_id, dob) combination that is shared by more than one customer row.
SELECT
    c.email_id,
    c.role_id,
    i.dob
FROM CUSTOMER AS c
INNER JOIN INDIVIDUAL AS i
    ON i.individaul_id = c.individaul_id
GROUP BY
    c.email_id,
    c.role_id,
    i.dob
HAVING COUNT(*) > 1
我使用的是 MS SQL Server 2012 数据库。提前致谢。
答案 0 :(得分:2)
您可以使用ROW_NUMBER()按创建日期排序并过滤掉重复记录
第一个查询给出最小创建日期的记录
-- Keep only the earliest-created row of each (email_id, role_id, dob) group.
;WITH ranked_customer AS (
    SELECT
        cust_id,
        email_id,
        role_id,
        dob,
        creation_date,
        ROW_NUMBER() OVER (
            PARTITION BY email_id, role_id, dob
            -- cust_id breaks ties so rows with the same creation_date give a repeatable result
            ORDER BY creation_date, cust_id
        ) AS seq
    FROM customer
)
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM ranked_customer
WHERE seq = 1
对于最大创建日期,同一查询使用ORDER BY按降序完成
-- Keep only the latest-created row of each (email_id, role_id, dob) group.
;WITH ranked_customer AS (
    SELECT
        cust_id,
        email_id,
        role_id,
        dob,
        creation_date,
        ROW_NUMBER() OVER (
            PARTITION BY email_id, role_id, dob
            -- cust_id breaks ties so rows with the same creation_date give a repeatable result
            ORDER BY creation_date DESC, cust_id DESC
        ) AS seq
    FROM customer
)
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM ranked_customer
WHERE seq = 1
修改: 对于JOIN查询,只需在CTE的SELECT语句中加入JOIN子句即可
-- Keep only the earliest-created customer row of each (email_id, role_id, dob) group,
-- where dob is read from the joined INDIVIDUAL row.
;WITH ranked_customer AS (
    SELECT
        c.cust_id,
        c.email_id,
        c.role_id,
        i.dob,
        c.creation_date,
        ROW_NUMBER() OVER (
            -- dob must be taken from INDIVIDUAL (i): in the joined schema CUSTOMER has no dob column
            PARTITION BY c.email_id, c.role_id, i.dob
            -- cust_id breaks ties so rows with the same creation_date give a repeatable result
            ORDER BY c.creation_date, c.cust_id
        ) AS seq
    FROM customer AS c
    INNER JOIN INDIVIDUAL AS i
        ON c.individaul_id = i.individaul_id
)
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM ranked_customer
WHERE seq = 1
对于最大创建日期,同一查询使用ORDER BY按降序完成
-- Keep only the latest-created customer row of each (email_id, role_id, dob) group,
-- where dob is read from the joined INDIVIDUAL row.
;WITH ranked_customer AS (
    SELECT
        c.cust_id,
        c.email_id,
        c.role_id,
        i.dob,
        c.creation_date,
        ROW_NUMBER() OVER (
            -- dob must be taken from INDIVIDUAL (i): in the joined schema CUSTOMER has no dob column
            PARTITION BY c.email_id, c.role_id, i.dob
            -- cust_id breaks ties so rows with the same creation_date give a repeatable result
            ORDER BY c.creation_date DESC, c.cust_id DESC
        ) AS seq
    FROM customer AS c
    INNER JOIN INDIVIDUAL AS i
        ON c.individaul_id = i.individaul_id
)
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM ranked_customer
WHERE seq = 1
答案 1 :(得分:1)
使用MIN和MAX函数
-- Return the earliest-created row of each (email_id, role_id, dob) group.
-- MIN(cust_id) and MIN(creation_date) taken side by side are independent aggregates and can
-- describe two different rows, so the group minimum is computed as a window aggregate and the
-- whole matching row is returned instead.
-- NOTE(review): rows tied on the minimum creation_date are all returned — confirm that is acceptable.
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM (
    SELECT
        cust_id,
        email_id,
        role_id,
        dob,
        creation_date,
        MIN(creation_date) OVER (PARTITION BY email_id, role_id, dob) AS first_creation_date
    FROM customer
) AS c
WHERE creation_date = first_creation_date;
-- Return the latest-created row of each (email_id, role_id, dob) group.
-- MAX(cust_id) and MAX(creation_date) taken side by side are independent aggregates and can
-- describe two different rows (in the sample data: cust_id 7 paired with the date of cust_id 6),
-- so the group maximum is computed as a window aggregate and the whole matching row is returned.
-- NOTE(review): rows tied on the maximum creation_date are all returned — confirm that is acceptable.
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM (
    SELECT
        cust_id,
        email_id,
        role_id,
        dob,
        creation_date,
        MAX(creation_date) OVER (PARTITION BY email_id, role_id, dob) AS last_creation_date
    FROM customer
) AS c
WHERE creation_date = last_creation_date;
希望它有效
答案 2 :(得分:0)
您可以使用 ROW_NUMBER 和 PARTITION BY 来实现此目的,相关用法可以自行搜索。
检查此查询:
-- Sample data from the question, held in a table variable so the script needs no real table.
DECLARE @customer TABLE (
    cust_id       int,
    email_id      varchar(200),
    role_id       int,
    dob           datetime,
    creation_date datetime
);

-- Dates are written as 'YYYYMMDD', which datetime reads the same way under every
-- LANGUAGE / DATEFORMAT setting; month names such as 'feb' depend on the session language.
INSERT INTO @customer (cust_id, email_id, role_id, dob, creation_date)
VALUES
    (1, 'abc@abc.com', 5, '19660204', '20160917'),
    (2, 'abc@abc.com', 5, '19660204', '20160920'),
    (3, 'xyz@xyz.com', 5, '19910215', '20160918'),
    (4, 'xyz@xyz.com', 5, '19910215', '20160921'),
    (5, 'pqr@pqr.com', 5, '19850216', '20160930'),
    (6, 'pqr@pqr.com', 5, '19850216', '20161105'),
    (7, 'pqr@pqr.com', 5, '19850216', '20161104')
-- Number the rows of each (email_id, role_id, dob) group by creation_date, delete every row
-- except the first of its group, then show what is left in @customer.
;WITH numbered_customer AS (
    SELECT
        cust_id,
        ROW_NUMBER() OVER (
            PARTITION BY email_id, role_id, dob
            ORDER BY creation_date
        ) AS rnk
    FROM @customer
)
-- Deleting through the CTE removes the underlying @customer rows.
DELETE FROM numbered_customer
WHERE rnk > 1;

SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM @customer
答案 3 :(得分:0)
这是解决方案。
-- @MainTable holds the sample rows from the question.
DECLARE @MainTable TABLE
(
    Cust_Id       INT,
    Email_Id      NVARCHAR(250),
    Role_Id       INT,
    DOB           DATE,
    Creation_Date DATE
)

-- @Table1 and @Table2 have the same shape as @MainTable and receive the two de-duplicated results.
DECLARE @Table1 TABLE
(
    Cust_Id       INT,
    Email_Id      NVARCHAR(250),
    Role_Id       INT,
    DOB           DATE,
    Creation_Date DATE
)

DECLARE @Table2 TABLE
(
    Cust_Id       INT,
    Email_Id      NVARCHAR(250),
    Role_Id       INT,
    DOB           DATE,
    Creation_Date DATE
)

-- Dates are written in ISO 'YYYY-MM-DD' form, which the DATE type reads the same way under every
-- DATEFORMAT setting; 'm/d/yyyy' literals fail or swap day and month when DATEFORMAT is not mdy.
INSERT INTO @MainTable
    (Cust_Id, Email_Id, Role_Id, DOB, Creation_Date)
VALUES
    (1, N'abc@abc.com', 5, '1966-02-04', '2016-09-17'),
    (2, N'abc@abc.com', 5, '1966-02-04', '2016-09-20'),
    (3, N'xyz@xyz.com', 5, '1991-02-15', '2016-09-18'),
    (4, N'xyz@xyz.com', 5, '1991-02-15', '2016-09-21'),
    (5, N'pqr@pqr.com', 5, '1985-02-16', '2016-09-30'),
    (6, N'pqr@pqr.com', 5, '1985-02-16', '2016-11-05'),
    (7, N'pqr@pqr.com', 5, '1985-02-16', '2016-11-04')
-- Result 1: copy the earliest-created row of every (Email_Id, Role_Id, DOB) group into @Table1.
-- ROW_NUMBER is used instead of RANK so that rows tied on Creation_Date cannot all receive
-- rank 1 and leave duplicates behind; Cust_Id makes the choice between tied rows repeatable.
;WITH OldestFirst AS (
    SELECT
        Cust_Id,
        Email_Id,
        Role_Id,
        DOB,
        Creation_Date,
        ROW_NUMBER() OVER (
            PARTITION BY Email_Id, Role_Id, DOB
            ORDER BY Creation_Date, Cust_Id
        ) AS RowNo
    FROM @MainTable
)
INSERT INTO @Table1
    (Cust_Id, Email_Id, Role_Id, DOB, Creation_Date)
SELECT
    OldestFirst.Cust_Id,
    OldestFirst.Email_Id,
    OldestFirst.Role_Id,
    OldestFirst.DOB,
    OldestFirst.Creation_Date
FROM OldestFirst
WHERE OldestFirst.RowNo = 1
-- Result 2: copy the latest-created row of every (Email_Id, Role_Id, DOB) group into @Table2.
-- Rows are numbered newest-first and only number 1 is kept. Numbering oldest-first and keeping
-- everything except number 1 would return all later duplicates (Cust_Id 2, 4, 6 and 7 for the
-- sample data) instead of the single latest row per group (2, 4, 6) that the question asks for.
;WITH NewestFirst AS (
    SELECT
        Cust_Id,
        Email_Id,
        Role_Id,
        DOB,
        Creation_Date,
        ROW_NUMBER() OVER (
            PARTITION BY Email_Id, Role_Id, DOB
            -- Cust_Id makes the choice between rows tied on Creation_Date repeatable
            ORDER BY Creation_Date DESC, Cust_Id DESC
        ) AS RowNo
    FROM @MainTable
)
INSERT INTO @Table2
    (Cust_Id, Email_Id, Role_Id, DOB, Creation_Date)
SELECT
    NewestFirst.Cust_Id,
    NewestFirst.Email_Id,
    NewestFirst.Role_Id,
    NewestFirst.DOB,
    NewestFirst.Creation_Date
FROM NewestFirst
WHERE NewestFirst.RowNo = 1;
-- Show the source rows, then the contents of each result table, ordered by customer id.
SELECT
    Cust_Id,
    Email_Id,
    Role_Id,
    DOB,
    Creation_Date
FROM @MainTable
ORDER BY Cust_Id;

SELECT
    Cust_Id,
    Email_Id,
    Role_Id,
    DOB,
    Creation_Date
FROM @Table1
ORDER BY Cust_Id;

SELECT
    Cust_Id,
    Email_Id,
    Role_Id,
    DOB,
    Creation_Date
FROM @Table2
ORDER BY Cust_Id;
基本上,这类问题用 SQL Server 的窗口函数处理起来更方便。SQL Server 2012 又为窗口函数增加了新功能,使其更加强大。因此,上面的代码在 MSSQL 2012 中可以正常工作。
答案 4 :(得分:0)
我已经使用@navintb回答的查询并将其修改如下,以删除重复的结果,以便获得所需的输出。
-- For every (email_id, role_id, dob) group that has two or more rows, return the latest-created row.
-- MAX(cust_id) and MAX(creation_date) taken side by side are independent aggregates and can
-- describe two different rows, so the group maximum and the group size are computed as window
-- aggregates and the whole matching row is returned.
-- NOTE(review): the question spells the join column individaul_id — use the table's real column name.
-- NOTE(review): rows tied on the maximum creation_date are all returned — confirm that is acceptable.
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM (
    SELECT
        c.cust_id,
        c.email_id,
        c.role_id,
        i.dob,
        c.creation_date,
        MAX(c.creation_date) OVER (PARTITION BY c.email_id, c.role_id, i.dob) AS last_creation_date,
        COUNT(*) OVER (PARTITION BY c.email_id, c.role_id, i.dob) AS group_size
    FROM CUSTOMER AS c
    INNER JOIN INDIVIDUAL AS i
        ON c.individual_id = i.individual_id
) AS d
WHERE group_size >= 2
  AND creation_date = last_creation_date
和
-- For every (email_id, role_id, dob) group that has two or more rows, return the earliest-created row.
-- MIN(cust_id) and MIN(creation_date) taken side by side are independent aggregates and can
-- describe two different rows, so the group minimum and the group size are computed as window
-- aggregates and the whole matching row is returned.
-- NOTE(review): the question spells the join column individaul_id — use the table's real column name.
-- NOTE(review): rows tied on the minimum creation_date are all returned — confirm that is acceptable.
SELECT
    cust_id,
    email_id,
    role_id,
    dob,
    creation_date
FROM (
    SELECT
        c.cust_id,
        c.email_id,
        c.role_id,
        i.dob,
        c.creation_date,
        MIN(c.creation_date) OVER (PARTITION BY c.email_id, c.role_id, i.dob) AS first_creation_date,
        COUNT(*) OVER (PARTITION BY c.email_id, c.role_id, i.dob) AS group_size
    FROM CUSTOMER AS c
    INNER JOIN INDIVIDUAL AS i
        ON c.individual_id = i.individual_id
) AS d
WHERE group_size >= 2
  AND creation_date = first_creation_date