我有一个如下所示的表格,想要识别重复的推荐
Client Team Referred Date
client1 Referred Team1 2016-02-16
client1 Referred Team1 2016-02-16
client1 Referred Team1 2016-02-16
client1 Referred Team1 2016-01-28
Client2 Referred Team4 2015-07-03
Client2 Referred Team4 2015-07-03
Client3 Referred Team7 2015-04-09
Client3 Referred Team7 2015-04-09
Client3 Referred Team7 2015-04-09
Client3 Referred Team2 2016-09-28
Client3 Referred Team1 2016-10-20
Client4 Referred Team8 2016-11-18
下面是我目前写的查询,但似乎没有得到想要的结果
-- Count the rows of every (ClientId, Team) pair and keep only the pairs
-- that occur more than once.
SELECT
    ClientId,
    Team,
    COUNT(*) AS DuplicateCount
FROM MyData
GROUP BY
    ClientId,
    Team
HAVING COUNT(*) > 1
我想要以下结果
Client Team Duplicate Count (Times referred to the same team)
client1 Referred Team1 4
Client2 Referred Team4 2
Client3 Referred Team7 3
Client4 Referred Team8 1
提前致谢
答案 0 :(得分:1)
您似乎想要的是每个客户行数最多的那个团队。这在统计学中称为众数(mode):
-- For every client, return the team it was referred to most often together
-- with that referral count (the per-client "mode").
SELECT
    ct.ClientId,
    ct.Team,
    ct.DuplicateCount   -- explicit list keeps the helper column seqnum out of the result
FROM (
    SELECT
        ClientId,
        Team,
        COUNT(*) AS DuplicateCount,
        -- Rank each client's teams by referral count. Team is a tie-breaker
        -- so the selected row is deterministic when two counts are equal.
        ROW_NUMBER() OVER (
            PARTITION BY ClientId
            ORDER BY COUNT(*) DESC, Team
        ) AS seqnum
    FROM MyData
    GROUP BY ClientId, Team
) ct
WHERE ct.seqnum = 1;
您可以使用 HAVING 或 WHERE 子句过滤掉非重复项(例如在子查询中加入 HAVING COUNT(*) > 1)。
编辑:
对 Kannan 的答案稍作改动,就无需使用子查询:
-- Same result without a derived table: within each client the most-referred
-- team gets ROW_NUMBER() = 1, and TOP (1) WITH TIES keeps every row sharing
-- that lowest sort value, i.e. exactly one row per client.
SELECT TOP (1) WITH TIES
    ClientId,
    Team,
    COUNT(*) AS DuplicateCount
FROM MyData
GROUP BY ClientId, Team
ORDER BY ROW_NUMBER() OVER (
    PARTITION BY ClientId
    ORDER BY COUNT(*) DESC, Team   -- Team breaks equal counts deterministically
);
答案 1 :(得分:1)
您可以使用row_number和子查询,如下所示
-- One row per client: the team with the highest referral count.
SELECT TOP (1) WITH TIES
    a.Client,
    a.Team,
    a.Cnt
FROM (
    SELECT
        Client,
        Team,
        COUNT(*) AS Cnt   -- counts every referral row, even when ReferredDate is NULL
    FROM yourtable
    GROUP BY Client, Team
) a
-- Each client's top team sorts as 1; WITH TIES returns all of those rows.
-- The alias is spelled exactly as declared (Cnt) so the query also works on
-- case-sensitive collations; Team makes equal counts deterministic.
ORDER BY ROW_NUMBER() OVER (
    PARTITION BY a.Client
    ORDER BY a.Cnt DESC, a.Team
);
答案 2 :(得分:1)
-- Self-contained demo: sample referrals, then the most-referred team per client.
;WITH cte (Client, Team, ReferredDate) AS (
    -- Sample data: one row per referral.
    SELECT v.Client, v.Team, v.ReferredDate
    FROM (VALUES
        ('Client1', 'Referred Team1', '2016-02-16'),
        ('Client1', 'Referred Team1', '2016-02-16'),
        ('Client1', 'Referred Team1', '2016-02-16'),
        ('Client1', 'Referred Team1', '2016-01-28'),
        ('Client2', 'Referred Team4', '2015-07-03'),
        ('Client2', 'Referred Team4', '2015-07-03'),
        ('Client3', 'Referred Team7', '2015-04-09'),
        ('Client3', 'Referred Team7', '2015-04-09'),
        ('Client3', 'Referred Team7', '2015-04-09'),
        ('Client3', 'Referred Team2', '2016-09-28'),
        ('Client3', 'Referred Team1', '2016-10-20'),
        ('Client4', 'Referred Team8', '2016-11-18')
    ) v (Client, Team, ReferredDate)
),
team_counts AS (
    -- Number of referrals for every (Client, Team) pair.
    -- The column name keeps its original spelling because the posted output uses it.
    SELECT
        Client,
        Team,
        COUNT(*) AS DupilcateTeamCount
    FROM cte
    GROUP BY Client, Team
),
ranked AS (
    -- Rank the teams inside each client by referral count. Ranking must be
    -- per client and by count: partitioning by Team would drop clients that
    -- share a team, and ordering by Client alone would pick an arbitrary team.
    SELECT
        Client,
        Team,
        DupilcateTeamCount,
        ROW_NUMBER() OVER (
            PARTITION BY Client
            ORDER BY DupilcateTeamCount DESC, Team
        ) AS Seq
    FROM team_counts
)
SELECT
    Client,
    Team,
    DupilcateTeamCount
FROM ranked
WHERE Seq = 1
ORDER BY Client;
输出
Client Team DupilcateTeamCount
----------------------------------------
Client1 Referred Team1 4
Client2 Referred Team4 2
Client3 Referred Team7 3
Client4 Referred Team8 1
答案 3 :(得分:1)
我猜您需要的是把每个客户/团队组合下的各个推荐日期都计为重复记录,这可以用 row_number() 函数来统计。
查看以下查询是否有效:
-- Rebuild the TEST fixture table and load the sample referrals (Oracle).
-- NOTE(review): the DROP presumably errors on a first run when TEST does not
-- exist yet -- confirm that this is acceptable for the way the script is run.
DROP TABLE test PURGE;

CREATE TABLE test (
    Client       VARCHAR2(20),
    Team         VARCHAR2(20),
    ReferredDate DATE
);

-- One row per referral; the dates are written as ANSI date literals.
INSERT INTO test (Client, Team, ReferredDate)
SELECT 'Client1', 'Referred Team1', DATE '2016-02-16' FROM dual UNION ALL
SELECT 'Client1', 'Referred Team1', DATE '2016-02-16' FROM dual UNION ALL
SELECT 'Client1', 'Referred Team1', DATE '2016-02-16' FROM dual UNION ALL
SELECT 'Client1', 'Referred Team1', DATE '2016-01-28' FROM dual UNION ALL
SELECT 'Client2', 'Referred Team4', DATE '2015-07-03' FROM dual UNION ALL
SELECT 'Client2', 'Referred Team4', DATE '2015-07-03' FROM dual UNION ALL
SELECT 'Client3', 'Referred Team7', DATE '2015-04-09' FROM dual UNION ALL
SELECT 'Client3', 'Referred Team7', DATE '2015-04-09' FROM dual UNION ALL
SELECT 'Client3', 'Referred Team7', DATE '2015-04-09' FROM dual UNION ALL
SELECT 'Client3', 'Referred Team2', DATE '2016-09-28' FROM dual UNION ALL
SELECT 'Client3', 'Referred Team1', DATE '2016-10-20' FROM dual UNION ALL
SELECT 'Client4', 'Referred Team8', DATE '2016-11-18' FROM dual;

COMMIT;
---=========================================================================================
-- Number the referrals inside each (client, team) pair by date; the highest
-- number within a pair equals the number of referrals for that pair.
WITH numbered AS (
    SELECT
        client,
        team,
        referreddate,
        ROW_NUMBER() OVER (
            PARTITION BY client, team
            ORDER BY referreddate
        ) AS dup_cnt
    FROM test
)
-- GROUP BY already yields one row per (client, team), so no DISTINCT is needed.
SELECT
    client,
    team,
    MAX(dup_cnt)
FROM numbered
GROUP BY client, team
ORDER BY client, team;
输出应为:
CLIENT TEAM MAX(DUP_CNT)
1 Client1 Referred Team1 4
2 Client2 Referred Team4 2
3 Client3 Referred Team1 1
4 Client3 Referred Team2 1
5 Client3 Referred Team7 3
6 Client4 Referred Team8 1