示例数据
userid email_address login_name name Title org phone_number_com
============= ========================== =============== ================== ========== ============= ===================
1192 Steve.Jobs@apple.com sjobs Steve Jobs CEO Apple N/A
1274 Steve.Jobs@apple.com sjobs Steve Jobs CFO Apple 697-4686
1192 Steven.jobs@apple.com sjobs Steven jobs CEO Apple 604-7126
1885 Bill.Gates@microsoft.com bgates Bill Gates CEO Microsoft 604-7114
1920 Bill.Gates@microsoft.com bgates William Gates CTR Microsoft 604-7247
1951 Warren.Buffet@hp.com wbuffet Warren Buffet CEO HP 614-9141
1954 Warren.Buffet@hp.com wbuffet W. Buffet COO HP 614-7589
1951 Warren.Buffet@xerox.com wbuffet Warren S Buffet CIO Xerox 614-8874
1956 Mark.Zuckerberg@fb.com mzuck Mark Zuckerberg CEO FB 614-8295
QUERY
-- Number the rows inside each login_name group and keep every row after the
-- first one (the 2nd, 3rd, ... occurrence of each login_name).
SELECT
    numbered.userid
  , numbered.name
  , numbered.login_name
  , numbered.email_address
  , numbered.phone_number_com
  , numbered.Num_Duplicates
FROM
(
    SELECT
        userid
      , name
      , login_name
      , email_address
      , phone_number_com
        -- The ORDER BY repeats the partition key, so which row of a group
        -- receives which number is left to the engine.
      , ROW_NUMBER() OVER (PARTITION BY login_name ORDER BY login_name) AS Num_Duplicates
    FROM web_user
) AS numbered
WHERE numbered.Num_Duplicates > 1
这是我的第一篇帖子,希望我遵守了所有规范。我得到的结果集显示了重复项中的第 2 行和第 3 行。我想按 login_name 分组(GROUP BY),并且只显示 Num_Duplicates 最大的那一行:如果某个 login_name 的 Num_Duplicates 为 2 和 3,则只显示 Num_Duplicates 为 3 的那一行。希望这样说得清楚!提前感谢您提供的任何指导。
我希望查询输出如下结果:
Num_Duplicates
答案 0 :(得分:0)
如果我正确理解了您想要做的事情,那么首先按 login_name 分组,以获取每个登录名的重复项数量:
-- One output row per login_name, carrying the number of web_user rows that share it.
SELECT
    login_name
  , COUNT(*) AS num_duplicates
FROM
    web_user
GROUP BY
    login_name
接下来,你可以使用带有 ROW_NUMBER() 的子查询(不过在出现并列的情况下,我建议使用 RANK()),或者也可以在窗口函数中使用聚合:
-- Count the rows per login_name and rank the groups by that count, largest first.
-- RANK() gives equal counts the same rank, so several login_names can share rn = 1.
SELECT
    login_name
  , COUNT(*) AS num_duplicates
  , RANK() OVER (ORDER BY COUNT(*) DESC) AS rn
FROM
    web_user
GROUP BY
    login_name;
然后将其放在子查询中以仅获得具有最多重复项的login_name
:
-- Return only the login_name(s) with the highest row count.
-- RANK() assigns rank 1 to every login_name tied for the largest count, so
-- more than one row can be returned.
SELECT
    login_name
  , num_duplicates
  , rn
FROM (
    -- Per-login row count, ranked from the most rows to the fewest.
    SELECT
        login_name
      , COUNT(*) AS num_duplicates
      , RANK() OVER (ORDER BY COUNT(*) DESC) AS rn
    FROM web_user
    GROUP BY login_name
) ranked_logins  -- a derived table must be aliased in SQL Server; no AS keyword so Oracle accepts it too
WHERE rn = 1;
根据 OP 的评论和对问题的编辑,更新如下:
-- For every login_name that occurs more than once, return a single
-- representative row (the one with the lowest userid) together with the
-- number of rows sharing that login_name.
SELECT
    userid
  , name
  , login_name
  , email_address
  , phone_number_com
  , num_duplicates
FROM (
    SELECT
        userid
      , name
      , login_name
      , email_address
      , phone_number_com
        -- Rows per login_name, repeated on every row of the group.
      , COUNT(*) OVER (PARTITION BY login_name) AS num_duplicates
        -- Position within the group. email_address breaks ties because the
        -- sample data repeats a userid within one login_name; ordering by
        -- userid alone would make the rn = 1 pick arbitrary.
      , ROW_NUMBER() OVER (PARTITION BY login_name ORDER BY userid, email_address) AS rn
    FROM web_user
) login_rows  -- a derived table must be aliased in SQL Server; no AS keyword so Oracle accepts it too
WHERE num_duplicates > 1
  AND rn = 1;
我在上面所做的是把 COUNT(*) 用作窗口函数:按 login_name 分区即可得到每个登录名的行数。我同样按 login_name 分区来计算 ROW_NUMBER(),并按 userid 排序,这样就可以返回每个登录名中 userid 最小的那一行(您期望的输出看起来就是这样做的)。
答案 1 :(得分:0)
嗯——从你的描述来看,你似乎只是想要类似下面这样的查询(凭印象写的,未经测试):
-- List every (login_name, email_address) pair that appears on more than one row.
-- A pair is a duplicate as soon as it occurs twice, hence COUNT(*) > 1;
-- the previous "> 2" threshold skipped pairs that occur exactly twice.
SELECT
    login_name
  , email_address
FROM web_user
GROUP BY
    login_name
  , email_address
HAVING COUNT(*) > 1
答案 2 :(得分:0)
以下内容应该能满足您的需求。
ROW_NUMBER
窗口函数用于标识login_name的第一行。
COUNT
窗口函数用于计算每个login_name的行数。
外部查询然后将结果限制为具有多于1行的login_name,并且仅返回每个login_name的第一行。
-- Self-contained T-SQL demo: load the sample rows into a table variable, then
-- return one row per login_name that occurs more than once, together with the
-- number of rows sharing that login_name.
DECLARE @users TABLE
(
    userid int
    , email_address varchar(100)
    , login_name varchar(100)
    , name varchar(100)
    , title varchar(100)
    , org varchar(100)
    , phone_number_com varchar(100)
)

-- Explicit column list so the INSERT does not depend on the declaration order above.
INSERT INTO @users
    (userid, email_address, login_name, name, title, org, phone_number_com)
VALUES
    (1192, 'Steve.Jobs@apple.com', 'sjobs', 'Steve Jobs', 'CEO', 'Apple', 'N/A')
    , (1274, 'Steve.Jobs@apple.com', 'sjobs', 'Steve Jobs', 'CFO', 'Apple', '697-4686')
    , (1192, 'Steven.jobs@apple.com', 'sjobs', 'Steven jobs', 'CEO', 'Apple', '604-7126')
    , (1885, 'Bill.Gates@microsoft.com', 'bgates', 'Bill Gates', 'CEO', 'Microsoft', '604-7114')
    , (1920, 'Bill.Gates@microsoft.com', 'bgates', 'William Gates', 'CTR', 'Microsoft', '604-7247')
    , (1951, 'Warren.Buffet@hp.com', 'wbuffet', 'Warren Buffet', 'CEO', 'HP', '614-9141')
    , (1954, 'Warren.Buffet@hp.com', 'wbuffet', 'W. Buffet', 'COO', 'HP', '614-7589')
    , (1951, 'Warren.Buffet@xerox.com', 'wbuffet', 'Warren S Buffet', 'CIO', 'Xerox', '614-8874')
    , (1956, 'Mark.Zuckerberg@fb.com', 'mzuck', 'Mark Zuckerberg', 'CEO', 'FB', '614-8295')
;

-- Annotate every row with its position inside its login_name group and with
-- the size of that group.
WITH LoginWithWindowFunction AS
(
    SELECT
        userid
        , email_address
        , login_name
        , name
        , title
        , org
        , phone_number_com
        -- email_address breaks ties: the sample data repeats userid 1192 and
        -- 1951 within one login_name, so ordering by userid alone would leave
        -- the choice of the "first" row arbitrary.
        , ROW_NUMBER() OVER(PARTITION BY login_name ORDER BY userid, email_address) AS LoginOrder
        , COUNT(*) OVER(PARTITION BY login_name) AS Num_Duplicates
    FROM
        @users
)
-- Keep the first row of each login_name, and only for logins that occur more than once.
SELECT
    userid
    , email_address
    , login_name
    , name
    , title
    , org
    , phone_number_com
    , Num_Duplicates
FROM
    LoginWithWindowFunction
WHERE
    LoginOrder = 1
    AND Num_Duplicates > 1
ORDER BY
    userid
    , login_name