我有一个字符串资源表:
-- Inline sample data: seven (id, name, age) rows used by the queries below.
-- Row 5 stores the name 'paul ' with a trailing space.
;WITH cte (id, name, age) AS
(
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
)
所以名字 "john" 对应的年龄是 10 岁。
后来,其他人(不是我)也添加了一条 "john"、10 岁的记录。
所以我想清理所有重复项。
但那不是问题。在我删除之前,我希望看到所有重复项。
所以我这样做了:
-- Lists the second and later rows of every (name, age) group.
-- Rows are numbered within each group by ascending id, so the first row of a
-- group (rn = 1) is never returned by this query.
-- Reads from `cte`, which must be supplied by a preceding WITH clause.
SELECT
    numbered.ID,
    numbered.name,
    numbered.age,
    numbered.rn
FROM
(
    SELECT
        src.ID,
        src.name,
        src.age,
        ROW_NUMBER() OVER (PARTITION BY src.name, src.age ORDER BY src.id) AS rn
    FROM cte AS src
) AS numbered
WHERE numbered.rn >= 2
ORDER BY
    numbered.name,
    numbered.age,
    numbered.rn
结果:
这基本上显示了重复的行。但是,我还想看到 rn=1 的那一行,前提是该值存在多个版本(即确实有重复)。
问题
换句话说:如何增强我的查询,使其仅在存在重复行(rn>1)时也返回对应的 rn=1 行?
期望的结果:
ID name age rn
1 john 10 1
2 john 10 2
4 paul 6 1
5 paul 6 2
注意:我知道可以按相同的 name 和 age 重新扫描该表来实现这一目标,但我想知道是否有更优雅的方式。
答案 0 :(得分:2)
使用 EXISTS 运算符查找重复(duplicated)的名称。试试这个:
-- Returns every row that belongs to a (name, age) group containing more than
-- one row, including the group's first row (rn = 1).
;WITH cte (id, name, age) AS
(
    -- Inline sample data; row 5 stores its name with a trailing space.
    SELECT 1, 'john', 10
    UNION
    SELECT 2, 'john', 10
    UNION
    SELECT 3, 'john', 12
    UNION
    SELECT 4, 'paul', 6
    UNION
    SELECT 5, 'paul ', 6
    UNION
    SELECT 6, 'paul different', 7
    UNION
    SELECT 7, 'ringo', 2
),
-- Number the rows inside each (name, age) group by ascending id.
cte1 AS
(
    SELECT
        src.ID,
        src.name,
        src.age,
        ROW_NUMBER() OVER (PARTITION BY src.name, src.age ORDER BY src.id) AS rn
    FROM cte AS src
)
SELECT
    cur.ID,
    cur.name,
    cur.age,
    cur.rn
FROM cte1 AS cur
WHERE EXISTS
(
    -- A row numbered above 1 in the same group proves the group has duplicates.
    SELECT 1
    FROM cte1 AS dup
    WHERE dup.rn > 1
      AND dup.name = cur.name
      AND dup.age = cur.age
)
ORDER BY
    cur.name,
    cur.age,
    cur.rn
或使用Inner Join
-- Self-join form: returns every row of each (name, age) group that has
-- duplicates. Reads from `cte1` (rows numbered per group as rn), which must be
-- supplied by a preceding WITH clause.
--
-- The join matches on b.rn = 2 rather than b.rn > 1: a duplicated group has
-- exactly one row numbered 2, so each row of `a` is returned once. With
-- b.rn > 1, a group of N rows would repeat every row N - 1 times.
-- rn is included in the output so the first row of each group is identifiable.
SELECT
    a.id,
    a.name,
    a.age,
    a.rn
FROM cte1 AS a
INNER JOIN cte1 AS b
    ON a.name = b.name
   AND a.age = b.age
   AND b.rn = 2
ORDER BY
    a.name,
    a.age,
    a.rn
或者,若想只扫描一次表,可以使用 Dense_Rank 等窗口函数(window function):
-- Returns every row of each (name, age) group that contains more than one row,
-- using window functions only (no self-join or correlated subquery).
;WITH cte AS
(
    -- Inline sample data; row 5 stores its name with a trailing space.
    -- UNION ALL is sufficient because every row has a distinct id.
    SELECT 1 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 2 AS id, 'john' AS name, 10 AS age
    UNION ALL
    SELECT 3 AS id, 'john' AS name, 12 AS age
    UNION ALL
    SELECT 4 AS id, 'paul' AS name, 6 AS age
    UNION ALL
    SELECT 5 AS id, 'paul ' AS name, 6 AS age
    UNION ALL
    SELECT 6 AS id, 'paul different' AS name, 7 AS age
    UNION ALL
    SELECT 7 AS id, 'ringo' AS name, 2 AS age
)
, cte1 AS
(
    SELECT
        ID,
        name,
        age,
        -- Group size. COUNT(*) is used so rows with a NULL age are counted too.
        COUNT(*) OVER (PARTITION BY name, age) AS cnt,
        -- Position inside the (name, age) group, ordered by id.
        -- A DENSE_RANK() over (PARTITION BY name ORDER BY age) filtered to 1
        -- would keep only the lowest age of each name and hide duplicates
        -- that occur at any other age, so it is not used here.
        ROW_NUMBER() OVER (PARTITION BY name, age ORDER BY id) AS rn
    FROM cte
)
SELECT
    ID,
    name,
    age,
    cnt,
    rn
FROM cte1
WHERE cnt > 1
ORDER BY
    name,
    age,
    rn