我想删除重复的行,即 asciiname、[country code] 和 provinceid 这三列的值都相同的行。
我该怎么做?
-- Session options set before the table is created.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- City table used by the de-duplication queries in this thread.
-- id is an auto-incrementing surrogate key and the clustered primary key;
-- asciiname, [country code] and provinceid are the columns that define a duplicate.
CREATE TABLE [dbo].[cities_geonames]
(
    [id]           INT IDENTITY(1,1) NOT NULL,
    [geonameid]    FLOAT             NULL,
    [asciiname]    NVARCHAR(255)     NULL,
    [country code] NVARCHAR(255)     NULL,  -- name contains a space, so it must always be bracket-quoted
    [provinceid]   INT               NOT NULL,
    [population]   INT               NOT NULL,
    CONSTRAINT [PK_cities_geonames] PRIMARY KEY CLUSTERED ([id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Sample data for dbo.cities_geonames (the table created above and read by every query below).
-- Several rows intentionally share the same (asciiname, [country code], provinceid) with
-- different population values, e.g. 'Achthal'/'DE'/314 and 'Amigdhalea'/'GR'/509.
-- id is an IDENTITY column, so it is omitted from the column list.
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2743447,'Abelhal','PT',463,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2657842,'Aberchalder','GB',201,30);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2522470,'Acebuchal','ES',353,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2522446,'Aceuchal','ES',356,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2657756,'Achallader','GB',201,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2959625,'Achthal','DE',314,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2959626,'Achthal','DE',314,10);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2959627,'Achthal','DE',314,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2959363,'Affalterthal','DE',314,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2657642,'Aghalee','GB',202,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (4179245,'Ahaluna','US',60,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2958936,'Aich halden','DE',315,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2958937,'Aich halden','DE',315,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (6714269,'Air Halim Rambung','ID',551,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2958612,'Albrechtsthal','DE',312,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2958542,'Alexandrinenthal','DE',314,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2657476,'Allhallows','GB',203,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2956763,'Alten-thal','DE',310,4000);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2957440,'Alten-thal','DE',310,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2957169,'Althaldensleben','DE',302,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (2956888,'Altrosenthal','DE',312,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (1651600,'Aluhaluh','ID',565,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (736891,'Amigdhala','GR',513,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (736889,'Amigdhalea','GR',513,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (736890,'Amigdhalea','GR',513,30);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265176,'Amigdhalea','GR',511,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265178,'Amigdhalea','GR',502,650);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265179,'Amigdhalea','GR',502,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265180,'Amigdhalea','GR',512,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265181,'Amigdhalea','GR',509,560);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265182,'Amigdhalea','GR',509,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265183,'Amigdhalea','GR',509,0);
INSERT INTO dbo.cities_geonames (geonameid,asciiname,[country code],provinceid,[population]) VALUES (265184,'Amigdhalea','GR',504,0);
更新
抱歉,我发现还有一个需求:在每组重复项中,保留 population 列
值最大的那条记录。我该如何把这一点加入下面的语句?(我已经更新了建表语句和插入语句)
-- Number the rows inside each (asciiname, [country code], provinceid) group by ascending id,
-- then delete every row that is not first in its group, so the lowest id survives.
WITH numbered AS
(
    SELECT
        src.asciiname,
        src.[country code],
        src.provinceid,
        ROW_NUMBER() OVER (
            PARTITION BY src.asciiname, src.[country code], src.provinceid
            ORDER BY src.id
        ) AS DuplicateCount
    FROM cities_geonames AS src
)
DELETE FROM numbered
WHERE DuplicateCount > 1
GO
答案 0 :(得分:1)
-- Delete duplicates on (asciiname, [country code], provinceid), keeping the row with the
-- highest population in each group.
-- id is added as a tiebreaker: ROW_NUMBER is only deterministic when its ORDER BY is unique
-- within the partition, and several duplicate groups share the same population value.
-- When populations tie, the row with the lowest id is the one kept.
with ranked as
(
    select
        row_number() over (
            partition by asciiname, [country code], provinceid
            order by population desc, id asc
        ) as rn
    from cities_geonames
)
delete from ranked
where rn > 1;
答案 1 :(得分:1)
试试这个,它适用于我类似的情况:
-- Rank the rows of every (asciiname, [country code], provinceid) group by id and remove
-- all but the first one; the row with the smallest id in each group is the one kept.
WITH dup_rows AS
(
    SELECT
        cg.asciiname,
        cg.[country code],
        cg.provinceid,
        ROW_NUMBER() OVER (
            PARTITION BY cg.asciiname, cg.[country code], cg.provinceid
            ORDER BY cg.id
        ) AS DuplicateCount
    FROM cities_geonames AS cg
)
DELETE FROM dup_rows
WHERE DuplicateCount > 1
GO
在删除之前,如果您希望查看要删除的结果集,可以使用:
-- Preview of the rows a duplicate-removing DELETE ordered by id would remove.
-- The numbering is ordered by id: ordering by asciiname (a partition column) leaves the
-- order inside each group undefined, so the preview could flag different rows than a
-- delete that keeps the lowest id.
WITH CTE (asciiname, [country code], provinceid, DuplicateCount)
AS
(
    SELECT
        asciiname,
        [country code],
        provinceid,
        ROW_NUMBER() OVER (
            PARTITION BY asciiname, [country code], provinceid
            ORDER BY id
        ) AS DuplicateCount
    FROM cities_geonames
)
SELECT
    asciiname,
    [country code],
    provinceid,
    DuplicateCount
FROM CTE
WHERE DuplicateCount > 1
GO
对于更新后的问题,请尝试以下查询(我不确定它在语法上是否完全正确,因为我现在手头没有工具可以检查;这只是一个思路,说明如何找到人口最多的记录)。确认结果无误后,将 select *
替换为 delete
:
-- Lists the duplicate rows that are NOT the highest-population row of their
-- (asciiname, [country code], provinceid) group, i.e. the rows to remove.
-- Rows are numbered by population descending, so number 1 is the row to keep;
-- id breaks ties between equal populations so the choice is deterministic.
-- MaxPopulation shows the group's highest population next to each listed row.
-- Grouping by Population and comparing it with MAX(population) can never match,
-- because the aggregate then always equals the grouped value; a windowed MAX is used instead.
WITH CTE (asciiname, [country code], provinceid, Population, MaxPopulation, DuplicateCount)
AS
(
    SELECT
        OCG.asciiname,
        OCG.[country code],
        OCG.provinceid,
        OCG.population,
        MAX(OCG.population) OVER (
            PARTITION BY OCG.asciiname, OCG.[country code], OCG.provinceid
        ) AS MaxPopulation,
        ROW_NUMBER() OVER (
            PARTITION BY OCG.asciiname, OCG.[country code], OCG.provinceid
            ORDER BY OCG.population DESC, OCG.id ASC
        ) AS DuplicateCount
    FROM cities_geonames OCG
)
SELECT
    asciiname,
    [country code],
    provinceid,
    Population,
    DuplicateCount,
    MaxPopulation
FROM CTE
WHERE DuplicateCount > 1
GO