删除3列包含相同值的重复项,并保留最高值的记录

时间:2014-01-10 10:02:11

标签: sql-server-2008 tsql duplicates multiple-columns

我想删除重复项,即 asciiname、[country code] 和 provinceid 三列的值都相同的记录。

我该怎么做?

-- Session settings emitted by the table script; each runs in its own batch.
SET ANSI_NULLS ON;
GO

SET QUOTED_IDENTIFIER ON;
GO

-- Table holding the city rows that the de-duplication queries operate on.
-- Rows are considered duplicates when asciiname, [country code] and
-- provinceid all match.
CREATE TABLE dbo.cities_geonames
(
    id             int IDENTITY(1,1) NOT NULL,  -- auto-generated key (clustered PK below)
    geonameid      float             NULL,
    asciiname      nvarchar(255)     NULL,
    [country code] nvarchar(255)     NULL,      -- name contains a space, so it must stay bracketed
    provinceid     int               NOT NULL,
    population     int               NOT NULL,
    CONSTRAINT PK_cities_geonames PRIMARY KEY CLUSTERED (id ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];

GO


-- Sample data for the de-duplication queries.
-- Targets dbo.cities_geonames (the table created above and used by every
-- query in this document); the original statements pointed at a
-- non-existent cities_geonames_test table.
-- Uses a single multi-row VALUES list (supported from SQL Server 2008).
-- id is an IDENTITY column and is therefore not listed.
INSERT INTO dbo.cities_geonames
    (geonameid, asciiname, [country code], provinceid, [population])
VALUES
    (2743447, 'Abelhal',           'PT', 463, 0),
    (2657842, 'Aberchalder',       'GB', 201, 30),
    (2522470, 'Acebuchal',         'ES', 353, 0),
    (2522446, 'Aceuchal',          'ES', 356, 0),
    (2657756, 'Achallader',        'GB', 201, 0),
    (2959625, 'Achthal',           'DE', 314, 0),
    (2959626, 'Achthal',           'DE', 314, 10),
    (2959627, 'Achthal',           'DE', 314, 0),
    (2959363, 'Affalterthal',      'DE', 314, 0),
    (2657642, 'Aghalee',           'GB', 202, 0),
    (4179245, 'Ahaluna',           'US', 60,  0),
    (2958936, 'Aich halden',       'DE', 315, 0),
    (2958937, 'Aich halden',       'DE', 315, 0),
    (6714269, 'Air Halim Rambung', 'ID', 551, 0),
    (2958612, 'Albrechtsthal',     'DE', 312, 0),
    (2958542, 'Alexandrinenthal',  'DE', 314, 0),
    (2657476, 'Allhallows',        'GB', 203, 0),
    (2956763, 'Alten-thal',        'DE', 310, 4000),
    (2957440, 'Alten-thal',        'DE', 310, 0),
    (2957169, 'Althaldensleben',   'DE', 302, 0),
    (2956888, 'Altrosenthal',      'DE', 312, 0),
    (1651600, 'Aluhaluh',          'ID', 565, 0),
    (736891,  'Amigdhala',         'GR', 513, 0),
    (736889,  'Amigdhalea',        'GR', 513, 0),
    (736890,  'Amigdhalea',        'GR', 513, 30),
    (265176,  'Amigdhalea',        'GR', 511, 0),
    (265178,  'Amigdhalea',        'GR', 502, 650),
    (265179,  'Amigdhalea',        'GR', 502, 0),
    (265180,  'Amigdhalea',        'GR', 512, 0),
    (265181,  'Amigdhalea',        'GR', 509, 560),
    (265182,  'Amigdhalea',        'GR', 509, 0),
    (265183,  'Amigdhalea',        'GR', 509, 0),
    (265184,  'Amigdhalea',        'GR', 504, 0);

更新

抱歉,我发现还有一个需求:在每组重复项中,保留 population 列值最高的那条记录。我该如何把这一点加入到下面这条语句中?(我已经更新了建表语句和插入语句)

-- Asker's current statement: number the rows inside each
-- (asciiname, [country code], provinceid) group by ascending id and delete
-- every row except the first one, i.e. the lowest id in each group survives.
-- Population is not taken into account here.
WITH numbered_rows AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY cg.asciiname, cg.[country code], cg.provinceid
            ORDER BY cg.id
        ) AS duplicate_rank
    FROM cities_geonames AS cg
)
DELETE FROM numbered_rows
WHERE duplicate_rank > 1;
GO

2 个答案:

答案 0 :(得分:1)

-- For each (asciiname, [country code], provinceid) group keep the row with the
-- highest population and delete all others.
-- id is appended to the ORDER BY as a tie-breaker: without it, ROW_NUMBER()
-- may number equal-population rows in any order, so the surviving row would
-- be arbitrary. With it, the lowest id among the top-population rows is kept.
-- Deleting from the CTE deletes the underlying cities_geonames rows.
with x as
(
    select
        row_number() over (
            partition by asciiname, [country code], provinceid
            order by population desc, id
        ) as rn
    from cities_geonames
)
delete from x
where rn > 1;

答案 1 :(得分:1)

试试这个,它适用于我类似的情况:

-- Remove duplicates on (asciiname, [country code], provinceid).
-- Within each group the rows are numbered by ascending id; everything after
-- the first row (the lowest id) is deleted from the underlying table.
WITH dup AS
(
    SELECT
        src.asciiname,
        src.[country code],
        src.provinceid,
        ROW_NUMBER() OVER (
            PARTITION BY src.asciiname, src.[country code], src.provinceid
            ORDER BY src.id
        ) AS DuplicateCount
    FROM cities_geonames AS src
)
DELETE FROM dup
WHERE DuplicateCount > 1;
GO

在删除之前,如果您希望查看要删除的结果集,可以使用:

-- Preview of the rows the de-duplicating DELETE would remove.
-- The numbering uses ORDER BY id, the same ordering as the DELETE, so the
-- preview lists exactly the rows that will go (ordering by asciiname, a
-- partition column, would number the rows arbitrarily).
-- id is included so each previewed row can be identified.
WITH CTE (id, asciiname, [country code], provinceid, DuplicateCount)
AS
(
    SELECT id, asciiname, [country code], provinceid,
        ROW_NUMBER() OVER (
            PARTITION BY asciiname, [country code], provinceid
            ORDER BY id
        ) AS DuplicateCount
    FROM cities_geonames
)
SELECT id, asciiname, [country code], provinceid, DuplicateCount
FROM CTE
WHERE DuplicateCount > 1
ORDER BY asciiname, [country code], provinceid, DuplicateCount;
GO

对于更新后的问题,请尝试以下查询(我不确定它在语法上是否正确,因为我现在手头没有工具可以检查;这只是一个思路,说明如何找出人口最多的记录)。确认结果无误后,将 select * 替换为 delete 即可。

-- Preview of the duplicate rows to remove when the row with the highest
-- population must be kept in each (asciiname, [country code], provinceid) group.
-- Rows are ranked by population (highest first); id breaks ties so the
-- numbering is deterministic. Rank 1 is the row to keep, every row with
-- DuplicateCount > 1 is a duplicate to remove.
-- The earlier GROUP BY Population ... HAVING MAX(population) <> Population
-- form could never return a row, because MAX(population) always equals
-- Population inside a group that is grouped by Population.
-- Replacing "SELECT *" with "DELETE" removes exactly the rows listed here.
WITH CTE (id, asciiname, [country code], provinceid, Population, DuplicateCount)
AS
(
    SELECT OCG.id, OCG.asciiname, OCG.[country code], OCG.provinceid, OCG.population,
        ROW_NUMBER() OVER (
            PARTITION BY OCG.asciiname, OCG.[country code], OCG.provinceid
            ORDER BY OCG.population DESC, OCG.id
        ) AS DuplicateCount
    FROM cities_geonames AS OCG
)
SELECT *
FROM CTE
WHERE DuplicateCount > 1;
GO