-- Create table dup1.
CREATE TABLE dup1
(
    cola VARCHAR(10),
    colb VARCHAR(10)
);

-- Insert sample rows. ('1','2') appears twice, so dup1 already holds a
-- duplicate row of its own.
INSERT INTO dup1 (cola, colb) VALUES ('1', '2');
INSERT INTO dup1 (cola, colb) VALUES ('1', '2');
INSERT INTO dup1 (cola, colb) VALUES ('1', '3');
INSERT INTO dup1 (cola, colb) VALUES ('1', '4');
INSERT INTO dup1 (cola, colb) VALUES ('1', '5');
-- Create table dup2 (same shape as dup1).
CREATE TABLE dup2
(
    cola VARCHAR(10),
    colb VARCHAR(10)
);

-- Insert sample rows. ('1','2') appears twice here as well.
INSERT INTO dup2 (cola, colb) VALUES ('1', '2');
INSERT INTO dup2 (cola, colb) VALUES ('1', '2');
INSERT INTO dup2 (cola, colb) VALUES ('2', '3');
INSERT INTO dup2 (cola, colb) VALUES ('2', '4');
INSERT INTO dup2 (cola, colb) VALUES ('1', '5');
-- Create the view that stacks the rows of dup1 and dup2 (duplicates kept).
-- CREATE VIEW must be the first statement of its batch in SQL Server, so the
-- definition is fenced with GO batch separators (understood by SSMS / sqlcmd).
GO
CREATE VIEW V_Dup AS
SELECT cola, colb FROM dup1
UNION ALL
SELECT cola, colb FROM dup2;
GO
注意:记录 (1,2) 和 (1,5) 在两个表中都重复出现,但我想保留原始数据。
我的问题是:如何从两个表中删除重复记录?
答案 0 :(得分:1)
创建临时表(表变量):
-- Table variable with the same two columns as dup1 / dup2; it is filled with
-- the (cola, colb) pairs that exist in both tables.
DECLARE @tempDuplicateTable TABLE
(
    cola VARCHAR(10),
    colb VARCHAR(10)
);
插入重复的行:
-- Collect every (cola, colb) pair that occurs in both dup1 and dup2.
-- INTERSECT yields each shared pair exactly once, whereas joining the two
-- tables would emit one row per matching combination (e.g. 2 x 2 rows for a
-- pair stored twice in each table).
INSERT INTO @tempDuplicateTable (cola, colb)
SELECT cola, colb FROM dup1
INTERSECT
SELECT cola, colb FROM dup2;
从表dup1和dup2中删除重复数据:
-- Remove from dup1 every row whose (cola, colb) pair is listed in
-- @tempDuplicateTable.
DELETE FROM dup1
WHERE EXISTS (
    SELECT 1
    FROM @tempDuplicateTable AS d
    WHERE d.cola = dup1.cola
      AND d.colb = dup1.colb
);

-- Same clean-up for dup2.
DELETE FROM dup2
WHERE EXISTS (
    SELECT 1
    FROM @tempDuplicateTable AS d
    WHERE d.cola = dup2.cola
      AND d.colb = dup2.colb
);
如果您只想要这个结果:
cola colb
1 2
1 3
1 4
1 5
2 3
2 4
尝试此查询:
-- Return each (cola, colb) pair exposed by the view only once.
SELECT DISTINCT
    cola,
    colb
FROM V_Dup;
或者您可以像这样修改您的视图:
-- View returning each (cola, colb) pair of dup1 and dup2 once:
-- UNION (without ALL) removes duplicate rows from the combined result.
CREATE VIEW V_Dup AS
SELECT cola, colb FROM dup1
UNION
SELECT cola, colb FROM dup2
答案 1 :(得分:0)
先用 CTE 找出需要删除的行并存入临时表,再从两个表中删除这些行。
**查询**
-- Sample table dup1 with its test rows; ('1','2') is stored twice.
CREATE TABLE dup1
(
    cola VARCHAR(10),
    colb VARCHAR(10)
);

INSERT INTO dup1 (cola, colb) VALUES ('1', '2');
INSERT INTO dup1 (cola, colb) VALUES ('1', '2');
INSERT INTO dup1 (cola, colb) VALUES ('1', '3');
INSERT INTO dup1 (cola, colb) VALUES ('1', '4');
INSERT INTO dup1 (cola, colb) VALUES ('1', '5');
-- Sample table dup2 with its test rows; ('1','2') is stored twice.
CREATE TABLE dup2
(
    cola VARCHAR(10),
    colb VARCHAR(10)
);

INSERT INTO dup2 (cola, colb) VALUES ('1', '2');
INSERT INTO dup2 (cola, colb) VALUES ('1', '2');
INSERT INTO dup2 (cola, colb) VALUES ('2', '3');
INSERT INTO dup2 (cola, colb) VALUES ('2', '4');
INSERT INTO dup2 (cola, colb) VALUES ('1', '5');
-- View stacking the rows of dup1 and dup2 (duplicates kept).
-- CREATE VIEW must be the first statement of its batch in SQL Server, so the
-- definition is fenced with GO batch separators (understood by SSMS / sqlcmd).
GO
CREATE VIEW V_Dup AS
SELECT cola, colb FROM dup1
UNION ALL
SELECT cola, colb FROM dup2;
GO
-- Number the rows of V_Dup within each (cola, colb) pair and store every row
-- numbered 2 or higher (i.e. the surplus copies) in #temp.
SELECT
    ranked.rn,
    ranked.cola,
    ranked.colb
INTO #temp
FROM (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY cola, colb
            ORDER BY cola, colb
        ) AS rn,
        cola,
        colb
    FROM V_Dup
) AS ranked
WHERE ranked.rn > 1;
-- Delete from dup1 every row whose (cola, colb) pair is listed in #temp.
DELETE FROM dup1
WHERE EXISTS (
    SELECT 1
    FROM #temp AS d
    WHERE d.cola = dup1.cola
      AND d.colb = dup1.colb
);

-- Same clean-up for dup2.
DELETE FROM dup2
WHERE EXISTS (
    SELECT 1
    FROM #temp AS d
    WHERE d.cola = dup2.cola
      AND d.colb = dup2.colb
);

-- The helper table is no longer needed.
DROP TABLE #temp;
答案 2 :(得分:0)
我想您只想在视图中看到一次记录,因为您说要保留原始数据。因此,您应该在视图中使用UNION而不是UNION ALL
-- View exposing each (cola, colb) pair once: UNION drops duplicate rows,
-- the base tables themselves are left untouched.
CREATE VIEW V_Dup AS
SELECT cola, colb
FROM dup1
UNION
SELECT cola, colb
FROM dup2;
否则,如果要从构成视图的基表中删除所有存在重复的行,则必须执行以下操作:
-- Delete every row whose (cola, colb) pair occurs more than once in V_Dup:
-- all copies of a duplicated pair are removed, not only the surplus ones.
-- NOTE(review): deleting through a CTE over V_Dup presumably requires the view
-- to be updatable (base tables with primary keys) -- confirm before use.
;WITH DUP_CTE AS
(
    SELECT
        cola,
        colb,
        -- ORDER BY (SELECT 0): the numbering order inside a pair is arbitrary.
        ROW_NUMBER() OVER (PARTITION BY cola, colb ORDER BY (SELECT 0)) AS RN
    FROM V_Dup
)
DELETE FROM DUP_CTE
WHERE EXISTS (
    SELECT 0
    FROM DUP_CTE AS c
    WHERE c.cola = DUP_CTE.cola
      AND c.colb = DUP_CTE.colb
      AND c.RN <> 1   -- a second copy of the same pair exists
);
如果您只想删除重复项:
-- Delete only the surplus copies: within each (cola, colb) pair the row
-- numbered 1 is kept and every other row is removed.
;WITH DUP_CTE AS
(
    SELECT
        cola,
        colb,
        -- ORDER BY (SELECT 0): which copy receives number 1 is arbitrary.
        ROW_NUMBER() OVER (PARTITION BY cola, colb ORDER BY (SELECT 0)) AS RN
    FROM V_Dup
)
DELETE FROM DUP_CTE
WHERE RN > 1;
但是,对于最后两个解决方案,您需要在表中使用主键。
答案 3 :(得分:0)
dup1 和 dup2 是可分区的表吗?我的意思是,能否添加一个列,用来判断应当更新哪个表。例如日期列:如果插入的日期落在某个区间内,就更新 dup1,否则更新 dup2。如果没有这样的分区列,可以新增一个仅用于标识表的列(例如 varchar(1),dup1 取值 '1'、dup2 取值 '2';或者 int,dup1 取值 1、dup2 取值 2)。此列应该是主键的一部分(这里我还创建了一个 ID 列)。表可能如下所示:
-- dup1 extended with an identity column and a constant marker column.
-- partit defaults to '1' and the CHECK constraint allows no other value, so
-- every row of this table carries the marker '1'; (ID, partit) is the key.
CREATE TABLE dbo.dup1
(
    cola   VARCHAR(10),
    colb   VARCHAR(10),
    ID     INT IDENTITY NOT NULL,
    partit NCHAR(10)
        CONSTRAINT DF_dup1_partit DEFAULT ('1')
        NOT NULL
        -- Column name spelled exactly as declared so the constraint also
        -- resolves under a case-sensitive collation.
        CONSTRAINT CK_dup1 CHECK ([partit] = '1'),
    CONSTRAINT PK_dup1 PRIMARY KEY (ID, partit)
);
-- dup2 extended with an identity column and a constant marker column.
-- partit defaults to '2' and the CHECK constraint allows no other value, so
-- every row of this table carries the marker '2'; (ID, partit) is the key.
CREATE TABLE dbo.dup2
(
    cola   VARCHAR(10),
    colb   VARCHAR(10),
    ID     INT IDENTITY NOT NULL,
    partit NCHAR(10)
        CONSTRAINT DF_dup2_partit DEFAULT ('2')
        NOT NULL
        -- Column name spelled exactly as declared so the constraint also
        -- resolves under a case-sensitive collation.
        CONSTRAINT CK_dup2 CHECK ([partit] = '2'),
    CONSTRAINT PK_dup2 PRIMARY KEY (ID, partit)
);
这样视图将是:
-- View over the extended dup1 / dup2 tables (all four columns, duplicates kept).
-- NOTE(review): whether SQL Server treats this as an updatable partitioned
-- view depends on its partitioned-view rules for the base tables -- confirm.
CREATE VIEW V_Dup AS
SELECT cola, colb, ID, partit
FROM dup1
UNION ALL
SELECT cola, colb, ID, partit
FROM dup2
WITH CHECK OPTION
这样你就可以使用我之前发布的代码了