表DDL:
-- Each row references one row in entities and one row in patents.
-- The composite primary key allows at most one row per (entity_id, patent_id).
CREATE TABLE entities_patents (
    entity_id   INTEGER NOT NULL REFERENCES entities (entity_id),
    patent_id   INTEGER NOT NULL REFERENCES patents (patent_id),
    doc_number  TEXT NOT NULL,
    -- Both timestamps default to the insertion time and are nullable.
    created_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    updated_at  TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
    -- Nullable, no default. NOTE(review): presumably a soft-delete flag; confirm with the writers.
    deleted     BOOLEAN,
    PRIMARY KEY (entity_id, patent_id)
);
不幸的是,表中已经存在重复行,所以之前一直没能加上主键(PK)约束。现在尝试添加该约束时会报错:
DETAIL: Key (entity_id, patent_id)=(123123, 811231333) is duplicated.
有没有办法删除这些重复的行?
答案 0 :(得分:1)
你可以使用 ROW_NUMBER(),试试下面的语句(未经测试,可能存在一些语法问题):
-- Remove duplicate (entity_id, patent_id) rows, keeping exactly one row per key:
-- the row with the latest created_at, then the latest updated_at.
--
-- Rows are matched by the PostgreSQL system column ctid (the physical location
-- of the row version) instead of by the timestamp columns. Matching on
-- created_at / updated_at cannot tell identical copies apart (every copy,
-- including the one meant to be kept, would be deleted) and never matches when
-- a timestamp is NULL (the duplicates would survive).
-- ctid is only a reliable row identifier within this single statement.
DELETE FROM entities_patents AS t
WHERE EXISTS (
    SELECT 1
    FROM (
        SELECT
            p.ctid AS row_ctid,
            ROW_NUMBER() OVER (
                PARTITION BY p.entity_id, p.patent_id
                ORDER BY
                    -- NULLS LAST: a row with a real timestamp is preferred as the keeper.
                    p.created_at DESC NULLS LAST,
                    p.updated_at DESC NULLS LAST,
                    -- Tie-breaker so the surviving row is deterministic for exact copies.
                    p.ctid DESC
            ) AS rnk
        FROM entities_patents AS p
    ) AS s
    WHERE s.rnk > 1              -- every row after the first in its group is surplus
      AND s.row_ctid = t.ctid
);
答案 1 :(得分:1)
-- Remove duplicate (entity_id, patent_id) rows, keeping the earliest-created
-- row of each key.
--
-- The surplus rows are identified by the PostgreSQL system column ctid.
-- Filtering on (entity_id, patent_id) alone would also match the rn = 1 row of
-- every duplicated key and therefore delete all copies, leaving none behind.
-- ctid is only a reliable row identifier within this single statement.
WITH ranked AS (
    -- Number the rows inside each key group; rn = 1 is the row to keep.
    SELECT
        ctid AS row_ctid,
        ROW_NUMBER() OVER (
            PARTITION BY entity_id, patent_id
            -- ctid breaks ties so the keeper is deterministic for exact copies.
            ORDER BY created_at, ctid
        ) AS rn
    FROM entities_patents
)
DELETE FROM entities_patents
WHERE ctid IN (
    SELECT row_ctid
    FROM ranked
    WHERE rn > 1
);
答案 2 :(得分:1)
使用 GROUP BY 和 COUNT:
-- Remove duplicate (entity_id, patent_id) rows using GROUP BY / COUNT,
-- keeping one row per duplicated key.
--
-- The grouped subquery returns each duplicated key together with the ctid
-- (PostgreSQL physical row location) of the row to keep: the earliest-created
-- one, ties broken by ctid. Joining on the key alone would delete every row of
-- a duplicated key, including the copy that should survive.
-- ctid is only a reliable row identifier within this single statement.
DELETE FROM entities_patents AS b
USING (
    SELECT
        entity_id,
        patent_id,
        (ARRAY_AGG(ctid ORDER BY created_at, ctid))[1] AS keep_ctid
    FROM entities_patents
    GROUP BY entity_id, patent_id
    HAVING COUNT(*) > 1
) AS c
WHERE b.entity_id = c.entity_id
  AND b.patent_id = c.patent_id
  AND b.ctid <> c.keep_ctid;    -- spare the chosen keeper row