我有这样的数据:
+-------+------+------------+----------+
| name | id | data | date |
+-------+------+------------+----------+
| host1 | 1 | data1 |07/20/2017|
| host1 | 1 | data2 |07/20/2017|
| host2 | 2 | data1 |07/20/2017|
| host2 | 3 | data1 |07/19/2017|
| host3 | 4 | data1 |07/20/2017|
| host3 | 4 | data2 |07/20/2017|
| host3 | 4 | data3 |07/20/2017|
| host3 | 4 | data4 |07/20/2017|
| host4 | 5 | data3 |07/20/2017|
| host4 | 6 | data4 |07/17/2017|
+-------+------+------------+----------+
我需要从表中删除一些陈旧的数据。每个对象都有多行数据。我需要在表中查找 name 相同但 id 不同的记录,然后删除其中较旧的记录。以上面的示例数据为例,我想执行的是 DELETE FROM tableName WHERE id IN (3, 6)。
我已经能用下面的查询找出哪些记录不匹配,但不知道接下来该如何删除其中的旧记录(尤其是有多个 name 需要处理的时候):
-- List every row whose name is shared by at least two rows that carry different ids.
SELECT t.*
FROM tableName AS t
WHERE EXISTS (
    -- Any pair of rows for this name whose ids disagree.
    SELECT 1
    FROM aid2245.tableName AS lhs
    INNER JOIN aid2245.tableName AS rhs
        ON rhs.NAME = lhs.NAME
       AND rhs.ID <> lhs.ID
    WHERE lhs.NAME = t.NAME
);
任何帮助将不胜感激。
谢谢!
答案 0 :(得分:1)
下面的脚本会删除 id 为 3 和 6 的行。
-- Demo table used by the queries below.
create table tableName
(id int,
name varchar(50),
[date] date)
GO
-- Sample data
insert into tableName
(name, id, [date])
values('host1' , 1 , '07/20/2017'),
( 'host1' , 1 , '07/20/2017'),
( 'host2' , 2 , '07/20/2017'),
( 'host2' , 3 , '07/19/2017'),
( 'host3' , 4 , '07/20/2017'),
( 'host3' , 4 , '07/20/2017'),
( 'host3' , 4 , '07/20/2017'),
( 'host3' , 4 , '07/20/2017'),
( 'host4' , 5 , '07/20/2017'),
( 'host4' , 6 , '07/17/2017')
GO
-- Look at what we are deleting first.
-- A row is stale when it is older than the newest row for its name AND its id
-- is not one of the ids that name carries on that newest date.
-- The NOT EXISTS is correlated on name so that each host is compared only
-- against its own current id, never against another host's id.
SELECT b.*
FROM (SELECT name, max([date]) AS maxDate
      FROM tableName
      GROUP BY name) AS a
JOIN tableName AS b
  ON a.name = b.name
 AND b.[date] < a.maxDate
WHERE NOT EXISTS
      (SELECT 1
       FROM tableName AS cur
       WHERE cur.name = a.name
         AND cur.[date] = a.maxDate
         AND cur.id = b.id)
GO
-- delete the older rows (same predicate as the preview above)
DELETE b
FROM (SELECT name, max([date]) AS maxDate
      FROM tableName
      GROUP BY name) AS a
JOIN tableName AS b
  ON a.name = b.name
 AND b.[date] < a.maxDate
WHERE NOT EXISTS
      (SELECT 1
       FROM tableName AS cur
       WHERE cur.name = a.name
         AND cur.[date] = a.maxDate
         AND cur.id = b.id)
GO
答案 1 :(得分:0)
以下应该可以解决问题。
-- Build the sample data only when #TestData does not exist yet in this session.
-- To start over with fresh sample data, run: DROP TABLE #TestData;
IF OBJECT_ID('tempdb..#TestData', 'U') IS NULL
BEGIN
CREATE TABLE #TestData (
[name] CHAR(5) NOT NULL,
id INT NOT NULL,
[data] CHAR(5) NOT NULL,
[date] DATE NOT NULL
);
INSERT #TestData ([name], id, [data], [date]) VALUES
('host1', 1, 'data1', '07/20/2017'),
('host1', 1, 'data2', '07/20/2017'),
('host2', 2, 'data1', '07/20/2017'),
('host2', 3, 'data1', '07/19/2017'),
('host3', 4, 'data1', '07/20/2017'),
('host3', 4, 'data2', '07/20/2017'),
('host3', 4, 'data3', '07/20/2017'),
('host3', 4, 'data4', '07/20/2017'),
('host4', 5, 'data3', '07/20/2017'),
('host4', 6, 'data4', '07/17/2017');
END;
--==========================================================
-- 1) View before DELETE...
-- FIRST_VALUE over date DESC yields the id on the newest row for each name
-- (ties on date are broken by the higher id). Every row whose id differs from
-- that current id is flagged, no matter how many stale rows the name has.
SELECT
td.[name],
td.id,
td.[data],
td.[date],
NeedsDelete = CASE WHEN td.id = FIRST_VALUE(td.id) OVER (PARTITION BY td.[name] ORDER BY td.[date] DESC, td.id DESC) THEN '' ELSE 'Delete' END
FROM
#TestData td
ORDER BY
td.[name],
td.[date] DESC;
-- 2) Do the actual DELETE...
-- Same flag as the preview; deleting through the CTE removes the flagged rows
-- from #TestData.
WITH
cte_FindDelete AS (
SELECT
td.[name],
td.id,
td.[data],
td.[date],
NeedsDelete = CASE WHEN td.id = FIRST_VALUE(td.id) OVER (PARTITION BY td.[name] ORDER BY td.[date] DESC, td.id DESC) THEN '' ELSE 'Delete' END
FROM
#TestData td
)
DELETE fd
FROM
cte_FindDelete fd
WHERE
fd.NeedsDelete = 'Delete';
HTH, 杰森
答案 2 :(得分:0)
我会使用窗口函数执行此操作:
-- For each name, take the id on its most recent row (ties on date broken by
-- the higher id) and delete every row of that name carrying a different id.
-- Rows that share the current id are all kept, however many there are.
with todelete as (
select t.*,
first_value(id) over (partition by name order by date desc, id desc) as latestid
from tablename t
)
delete from todelete
where id <> latestid;