我的数据库中有一个表。该表有一个字段VehId(Int)和trackdt(Datetime)
我的表有3000亿行(是的,这是3000亿行)。所以我想删除旧数据。但我想根据时间间隔删除数据。
我想删除每辆车每20秒的记录。
以下是我的表
VehId Trackdt
1 2017-05-20 00:00:30.000
2 2017-05-20 00:00:32.000
2 2017-05-20 00:00:42.000
1 2017-05-20 00:00:40.000
2 2017-05-20 00:00:52.000
1 2017-05-20 00:00:50.000
1 2017-05-20 00:01:00.000
2 2017-05-20 00:01:02.000
1 2017-05-20 00:01:10.000
1 2017-05-20 00:01:20.000
2 2017-05-20 00:01:12.000
1 2017-05-20 00:01:30.000
2 2017-05-20 00:01:22.000
2 2017-05-20 00:01:32.000
删除数据后应如下所示
VehId TRackdt
1 2017-05-20 00:00:30.000
2 2017-05-20 00:00:32.000
1 2017-05-20 00:01:00.000
2 2017-05-20 00:01:02.000
1 2017-05-20 00:01:30.000
2 2017-05-20 00:01:32.000
我尝试了下面的查询但是花了太多时间
-- Thins out TRACKINGLOG one vehicle at a time: for every VEHID in VEHICLEMASTER
-- it walks that vehicle's rows (TRACKDT between @FROMDATE and @TODATE) in
-- ascending TRACKDT order and deletes each row whose
-- DATEDIFF(SECOND, <last kept row>, <row>) is <= 20.
-- Parameters:
--   @FROMDATE, @TODATE : TRACKDT range, applied with BETWEEN (both ends inclusive).
--   @INTERVAL          : NOTE(review): never read in the body; the threshold is the literal 20.
--   @FLAG              : the processing only runs when this equals 1.
-- The work is done row by row (one SELECT plus at most one DELETE per log row).
ALTER PROCEDURE [dbo].[DELETEINTERVALDATA]
@FROMDATE DATETIME,
@TODATE DATETIME,
@INTERVAL INT,
@FLAG INT
AS
BEGIN
SET NOCOUNT ON;
-- TRACKDT of the log row currently being examined
DECLARE @TRACKDT DATETIME
-- @I: position (SNO) inside #TEMPLOG; @V: position (SNO) inside #TEMPVEHICLE
DECLARE @I INT =1
DECLARE @V INT =1
-- @COUNT: rows loaded into #TEMPLOG for the current vehicle; @VCOUNT: rows in #TEMPVEHICLE
DECLARE @COUNT INT
DECLARE @VCOUNT INT
-- TRACKDT of the last row that was kept; '' converts to the datetime base date (1900-01-01)
DECLARE @STARTDATE DATETIME = ''
DECLARE @VEHID INT
-- seconds between @STARTDATE and the current row
DECLARE @TIMEDIFF INT
-- Work list of vehicles, numbered 1..n through the identity column SNO
CREATE TABLE #TEMPVEHICLE
(
SNO INT IDENTITY(1,1),
VEHID INT
)
-- Timestamps of the current vehicle, numbered through the identity column SNO
CREATE TABLE #TEMPLOG
(
SNO INT IDENTITY(1,1),
TRACKDT DATETIME
)
IF (@FLAG = 1 )
BEGIN
INSERT INTO #TEMPVEHICLE (VEHID) SELECT VEHID FROM VEHICLEMASTER ORDER BY VEHID
SELECT @VCOUNT = COUNT(SNO) FROM #TEMPVEHICLE
-- Outer loop: one iteration per vehicle
WHILE (@V <= @VCOUNT)
BEGIN
SELECT @VEHID = VEHID FROM #TEMPVEHICLE WHERE SNO = @V
-- Load this vehicle's timestamps in ascending order; NOLOCK allows dirty reads
INSERT INTO #TEMPLOG(TRACKDT) SELECT TRACKDT
FROM TRACKINGLOG WITH(NOLOCK)
WHERE TRACKDT BETWEEN @FROMDATE AND @TODATE AND VEHID = @VEHID
ORDER BY TRACKDT ASC
SELECT @COUNT = COUNT(SNO) FROM #TEMPLOG
-- Inner loop: one iteration per log row of the current vehicle
WHILE (@I <= @COUNT)
BEGIN
SELECT @TRACKDT=TRACKDT FROM #TEMPLOG WHERE SNO = @I
IF (@I = 1)
BEGIN
-- The first row of a vehicle is always kept and becomes the reference point
SELECT @STARTDATE = @TRACKDT
END
ELSE
BEGIN
SELECT @TIMEDIFF = DATEDIFF(SECOND,@STARTDATE,@TRACKDT)
IF @TIMEDIFF <= 20
BEGIN
-- Within 20 seconds of the last kept row: remove it from the real table
-- (matches on TRACKDT and VEHID, so all rows sharing that timestamp are removed)
DELETE FROM TRACKINGLOG WHERE TRACKDT = @TRACKDT AND VEHID = @VEHID
END
ELSE
BEGIN
-- More than 20 seconds later: keep the row and use it as the new reference point
SELECT @STARTDATE = @TRACKDT
END
END
SELECT @I = @I + 1
END
-- Empty the per-vehicle buffer; the next vehicle is read again starting from SNO = 1,
-- which relies on TRUNCATE TABLE resetting the identity seed
TRUNCATE TABLE #TEMPLOG
SELECT @V = @V + 1,@STARTDATE= '',@I=1
END
-- The explicit drops only run on the @FLAG = 1 path
DROP TABLE #TEMPLOG
DROP TABLE #TEMPVEHICLE
END
END
如何编写一个基于时间间隔删除数据、并且执行速度快的查询?
提前致谢
答案 0 :(得分:1)
您想要的输出对应 30 秒间隔。
您可以使用窗口函数检索结果集中其他行的值。例如,LAG(trackdt,1)将返回前一行的值,LEAD将返回下一行的值,FIRST_VALUE将返回集合中的第一个值。
查询:
-- For every row, also return the earliest trackdt of the same vehicle as t0.
SELECT
    t.*,
    FIRST_VALUE(t.trackdt) OVER (
        PARTITION BY t.vehid
        ORDER BY t.trackdt
    ) AS t0
FROM @mytable AS t
当行按trackdt排序时,该查询将返回每辆车的第一个trackdt值。
1 2017-05-20 00:00:30.000 2017-05-20 00:00:30.000
1 2017-05-20 00:00:40.000 2017-05-20 00:00:30.000
1 2017-05-20 00:00:50.000 2017-05-20 00:00:30.000
这样我们就可以用datediff计算分区中当前值与第一个值之间的间隔(秒)。得到的查询有点难看:
-- Adds t0 (the vehicle's earliest trackdt) and the number of seconds
-- elapsed between t0 and the row's own trackdt.
SELECT
    t.*,
    FIRST_VALUE(t.trackdt) OVER (
        PARTITION BY t.vehid
        ORDER BY t.trackdt
    ) AS t0,
    DATEDIFF(
        SECOND,
        FIRST_VALUE(t.trackdt) OVER (
            PARTITION BY t.vehid
            ORDER BY t.trackdt
        ),
        t.trackdt
    ) AS interval
FROM @mytable AS t
将该间隔(秒)除以30(整数除法),即可得到每行所属的间隔桶:
1 2017-05-20 00:01:10.000 2017-05-20 00:00:30.000 1
1 2017-05-20 00:01:20.000 2017-05-20 00:00:30.000 1
1 2017-05-20 00:01:30.000 2017-05-20 00:00:30.000 2
2 2017-05-20 00:00:32.000 2017-05-20 00:00:32.000 0
2 2017-05-20 00:00:42.000 2017-05-20 00:00:32.000 0
而如果改为计算余数,每个30秒桶中的第一行将返回0:
-- Adds t0 (the vehicle's earliest trackdt) and the seconds elapsed since t0
-- modulo 30, i.e. the offset of the row inside its 30-second bucket.
SELECT
    t.*,
    FIRST_VALUE(t.trackdt) OVER (
        PARTITION BY t.vehid
        ORDER BY t.trackdt
    ) AS t0,
    DATEDIFF(
        SECOND,
        FIRST_VALUE(t.trackdt) OVER (
            PARTITION BY t.vehid
            ORDER BY t.trackdt
        ),
        t.trackdt
    ) % 30 AS remainder
FROM @mytable AS t
1 2017-05-20 00:01:00.000 2017-05-20 00:00:30.000 0
1 2017-05-20 00:01:10.000 2017-05-20 00:00:30.000 10
1 2017-05-20 00:01:20.000 2017-05-20 00:00:30.000 20
1 2017-05-20 00:01:30.000 2017-05-20 00:00:30.000 0
2 2017-05-20 00:00:32.000 2017-05-20 00:00:32.000 0
我们可以使用一个或多个CTE来整理此查询并执行删除:
-- Deletes every row that does not sit exactly on a 30-second boundary
-- measured from its vehicle's earliest trackdt.
WITH anchored AS
(
    -- first_dt: earliest trackdt of the row's vehicle
    SELECT
        src.*,
        FIRST_VALUE(src.trackdt) OVER (
            PARTITION BY src.vehid
            ORDER BY src.trackdt
        ) AS first_dt
    FROM @mytable AS src
),
offsets AS
(
    -- bucket_offset: seconds since first_dt, modulo 30
    SELECT
        a.*,
        DATEDIFF(SECOND, a.first_dt, a.trackdt) % 30 AS bucket_offset
    FROM anchored AS a
)
DELETE
FROM offsets
WHERE bucket_offset <> 0
此查询依赖于数据中恰好每隔30秒就有一条记录(余数为0);如果某个桶中没有这样的记录,该桶中的所有行都会被删除。
更通用的做法是,使用ROW_NUMBER()函数为每个间隔桶中的记录编号,并只保留桶中的第一行。由于我们要保留第一行,因此选出所有ROW_NUMBER > 1的行作为待删除的行:
-- Lists the rows to delete: every row except the first one (by trackdt) in
-- each 30-second bucket, where buckets are counted per vehicle starting at
-- that vehicle's earliest trackdt.
-- Reads from the table variable @mytable (the original text referenced a
-- temp table #mytable that is never created).
WITH start_times AS
(
    -- t0: earliest trackdt of the row's vehicle
    SELECT
        *,
        FIRST_VALUE(trackdt) OVER (PARTITION BY vehid ORDER BY trackdt) AS t0
    FROM @mytable
),
intervals AS
(
    -- Integer division turns "seconds since t0" into a 0-based bucket number
    SELECT
        *,
        DATEDIFF(SECOND, t0, trackdt) / 30 AS interval
    FROM start_times
),
ordered AS
(
    -- Number the rows inside each (vehicle, bucket) pair by time
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY vehid, interval ORDER BY trackdt) AS row_num
    FROM intervals
)
SELECT
    vehid,
    trackdt
FROM ordered
WHERE row_num > 1
即使这个查询很快,我也不会在3000亿行的表上直接使用它。窗口函数通常会导致假脱机(spool)——临时结果存储在tempdb中以便进行窗口计算。最好先选出必须删除的行的ID,将它们插入临时表,然后使用带JOIN子句的DELETE:
-- Remove from HugeTable every row whose ID is listed in TempTable.
DELETE target
FROM HugeTable AS target
INNER JOIN TempTable AS doomed
    ON doomed.ID = target.ID
即便如此,您可能还想批量删除。在这种情况下,您可以使用NTILE函数计算1到N之间每一行的批号:
-- Spread the rows that must be deleted over 100 batches.
-- row_num > 1 selects every row except the first of its bucket; the original
-- filter (row_num = 1) picked the rows that are supposed to be kept.
-- NOTE(review): this fragment reads a row set named "ordered" that exposes
-- vehid, trackdt, row_num and an ID column; it must be defined (e.g. as a
-- CTE) in the same statement.
SELECT
    ID,
    NTILE(100) OVER (ORDER BY vehid, trackdt) AS batch_number
FROM ordered
WHERE row_num > 1
这将计算1到100之间的批号。您可以将其存储在临时表中,并一次删除一批行。
答案 1 :(得分:0)
如果某条记录的时间在上一条记录之后20秒或更短时间内,则要删除该记录。但是,如果上一条记录本身已被删除,那么就需要与再之前的一条记录进行比较,依此类推。因此你需要某种迭代方式:递归公用表表达式或常规循环。
-- Self-contained demo: load the sample rows into a table variable, then
-- repeatedly delete rows that lie within 20 seconds of a row that is kept.
-- The variable name is spelled @mytable everywhere so the script also runs
-- on instances with a case-sensitive collation.
DECLARE @mytable TABLE (vehid INT, trackdt DATETIME)

INSERT INTO @mytable (vehid, trackdt)
VALUES
    (1, '2017-05-20 00:00:30.000'),
    (2, '2017-05-20 00:00:32.000'),
    (2, '2017-05-20 00:00:42.000'),
    (1, '2017-05-20 00:00:40.000'),
    (2, '2017-05-20 00:00:52.000'),
    (1, '2017-05-20 00:00:50.000'),
    (1, '2017-05-20 00:01:00.000'),
    (2, '2017-05-20 00:01:02.000'),
    (1, '2017-05-20 00:01:10.000'),
    (1, '2017-05-20 00:01:20.000'),
    (2, '2017-05-20 00:01:12.000'),
    (1, '2017-05-20 00:01:30.000'),
    (2, '2017-05-20 00:01:22.000'),
    (2, '2017-05-20 00:01:32.000')

-- Each pass can only remove rows whose predecessor is already known to be
-- kept, so the DELETE is repeated until a pass removes nothing.
WHILE 1 = 1
BEGIN
    DELETE t1
    FROM @mytable AS t1
    -- previous time of the same vehicle
    OUTER APPLY (
        SELECT TOP 1 p.trackdt
        FROM @mytable AS p
        WHERE p.vehid = t1.vehid
          AND p.trackdt < t1.trackdt
        ORDER BY p.trackdt DESC
    ) AS t2
    -- previous time before that
    OUTER APPLY (
        SELECT TOP 1 pp.trackdt
        FROM @mytable AS pp
        WHERE pp.vehid = t1.vehid
          AND pp.trackdt < t2.trackdt
        ORDER BY pp.trackdt DESC
    ) AS t3
    -- previous time was less than or equal to 20 seconds ago
    WHERE DATEDIFF(SECOND, t2.trackdt, t1.trackdt) <= 20
      -- and the time before that was more than 20 seconds earlier, or does not exist
      AND (DATEDIFF(SECOND, t3.trackdt, t2.trackdt) > 20 OR t3.trackdt IS NULL)

    -- @@ROWCOUNT must be read immediately after the DELETE
    IF @@ROWCOUNT = 0 BREAK
END

-- Show what is left
SELECT
    vehid,
    trackdt
FROM @mytable
ORDER BY vehid, trackdt
答案 2 :(得分:-1)
请阅读Parameter sniffing。假设您的存储过程很慢但是正确,我已经更新了您的SP。请试试:
-- Variant of DELETEINTERVALDATA that copies its parameters into local
-- variables before use (the parameter-sniffing workaround this answer
-- proposes). The thinning logic itself is unchanged: for every vehicle in
-- VEHICLEMASTER, walk its TRACKINGLOG rows in the requested range in
-- ascending TRACKDT order and delete each row that is at most 20 seconds
-- (DATEDIFF in seconds) after the last row that was kept.
-- Parameters:
--   @FROMDATE1, @TODATE1 : inclusive TRACKDT range to process.
--   @INTERVAL1           : copied to a local but not read afterwards.
--   @FLAG1               : the processing only runs when this equals 1.
ALTER PROCEDURE [dbo].[DELETEINTERVALDATA]
    @FROMDATE1 DATETIME,
    @TODATE1 DATETIME,
    @INTERVAL1 INT,
    @FLAG1 INT
AS
BEGIN
    SET NOCOUNT ON;

    -- Local copies of the parameters
    DECLARE @FROMDATE DATETIME = @FROMDATE1;
    DECLARE @TODATE DATETIME = @TODATE1;
    DECLARE @INTERVAL INT = @INTERVAL1;
    DECLARE @FLAG INT = @FLAG1;

    -- Vehicle cursor state
    DECLARE @VEHNO INT = 1;          -- position (SNO) in #TEMPVEHICLE
    DECLARE @VEHTOTAL INT;           -- number of vehicles to process
    DECLARE @VEHID INT;              -- vehicle currently being processed

    -- Log-row cursor state
    DECLARE @ROWNO INT = 1;          -- position (SNO) in #TEMPLOG
    DECLARE @ROWTOTAL INT;           -- number of log rows loaded for the vehicle
    DECLARE @TRACKDT DATETIME;       -- timestamp of the row being examined
    DECLARE @ANCHORDT DATETIME = ''; -- timestamp of the last row that was kept
    DECLARE @ELAPSED INT;            -- seconds between @ANCHORDT and @TRACKDT

    -- Vehicles to process, numbered by the identity column
    CREATE TABLE #TEMPVEHICLE
    (
        SNO INT IDENTITY(1,1),
        VEHID INT
    );

    -- Timestamps of the current vehicle, numbered by the identity column
    CREATE TABLE #TEMPLOG
    (
        SNO INT IDENTITY(1,1),
        TRACKDT DATETIME
    );

    IF (@FLAG = 1)
    BEGIN
        INSERT INTO #TEMPVEHICLE (VEHID)
        SELECT VEHID
        FROM VEHICLEMASTER
        ORDER BY VEHID;

        SET @VEHTOTAL = (SELECT COUNT(SNO) FROM #TEMPVEHICLE);

        WHILE (@VEHNO <= @VEHTOTAL)
        BEGIN
            SELECT @VEHID = VEHID
            FROM #TEMPVEHICLE
            WHERE SNO = @VEHNO;

            -- Buffer this vehicle's timestamps in ascending order
            INSERT INTO #TEMPLOG (TRACKDT)
            SELECT TRACKDT
            FROM TRACKINGLOG WITH (NOLOCK)
            WHERE VEHID = @VEHID
              AND TRACKDT >= @FROMDATE
              AND TRACKDT <= @TODATE
            ORDER BY TRACKDT ASC;

            SET @ROWTOTAL = (SELECT COUNT(SNO) FROM #TEMPLOG);

            WHILE (@ROWNO <= @ROWTOTAL)
            BEGIN
                SELECT @TRACKDT = TRACKDT
                FROM #TEMPLOG
                WHERE SNO = @ROWNO;

                IF (@ROWNO = 1)
                BEGIN
                    -- The first row is always kept and becomes the reference point
                    SET @ANCHORDT = @TRACKDT;
                END
                ELSE
                BEGIN
                    SET @ELAPSED = DATEDIFF(SECOND, @ANCHORDT, @TRACKDT);

                    IF @ELAPSED <= 20
                    BEGIN
                        -- Too close to the last kept row: remove it from the real table
                        DELETE FROM TRACKINGLOG
                        WHERE TRACKDT = @TRACKDT
                          AND VEHID = @VEHID;
                    END
                    ELSE
                    BEGIN
                        -- Far enough away: keep it and measure from here onwards
                        SET @ANCHORDT = @TRACKDT;
                    END
                END

                SET @ROWNO = @ROWNO + 1;
            END

            -- Reset the per-vehicle state; the next vehicle is read again from SNO = 1
            TRUNCATE TABLE #TEMPLOG;
            SET @VEHNO = @VEHNO + 1;
            SET @ANCHORDT = '';
            SET @ROWNO = 1;
        END

        DROP TABLE #TEMPLOG;
        DROP TABLE #TEMPVEHICLE;
    END
END