我有一个包含事件的表,我需要找到重复的事件。问题是,彼此相隔不超过1秒发生的事件也被认为是重复的。所以如果我的表有这些值:
id | var1 | var2 | var3 | date
1 | 1 | 2 | 3 | 2001-01-01 01:01:01.456
2 | 1 | 2 | 3 | 2001-01-01 01:01:02.234
3 | 1 | 2 | 3 | 2001-01-01 01:01:04.789
记录1和2被认为是重复的,因为它们在1秒内,但3不是因为它在2之后超过1秒。
有没有办法编写只选择一系列重复项中第一条记录的查询?
编辑:可能还有一些不需要捕获的重复行。 Id是表的主键,不在匹配条件中使用;它只是为了澄清。
答案 0 :(得分:2)
Lag is one possible solution, something like this:
-- Flags every event that duplicates the event immediately before it:
-- same var1/var2/var3 and at most 1 second after that previous event.
-- The first event of each series is NOT returned by this filter; to return
-- only those first events instead, invert it:
--   where x.previoustime is null or x.date > dateadd(second, 1, x.previoustime)
select x.*
from (
    select
        t.*,
        -- timestamp of the preceding event that has the same matching columns
        lag(t.date, 1) over (partition by t.var1, t.var2, t.var3 order by t.date) as previoustime
    from yourtable as t
) as x
-- Compare real elapsed time. datediff(second, a, b) only counts the second
-- boundaries crossed, so 01:01:01.456 -> 01:01:02.234 (0.778 s apart) gives 1
-- and would never satisfy "< 1". Rows without a predecessor have a NULL
-- previoustime and are filtered out by the comparison.
where x.date <= dateadd(second, 1, x.previoustime)
答案 1 :(得分:2)
这是一种看起来应该适合你的方式。
一些假设:
row_number()
如果删除窗口函数中的分区(partition by),行为将会改变。这是代码;取消注释表中被注释掉的两行即可查看结果的变化。
-- Sample events used by the query below. All rows share var1/var2/var3, so
-- only the timestamps decide which rows are duplicates of each other.
-- The rows for id 0 and id 7 are intentionally commented out: uncomment them
-- to see how the result changes.
declare @table table(id int, var1 int, var2 int, var3 int, date datetime2)
-- Explicit column list so the insert does not depend on column order.
insert into @table (id, var1, var2, var3, date)
values
--(0,1,2,3,'2001-01-01 00:01:01.456'), --one hour before id 1
(1,1,2,3,'2001-01-01 01:01:01.456'), --first row of the active data
(2,1,2,3,'2001-01-01 01:01:02.214'), --0.758 s after id 1
(3,1,2,3,'2001-01-01 01:01:02.234'), --0.020 s after id 2
(4,1,2,3,'2001-01-01 01:01:02.244'), --0.010 s after id 3
(5,1,2,3,'2001-01-01 01:01:04.789'), --2.545 s after id 4
(6,1,2,3,'2001-01-01 01:01:04.989'), --0.200 s after id 5
--(7,1,2,3,'2001-01-01 01:01:06.789'), --1.800 s after id 6
(8,1,2,3,'2001-01-01 01:01:06.799') --1.810 s after id 6 (0.010 s after id 7 when that row is enabled)
-- Return the first event of every series of duplicates.
-- Two consecutive events (same var1/var2/var3, ordered by date) belong to the
-- same series when the later one happens at most 1 second after the earlier
-- one. An event therefore starts a series when it has no predecessor or when
-- it is more than 1 second after its predecessor.
-- The leading semicolon terminates the preceding insert, which WITH requires.
;with ordered as(
select
    t.ID,
    t.var1,
    t.var2,
    t.var3,
    t.date,
    -- Previous event inside the same var1/var2/var3 group. Removing the
    -- partition compares every event with the previous one regardless of the
    -- var values. ID only breaks ties between identical timestamps.
    lag(t.date) over (partition by t.var1, t.var2, t.var3 order by t.date, t.ID) as previous_date
from
    @table as t)
--return the results
select
    f.ID,
    f.var1,
    f.var2,
    f.var3,
    f.date
from
    ordered as f
where
    f.previous_date is null
    -- Elapsed-time comparison: datediff(second, ...) would only count crossed
    -- second boundaries (01:01:01.999 -> 01:01:02.000 counts as 1 second).
    or f.date > dateadd(second, 1, f.previous_date)
order by
    f.var1,
    f.var2,
    f.var3,
    f.date,
    f.ID
<strong>返回</strong>
+----+------+------+------+-----------------------------+
| ID | var1 | var2 | var3 | date |
+----+------+------+------+-----------------------------+
| 1 | 1 | 2 | 3 | 2001-01-01 01:01:01.4560000 |
| 5 | 1 | 2 | 3 | 2001-01-01 01:01:04.7890000 |
| 8 | 1 | 2 | 3 | 2001-01-01 01:01:06.7990000 |
+----+------+------+------+-----------------------------+
答案 2 :(得分:0)
-- Self-join formulation: keep an event only when no other event with the same
-- var1/var2/var3 happened within the 1 second before it, i.e. the event is the
-- first one of its series of duplicates.
-- NOTE(review): "=" never matches NULL, so rows with NULL in var1/var2/var3
-- are all treated as unique here -- confirm those columns are NOT NULL.
select
    T1.date,
    T1.id,
    T1.var1,
    T1.var2,
    T1.var3
from MyTable T1
where not exists (
    select 1
    from MyTable T2
    where T2.var1 = T1.var1
      and T2.var2 = T1.var2
      and T2.var3 = T1.var3
      -- T2 must come strictly before T1; id breaks ties between identical
      -- timestamps and also stops a row from matching itself.
      and (T2.date < T1.date or (T2.date = T1.date and T2.id < T1.id))
      -- Real elapsed time: datediff(second, ...) counts crossed second
      -- boundaries, not the time that actually passed.
      and T2.date >= dateadd(second, -1, T1.date)
)