我在click_log表下面记录了一些网址的点击次数
site ip ua direction hit_time
-----------------------------------------------------
1 127.0.0.1 1 2010/01/01 00:00:00
2 127.0.0.1 1 2010/01/01 00:01:00
3 127.0.0.1 0 2010/01/01 00:10:00
.... .........
我想筛选出传入的点击(direction 为 1),并按网站进行分组。
我不确定上面是否足够明确。英语不是我的第一语言。让我试着用一个例子来解释。
例如:如果站点1在收到来自某个IP和浏览器的第一次点击之后的10分钟内,又从同一个IP和浏览器累计获得了5次点击,我希望该站点出现在查询结果中。
基本上,我想找出滥用者(恶意刷点击的访问者)。
答案 0 :(得分:2)
我认为这可以满足您的需求。我也提供了一些样本数据。
-- Self-contained demo: builds a temp table of sample clicks, lists the sites
-- that received a burst of incoming hits from a single IP, then cleans up.
Create Table #t
(
    [Site] int,
    IP varchar(20),
    Direction int,      -- the query below only looks at Direction = 1 (incoming hits)
    Hit_Time datetime
)

-- Explicit column list so the inserts keep working if the table gains columns.
Insert Into #t ([Site], IP, Direction, Hit_Time)
Values (1,'127.0.0.1',1,'2010-01-01 00:00:00'),
       (1,'127.0.0.1',1,'2010-01-01 00:01:00'),
       (1,'127.0.0.1',1,'2010-01-01 00:03:00'),
       (1,'127.0.0.1',1,'2010-01-01 00:04:00'),
       (2,'127.0.0.2',1,'2010-01-01 00:00:00'),
       (2,'127.0.0.2',1,'2010-01-01 00:01:00'),
       (2,'127.0.0.2',0,'2010-01-01 00:03:00'),
       (2,'127.0.0.2',1,'2010-01-01 00:04:00')

-- A site is reported when one IP produced at least 4 incoming hits within
-- 10 minutes of that IP's first incoming hit on the site.
-- (4 matches the four-row sample groups above; raise it to 5 for the rule
-- stated in the question.)
--
-- Only the hits inside the 10-minute window are counted. Comparing
-- Min(Hit_Time) with Max(Hit_Time) instead would require the visitor's whole
-- history to fit in 10 minutes, so one later hit would hide a real burst.
-- DateAdd on the first hit also measures elapsed time exactly, whereas
-- DateDiff(minute, ...) counts crossed minute boundaries.
Select Distinct Site
From (
    Select Site,
           IP,
           Hit_Time,
           Min(Hit_Time) Over (Partition By Site, IP) As First_Hit_Time
    From #t
    Where Direction = 1
) As incoming
Where Hit_Time <= DateAdd(minute, 10, First_Hit_Time)
Group By Site, IP
Having Count(*) >= 4

Drop Table #t
答案 1 :(得分:0)
您可能正在寻找 BETWEEN 运算符,如下所述:
答案 2 :(得分:0)
怎么样?
-- For every incoming click (DIRECTION = 1), counts the incoming clicks from the
-- same IP whose HIT_TIME is within 10 minutes either side of it, and returns the
-- clicks where that count is greater than 4. One row is returned per qualifying
-- click, so an IP can appear more than once.
--
-- The count is computed in a derived table because a select-list alias
-- (CLICK_COUNT) cannot be referenced in the WHERE clause of the same SELECT.
-- NOTE(review): the count is per IP only; add site / ua to the correlation if
-- hits must come from the same site and browser - confirm against requirements.
SELECT counted.IP, counted.CLICK_COUNT
FROM (
    SELECT CL.IP,
           (SELECT COUNT(*)
            FROM Click_Log AS nearby
            WHERE nearby.IP = CL.IP
              AND nearby.DIRECTION = 1
              AND DATEDIFF(MINUTE, nearby.HIT_TIME, CL.HIT_TIME) BETWEEN -10 AND 10
           ) AS CLICK_COUNT
    FROM Click_Log AS CL
    WHERE CL.DIRECTION = 1
) AS counted
WHERE counted.CLICK_COUNT > 4
答案 3 :(得分:0)
-- Lists every (site, ip, ua) combination that produced at least 5 incoming hits
-- inside some 10-minute window, together with its largest such count, busiest first.
;WITH rankings AS (
    -- Incoming hits only. groupId identifies one (site, ip, ua) combination;
    -- sequence numbers that combination's hits in hit_time order.
    SELECT [site], ip, ua, hit_time,
        DENSE_RANK() OVER(ORDER BY [site], ip, ua) groupId,
        ROW_NUMBER() OVER(PARTITION BY [site], ip, ua ORDER BY hit_time) sequence
    FROM Hits
    WHERE direction = 1),
periods AS (
    -- hitCount = the hit itself plus the same group's earlier hits that fall in
    -- the 10 minutes before it.
    -- COUNT(r2.sequence) skips the all-NULL row the outer join produces when
    -- there is no earlier hit; COUNT(*) would count that row and under-report
    -- every real burst by one.
    -- "r2.sequence < r.sequence" already restricts r2 to hits that are not later
    -- than r (sequence follows hit_time), so no extra strict time comparison is
    -- applied; that keeps hits sharing the same timestamp in the count.
    SELECT r.groupId, r.sequence, COUNT(r2.sequence) + 1 hitCount
    FROM rankings r
    LEFT OUTER JOIN rankings r2
        ON r2.groupId = r.groupId AND r2.sequence < r.sequence
        AND r2.hit_time >= DATEADD(second, -10*60, r.hit_time)
    GROUP BY r.groupId, r.sequence
),
groups AS (
    -- Largest 10-minute hit count seen for each group.
    SELECT p.groupId, MAX(p.hitCount) maxHitCount
    FROM periods p
    GROUP BY p.groupId
)
-- DISTINCT collapses the per-hit rows of rankings to one row per group.
SELECT DISTINCT r.[site], r.ip, r.ua, g.maxHitCount
FROM rankings r
INNER JOIN groups g ON g.groupId = r.groupId
WHERE g.maxHitCount >= 5
ORDER BY g.maxHitCount DESC
答案 4 :(得分:0)
我根据OP(提问者)的评论补充了这个答案。
我使用了以下测试数据:
-- Sample click data used by the function and query that follow.
-- Site 1 has four hits within four minutes; site 2 has one early hit followed
-- half an hour later by four hits within four minutes.
Create Table dbo.Temp
(
    [Site]    int,
    IP        varchar(20),
    Direction int,
    Hit_Time  datetime
)

Insert Into dbo.Temp ([Site], IP, Direction, Hit_Time)
Values (1, '127.0.0.1', 1, '2010-01-01 00:00:00'),
       (1, '127.0.0.1', 1, '2010-01-01 00:01:00'),
       (1, '127.0.0.1', 1, '2010-01-01 00:03:00'),
       (1, '127.0.0.1', 1, '2010-01-01 00:04:00'),
       (2, '127.0.0.2', 1, '2010-01-01 15:00:00'),
       (2, '127.0.0.2', 1, '2010-01-01 15:31:00'),
       (2, '127.0.0.2', 1, '2010-01-01 15:32:00'),
       (2, '127.0.0.2', 1, '2010-01-01 15:33:00'),
       (2, '127.0.0.2', 1, '2010-01-01 15:34:00')
首先,你需要创建一个函数来完成工作:
-- Reports whether one IP hit one site suspiciously often.
--
-- Parameters:
--   @Site, @IP   site / IP address whose rows in dbo.Temp are examined
--   @MinDate     earliest Hit_Time to consider (inclusive)
--   @MaxDate     latest Hit_Time to consider (inclusive)
--   @Direction   only rows with this Direction value are counted
--   @Interval    length of the time window, in minutes
--   @MaxCount    number of hits inside one window that makes the IP suspect
-- Returns:
--   1 when some window of @Interval minutes between @MinDate and @MaxDate
--   contains at least @MaxCount matching hits, otherwise 0.
--
-- The window is anchored at every matching hit in turn (a sliding window).
-- Stepping through fixed back-to-back windows from @MinDate would miss a burst
-- that straddles the boundary between two windows, and a hard-coded 10 would
-- ignore @Interval.
Create Function dbo.fn_CheckSuspectActivity (@Site int, @IP varchar(20), @MinDate datetime,
                                             @MaxDate datetime, @Direction int, @Interval int,
                                             @MaxCount int)
returns int
as begin
    Declare @IsSuspect int
    Set @IsSuspect = 0

    If Exists (
        Select 1
        From dbo.Temp As anchor
        Where anchor.Site = @Site
          And anchor.IP = @IP
          And anchor.Direction = @Direction
          And anchor.Hit_Time >= @MinDate
          And anchor.Hit_Time <= @MaxDate
          -- Count the hits from the anchor hit up to @Interval minutes later,
          -- never looking past @MaxDate.
          And (Select COUNT(*)
               From dbo.Temp As later
               Where later.Site = @Site
                 And later.IP = @IP
                 And later.Direction = @Direction
                 And later.Hit_Time >= anchor.Hit_Time
                 And later.Hit_Time <= DATEADD(minute, @Interval, anchor.Hit_Time)
                 And later.Hit_Time <= @MaxDate
              ) >= @MaxCount
    )
    Begin
        -- Hit count reached inside one window: suspect IP
        Set @IsSuspect = 1
    End

    return @IsSuspect
End
Go
然后这个select语句应该给你正确答案:
-- Returns the Site of every (Site, IP) pair in dbo.Temp that
-- dbo.fn_CheckSuspectActivity flags as suspect.
-- The derived table supplies each pair's first and last logged Hit_Time.
-- Arguments passed to the function, in order: site number, IP address,
-- first time logged, last time logged, direction (1), interval to check (10),
-- max occurrences (4).
Select Ranges.Site
From (
    Select Site,
           IP,
           MIN(Hit_Time) As MinTime,
           MAX(Hit_Time) As MaxTime
    From dbo.Temp
    Group By Site, IP
) As Ranges
Where dbo.fn_CheckSuspectActivity(Ranges.Site, Ranges.IP, Ranges.MinTime, Ranges.MaxTime, 1, 10, 4) = 1
如果第一个和最后一个日期相隔不到10分钟,那么它会检查它们是否超过了命中数。如果第一个日期和最后一个日期相隔超过10分钟,它会按照10分钟的间隔递增第一个日期,并检查它们是否在10分钟内超过了hitcount。
我希望这是你需要的。
百里