我有一张记录事件的表:
-- Event log: one row per recorded event (which item, which user, when).
CREATE TABLE #events
(
    intRowId  INT IDENTITY(1,1),  -- auto-generated row id: starts at 1, increments by 1
    intItemId INT,
    intUserId INT,
    datEvent  DATETIME
)
这是一张拥有数百万行的大表,记录了数千件物品和成千上万用户的活动。
我想查找选定的一组10个itemID,但仅限于它们按某种模式出现的情况:我要找出这样的事件——这组项目中的每一个都是针对同一个userID记录的,并且在时间上彼此接近,比如相隔不超过5分钟。
我完全不知道该如何着手解决这个问题。我猜某个环节会用到分区(partitioning),但无论如何,任何帮助——哪怕只是告诉我从哪里入手——我都会非常感激。
干杯, 马特
答案 0 :(得分:1)
假设你想统计的物品ID是 1、2、……、10。首先创建一张表 EventByItems:
-- Wide table with one INT column per tracked item id (intItem1 .. intItem10),
-- keyed by user and event timestamp.
CREATE TABLE EventByItems
(
    intRowId  INT IDENTITY(1,1),
    intUserId INT,
    datEvent  DATETIME,
    intItem1  INT,
    intItem2  INT,
    intItem3  INT,
    -- placeholder: intItem4 through intItem9 follow the same pattern
    ...
    intItem10 INT
)
然后用下面的查询填充这张表:
-- Populate EventByItems: one row per (intUserId, datEvent) holding, for each
-- tracked item id, the number of events recorded for it.
--
-- PIVOT implicitly groups by every source column it does not consume. Because
-- #events is pivoted directly, that includes intRowId, so the pivoted rows are
-- still one per event; the outer SUM ... GROUP BY collapses them.
-- intRowId of EventByItems is an IDENTITY column and is therefore not listed.
INSERT INTO EventByItems
    (intUserId, datEvent, intItem1, intItem2, intItem3, ... , intItem10)
SELECT
    pvt.intUserId,
    pvt.datEvent,
    SUM(pvt.[1]), SUM(pvt.[2]), SUM(pvt.[3]), ... , SUM(pvt.[10])
FROM #events
PIVOT
(
    COUNT(intItemId)
    FOR intItemId IN ([1], [2], [3], ... , [10])
) AS pvt
GROUP BY pvt.intUserId, pvt.datEvent
现在我们可以使用该表做一些工作。例如,我们可以根据您的逻辑更新它以填补空白。或者我们可以这样做:
-- Return every EventByItems row around which all ten items occur for the same
-- user: each item must be counted either on the row itself or on another row
-- of that user no more than 5 minutes before or after it.
--
-- The time window is written as a two-sided range on purpose. A one-sided test
-- such as DATEDIFF(MINUTE, N.datEvent, E.datEvent) <= 5 is also true for every
-- row later than E (the difference is negative), which would match events
-- arbitrarily far in the future. The range form also leaves datEvent bare, so
-- an index on (intUserId, datEvent) can be used.
SELECT
    E.intRowId,
    E.intUserId,
    E.datEvent
FROM
    EventByItems AS E
WHERE
    ((E.intItem1 > 0) OR EXISTS(SELECT *
                                FROM EventByItems AS N
                                WHERE N.intUserId = E.intUserId
                                  AND N.intItem1 > 0
                                  AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                                  AND N.datEvent <= DATEADD(MINUTE, 5, E.datEvent)
                                  AND N.intRowId != E.intRowId ))
AND
-- placeholder: repeat the same predicate for intItem2 through intItem9
...
AND
    ((E.intItem10 > 0) OR EXISTS(SELECT *
                                 FROM EventByItems AS N
                                 WHERE N.intUserId = E.intUserId
                                   AND N.intItem10 > 0
                                   AND N.datEvent >= DATEADD(MINUTE, -5, E.datEvent)
                                   AND N.datEvent <= DATEADD(MINUTE, 5, E.datEvent)
                                   AND N.intRowId != E.intRowId ))
答案 1 :(得分:0)
好的,下面是一个可以实现你想要的效果的可运行示例。我假设不要求十个项目的事件全部出现。
不过这个解决方案相当粗糙,而且运行缓慢,尤其是在项目数或用户数增加的情况下。无论如何,请不要因此给我投太多反对票,希望你能找到更合适的解决方案 :)。
具有预先选择的事件的临时表将有助于我的解决方案中的性能,但您真正需要的是窗口函数,例如在Oracle中。
-- Reset the demo table. The drop is guarded so the script also runs cleanly in
-- a fresh session where #events does not exist yet (an unguarded DROP TABLE
-- raises an error in that case).
IF OBJECT_ID('tempdb..#events') IS NOT NULL
    DROP TABLE #events
GO
-- Event log: one row per recorded event (which item, which user, when).
CREATE TABLE #events
(
    intRowId  INT IDENTITY(1,1),  -- auto-generated row id: starts at 1, increments by 1
    intItemId INT,
    intUserId INT,
    datEvent  DATETIME
)
GO
-- Sample data. Each literal row is (user, item, timestamp), matching the
-- column list below; the quoted values rely on implicit conversion to the
-- column types. The "group N" tags are the author's annotations, presumably
-- marking the clusters the demo is expected to find.
INSERT INTO #events (intUserId, intItemId, datEvent)
          SELECT '1', '1', '2013-05-01 10:25'  -- group 1
UNION ALL SELECT '1', '2', '2013-05-01 10:25'  -- group 1
UNION ALL SELECT '1', '3', '2013-05-01 10:26'  -- group 1
UNION ALL SELECT '1', '7', '2013-05-01 10:25'
UNION ALL SELECT '1', '8', '2013-05-01 10:25'
UNION ALL SELECT '1', '9', '2013-05-01 10:26'
UNION ALL SELECT '1', '1', '2013-05-01 10:50'  -- group 2
UNION ALL SELECT '1', '2', '2013-05-01 10:52'  -- group 2
UNION ALL SELECT '1', '3', '2013-05-01 10:59'
UNION ALL SELECT '1', '1', '2013-05-01 11:10'  -- group 3
UNION ALL SELECT '1', '1', '2013-05-01 11:12'  -- group 3
UNION ALL SELECT '1', '3', '2013-05-01 11:17'  -- group 3
UNION ALL SELECT '1', '2', '2013-05-01 11:25'
UNION ALL SELECT '1', '1', '2013-05-01 11:31'
UNION ALL SELECT '1', '7', '2013-05-01 11:32'
UNION ALL SELECT '1', '2', '2013-05-01 11:50'  -- group 4
UNION ALL SELECT '1', '2', '2013-05-01 11:50'  -- group 4
UNION ALL SELECT '1', '3', '2013-05-01 11:50'  -- group 4
UNION ALL SELECT '1', '1', '2013-05-01 11:56'
GO
-- Build #temp: every ordered pair (e1, e2) of distinct events of user 1,
-- restricted to items 1, 2 and 3, where e2 is not earlier than e1 and
-- DATEDIFF reports fewer than 6 minute boundaries between the two.
--
-- The drop is guarded so the script also runs in a fresh session where #temp
-- does not exist yet. Column names are spelled exactly as declared (datEvent)
-- so the batch also works under a case-sensitive collation.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp
GO
SELECT
    e1.intRowId  AS intRowId_1,
    e1.intItemId AS intItemId_1,
    e1.intUserId AS intUserId_1,
    e1.datEvent  AS datEvent_1,
    e2.intRowId  AS intRowId_2,
    e2.intItemId AS intItemId_2,
    e2.intUserId AS intUserId_2,
    e2.datEvent  AS datEvent_2
INTO #temp
FROM #events AS e1
INNER JOIN #events AS e2
    ON  e1.intUserId = e2.intUserId
    AND e1.datEvent <= e2.datEvent
    AND e1.intRowId <> e2.intRowId
WHERE e1.intUserId = 1                 -- the join condition pins e2 to the same user
  AND e1.intItemId IN (1, 2, 3)
  AND e2.intItemId IN (1, 2, 3)
  -- e1 is never later than e2 (see the join), so the difference is non-negative
  AND DATEDIFF(MINUTE, e1.datEvent, e2.datEvent) < 6
-- No ORDER BY: row order of a table filled by SELECT ... INTO is not preserved;
-- the consuming query must sort its own output.
GO
-- Flatten the pairs in #temp back into single events: both sides of every pair
-- are stacked into one column set. UNION (rather than UNION ALL) removes the
-- duplicates that arise when an event takes part in more than one pair.
SELECT
    intRowId_1  AS intRowId,
    intItemId_1 AS intItemId,
    intUserId_1 AS intUserId,
    datEvent_1  AS datEvent
FROM #temp
UNION
SELECT
    intRowId_2  AS intRowId,
    intItemId_2 AS intItemId,
    intUserId_2 AS intUserId,
    datEvent_2  AS datEvent
FROM #temp
ORDER BY
    datEvent,
    intRowId