PersistentId UserId EnterDate
111 1 June 1, 2015 17:05
112 1 June 1, 2015 17:21
113 1 June 1, 2015 17:27
114 1 June 1, 2015 18:25
115 1 June 1, 2015 19:00
116 2 June 1, 2015 18:05
117 2 June 1, 2015 18:21
118 2 June 1, 2015 19:27
我想得到一个UserId列表和每个UserId的计数,这样只有EnterDates<之间存在差异的行。包括30分钟。
因此对于上述数据,输出将是
UserId Count
1 3
2 2
应为UserId 1提取的行包含persistentIds 111,114,115。
应为UserId 2提取的行包含persistentIds 116,118
关于如何编写此SQL查询的任何想法?
答案 0 :(得分:0)
您的问题措辞不明确,但根据您的预期结果,我认为您希望使用NOT EXISTS
过滤掉具有相同用户ID的另一条记录后不到30分钟的记录。像这样:
-- Per user, count the entries that start a new visit: an entry is counted only
-- when the same user has no other entry in the 30 minutes leading up to it.
with d as (
-- Inline copy of the sample rows. Numeric date literals are used so that parsing
-- does not depend on the session's NLS_DATE_LANGUAGE (month names are locale-specific).
SELECT 111 persistent_id, 1 user_id, to_date('2015-06-01 17:05','YYYY-MM-DD HH24:MI') enter_date from dual UNION ALL
SELECT 112 persistent_id, 1 user_id, to_date('2015-06-01 17:21','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 113 persistent_id, 1 user_id, to_date('2015-06-01 17:27','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 114 persistent_id, 1 user_id, to_date('2015-06-01 18:25','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 115 persistent_id, 1 user_id, to_date('2015-06-01 19:00','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 116 persistent_id, 2 user_id, to_date('2015-06-01 18:05','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 117 persistent_id, 2 user_id, to_date('2015-06-01 18:21','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 118 persistent_id, 2 user_id, to_date('2015-06-01 19:27','YYYY-MM-DD HH24:MI') from dual
)
select d.user_id, count(*)
from d
where not exists ( SELECT 'record for same userid but less than 30 minutes earlier'
                   FROM d d2
                   WHERE d2.user_id = d.user_id
                   -- Lower bound stays inclusive: a gap of exactly 30 minutes
                   -- still suppresses the later row.
                   AND d2.enter_date >= d.enter_date - INTERVAL '30' MINUTE
                   -- "Earlier" has to be a strict ordering. Rows sharing the same
                   -- enter_date are ordered by persistent_id so that the first of
                   -- them can still be counted instead of all of them cancelling
                   -- each other out.
                   AND ( d2.enter_date < d.enter_date
                         OR ( d2.enter_date = d.enter_date
                              AND d2.persistent_id < d.persistent_id ) ) )
group by d.user_id
order by d.user_id
答案 1 :(得分:0)
您可以使用LAG功能获取上一个功能。为用户输入enterDate并计算事件之间的持续时间
-- Per user, count the entries that start a new visit: the user's first entry,
-- plus every entry made more than 30 minutes after that user's previous entry.
-- For the sample rows this keeps persistent_id 111, 114, 115 (user 1) and
-- 116, 118 (user 2).
select user_id, count(*)
from
(with d as (
-- Inline copy of the sample rows. Numeric date literals are used so that parsing
-- does not depend on the session's NLS_DATE_LANGUAGE (month names are locale-specific).
SELECT 111 persistent_id, 1 user_id, to_date('2015-06-01 17:05','YYYY-MM-DD HH24:MI') enter_date from dual UNION ALL
SELECT 112 persistent_id, 1 user_id, to_date('2015-06-01 17:21','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 113 persistent_id, 1 user_id, to_date('2015-06-01 17:27','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 114 persistent_id, 1 user_id, to_date('2015-06-01 18:25','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 115 persistent_id, 1 user_id, to_date('2015-06-01 19:00','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 116 persistent_id, 2 user_id, to_date('2015-06-01 18:05','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 117 persistent_id, 2 user_id, to_date('2015-06-01 18:21','YYYY-MM-DD HH24:MI') from dual UNION ALL
SELECT 118 persistent_id, 2 user_id, to_date('2015-06-01 19:27','YYYY-MM-DD HH24:MI') from dual
)
select d.user_id, persistent_id, enter_date
-- persistent_id is a tie-breaker so that LAG is deterministic when two rows
-- of one user share the same enter_date.
,lag(persistent_id) over (partition by user_id order by enter_date, persistent_id) prev_persistent_id
,lag(enter_date) over (partition by user_id order by enter_date, persistent_id) prev_enter_date
from d
)
-- prev_enter_date is NULL for a user's first entry, which always counts.
-- The gap is tested with date arithmetic directly rather than a derived
-- minutes value, so no fractional-day rounding is involved at the boundary.
where prev_enter_date is null
or enter_date > prev_enter_date + INTERVAL '30' MINUTE
group by user_id
order by user_id
--results
USER_ID COUNT(*)
1 1 3
2 2 2
答案 2 :(得分:0)
两个查询既提供了您的预期结果,又使用了30分钟的窗口但对您的要求有完全不同的解释......您可能想澄清这个问题。
Oracle 11g R2架构设置:
-- Sample data: one row per recorded entry (PersistentId) of a user (UserId)
-- at a given date and time (EnterDate).
-- Dates are written in a numeric format so that loading the fixture does not
-- depend on the session's NLS_DATE_LANGUAGE; a month-name format only parses
-- when the session language matches the spelled-out month.
CREATE TABLE table_name (PersistentId, UserId, EnterDate ) AS
SELECT 111, 1, TO_DATE('2015-06-01 17:05','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 112, 1, TO_DATE('2015-06-01 17:21','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 113, 1, TO_DATE('2015-06-01 17:27','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 114, 1, TO_DATE('2015-06-01 18:25','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 115, 1, TO_DATE('2015-06-01 19:00','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 116, 2, TO_DATE('2015-06-01 18:05','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 117, 2, TO_DATE('2015-06-01 18:21','YYYY-MM-DD HH24:MI') FROM DUAL
UNION ALL SELECT 118, 2, TO_DATE('2015-06-01 19:27','YYYY-MM-DD HH24:MI') FROM DUAL
查询1 - 计算30分钟窗口的结果:
-- Query 1: report the size of each burst of entries per user.
-- For every row, "Count" is the number of that user's rows whose EnterDate lies
-- in the 30 minutes up to and including the row's own EnterDate.
SELECT UserId,
       "Count"
FROM   (
  SELECT UserID,
         COUNT(*) OVER ( PARTITION BY UserId ORDER BY EnterDate RANGE BETWEEN INTERVAL '30' MINUTE PRECEDING AND CURRENT ROW ) AS "Count",
         EnterDate,
         LEAD(EnterDate) OVER ( PARTITION BY UserId ORDER BY EnterDate ) AS nextEnterDate
  FROM   Table_Name
)
-- Single, isolated rows are not reported.
WHERE  "Count" > 1
-- Keep only the closing row of a burst: either the user's next entry is more
-- than 30 minutes away, or there is no next entry at all. Without the explicit
-- NULL check the comparison is UNKNOWN for a user's last row, and a burst that
-- ends there would be dropped silently.
AND    ( nextEnterDate IS NULL
         OR EnterDate + INTERVAL '30' MINUTE < nextEnterDate )
-- Stable output order; a user with several bursts is listed chronologically.
ORDER BY UserId, EnterDate
<强> Results 强>:
| USERID | Count |
|--------|-------|
| 1 | 3 |
| 2 | 2 |
查询2 - 计算另一行30分钟内的所有行:
-- Query 2: per user, count the rows that lie less than 30 minutes away from
-- the same user's previous or next row.
WITH neighbours AS (
  -- Attach to every row the EnterDate of the same user's adjacent rows;
  -- both are NULL-safe below because a comparison with NULL is never true.
  SELECT UserId,
         EnterDate,
         LAG(EnterDate)  OVER ( PARTITION BY UserId ORDER BY EnterDate ) AS previous_enter_date,
         LEAD(EnterDate) OVER ( PARTITION BY UserId ORDER BY EnterDate ) AS next_enter_date
  FROM   Table_Name
)
SELECT   UserId,
         COUNT(*) AS "Count"
FROM     neighbours
WHERE    previous_enter_date > EnterDate - INTERVAL '30' MINUTE
   OR    next_enter_date < EnterDate + INTERVAL '30' MINUTE
GROUP BY UserId
<强> Results 强>:
| USERID | Count |
|--------|-------|
| 1 | 3 |
| 2 | 2 |