我正在使用PostgreSQL 10.4
源数据来源于下表
-- Source table: one row per recorded impression of an address.
CREATE TABLE IF NOT EXISTS impressions (
    id_impressions BIGSERIAL NOT NULL,  -- auto-generated surrogate key
    date_visit     TIMESTAMP NOT NULL,  -- timestamp of the visit (no time zone)
    address_id     INTEGER   NOT NULL,  -- identifier of the visited/visiting address
    CONSTRAINT impressions_pkey PRIMARY KEY (id_impressions)
);
-- Sample data: four impressions for address 1 and three for address 17,
-- all on 2018-06-01, including rows that share the same timestamp.
INSERT INTO impressions (date_visit, address_id)
VALUES
    ('2018-06-01 15:36:47', 1),
    ('2018-06-01 15:38:22', 1),
    ('2018-06-01 15:38:22', 1),
    ('2018-06-01 18:10:44', 1),
    ('2018-06-01 00:05:21', 17),
    ('2018-06-01 00:05:21', 17),
    ('2018-06-01 00:10:57', 17);
下面这张表用于存放结果:
-- Target table for the sessionized result.
-- All columns are nullable and no key is declared.
create table if not exists sessions
(
    address_id             integer,    -- address the impression belongs to
    date_visit             timestamp,  -- timestamp of the impression
    session_id             integer,    -- running session number
    impressions_id         bigint,     -- presumably impressions.id_impressions (no FK declared) -- TODO confirm
    what_i_want_session_id uuid        -- desired per-session identifier
);
我目前得到的数据如下:
address_id date_visit session_id what_i_want_session_id
1 2018-06-01 15:36:47 1 7c8815fd-04ec-4f16-a467-d28ac7c9c1fd
1 2018-06-01 15:38:22 1 <null>
1 2018-06-01 15:38:22 1 <null>
1 2018-06-01 18:10:44 2 e6e2c427-0354-4017-8019-319e4b56358b
17 2018-06-01 00:05:21 3 7d0c1fe9-e229-4c1c-977b-2caaaa62624c
17 2018-06-01 00:05:21 3 <null>
17 2018-06-01 00:10:57 3 <null>
而我期望的结果应该是这样:
address_id date_visit session_id what_i_want_session_id
1 2018-06-01 15:36:47 1 7c8815fd-04ec-4f16-a467-d28ac7c9c1fd
1 2018-06-01 15:38:22 1 7c8815fd-04ec-4f16-a467-d28ac7c9c1fd
1 2018-06-01 15:38:22 1 7c8815fd-04ec-4f16-a467-d28ac7c9c1fd
1 2018-06-01 18:10:44 2 e6e2c427-0354-4017-8019-319e4b56358b
17 2018-06-01 00:05:21 3 7d0c1fe9-e229-4c1c-977b-2caaaa62624c
17 2018-06-01 00:05:21 3 7d0c1fe9-e229-4c1c-977b-2caaaa62624c
17 2018-06-01 00:10:57 3 7d0c1fe9-e229-4c1c-977b-2caaaa62624c
我使用以下查询生成了上面的数据。第二个 CASE 表达式是我希望在最终方案中得到的结果的一个示例。第一个 CASE 表达式确实能得到正确的会话划分,但我的要求是:会话 ID 在以后的运行中绝不能被重复使用。为每个新会话生成一个新的 uuid 很简单,但我不确定如何用同一会话中前面那一行的 uuid 来替换后续行的空值。
-- Splits the impressions of 2018-06-01 into sessions per address.
-- A new session starts when an address has no earlier impression in the
-- selected range, or when at least 1800 seconds (30 minutes) have passed
-- since its previous impression.
-- Only the first row of each session receives a UUID here; the remaining
-- rows of the session get NULL in what_i_want_session_id.
select s1.address_id,
       s1.date_visit,
       -- Running count of session starts. The default window frame includes
       -- peer rows, so rows with the same (address_id, date_visit) share one
       -- session_id.
       sum(s1.is_new_session) over (order by s1.address_id, s1.date_visit) as session_id,
       s1.what_i_want_session_id
from (
    select s0.address_id,
           s0.date_visit,
           -- 1 marks the first impression of a session, 0 a continuation.
           case when extract('EPOCH' from s0.date_visit) -
                     extract('EPOCH' from s0.last_event) >= 1800
                  or s0.last_event is null then 1
                else 0 end as is_new_session,
           -- Same condition as above: a fresh UUID on session starts, NULL otherwise.
           (case when extract('EPOCH' from s0.date_visit) -
                      extract('EPOCH' from s0.last_event) >= 1800
                   or s0.last_event is null then gen_random_uuid() end) as what_i_want_session_id
    from (
        select i.address_id,
               i.date_visit,
               -- Previous impression of the same address; NULL for its first one.
               lag(i.date_visit, 1) over (partition by i.address_id order by i.date_visit) as last_event
        from impressions i
        -- Half-open range: BETWEEN would also include an impression at
        -- exactly 2018-06-02 00:00:00, which belongs to the next day.
        where i.date_visit >= '20180601'::date
          and i.date_visit <  '20180601'::date + interval '1 day'
    ) s0
) s1;
答案 0 :(得分:0)
一种可能的方法是使用CTE。然后,我们可以使用子查询,以session_id作为链接来获取非null的what_i_want_session_id。
-- CTE-based solution: generate one UUID on the first row of every session,
-- then copy it to the other rows of that session through session_id.
WITH flagged AS (
    -- Mark each impression with 1 when it opens a new session, i.e. when the
    -- address has no previous impression or the gap is at least 30 minutes.
    SELECT v.address_id,
           v.date_visit,
           CASE
               WHEN coalesce(v.date_visit - v.previous_visit,
                             '00:30:00'::interval) >= '00:30:00'::interval
                   THEN 1
               ELSE 0
           END AS is_new_session
    FROM (SELECT i.address_id,
                 i.date_visit,
                 lag(i.date_visit, 1) OVER (PARTITION BY i.address_id
                                            ORDER BY i.date_visit) AS previous_visit
          FROM impressions AS i) AS v
),
cte AS (
    -- Number the sessions with a running sum of the start flags and attach a
    -- random UUID to each session's opening row (NULL on all other rows).
    SELECT f.address_id,
           f.date_visit,
           sum(f.is_new_session) OVER (ORDER BY f.address_id,
                                                f.date_visit) AS session_id,
           CASE
               WHEN f.is_new_session = 1 THEN gen_random_uuid()
           END AS what_i_want_session_id
    FROM flagged AS f
)
SELECT cur.address_id,
       cur.date_visit,
       cur.session_id,
       -- Look up the single non-NULL UUID of the row's session.
       (SELECT opener.what_i_want_session_id
        FROM cte AS opener
        WHERE opener.session_id = cur.session_id
          AND opener.what_i_want_session_id IS NOT NULL) AS what_i_want_session_id
FROM cte AS cur;
对于不使用CTE的解决方案,我们需要先获取去重后的session_id,并为每个session_id生成一个随机值。然后,再以session_id作为链接,与带有session_id的明细行进行连接(JOIN)。
-- CTE-free solution: build the list of distinct session ids, give each one a
-- random UUID, and join that list back to the sessionized impressions.
SELECT detail.address_id,
       detail.date_visit,
       detail.session_id,
       ids.what_i_want_session_id
FROM (
        -- One row, and therefore one UUID, per distinct session_id.
        SELECT uniq.session_id,
               gen_random_uuid() AS what_i_want_session_id
        FROM (
                SELECT DISTINCT
                       sum(f.is_new_session) OVER (ORDER BY f.address_id,
                                                            f.date_visit) AS session_id
                FROM (
                        SELECT i.address_id,
                               i.date_visit,
                               CASE
                                   WHEN coalesce(i.date_visit
                                                 - lag(i.date_visit, 1) OVER (PARTITION BY i.address_id
                                                                              ORDER BY i.date_visit),
                                                 '00:30:00'::interval) >= '00:30:00'::interval
                                       THEN 1
                                   ELSE 0
                               END AS is_new_session
                        FROM impressions AS i
                     ) AS f
             ) AS uniq
     ) AS ids
INNER JOIN (
        -- Every impression with its running session number, computed the same
        -- way as in the list above.
        SELECT f.address_id,
               f.date_visit,
               sum(f.is_new_session) OVER (ORDER BY f.address_id,
                                                    f.date_visit) AS session_id
        FROM (
                SELECT i.address_id,
                       i.date_visit,
                       CASE
                           WHEN coalesce(i.date_visit
                                         - lag(i.date_visit, 1) OVER (PARTITION BY i.address_id
                                                                      ORDER BY i.date_visit),
                                         '00:30:00'::interval) >= '00:30:00'::interval
                               THEN 1
                           ELSE 0
                       END AS is_new_session
                FROM impressions AS i
             ) AS f
     ) AS detail
    ON detail.session_id = ids.session_id;
注意:在演示用的fiddle中,没有安装提供gen_random_uuid()函数的扩展,所以我在那里改用了random()。