拥有一个事件表,每个用户有多种类型的UUID,我们希望找到一种方法将所有这些UUID拼接在一起,以获得单个用户的最高可能定义。
例如:
UUID1 | UUID2
1 a
1 a
2 a
2 b
3 c
4 c
这里有2位用户,第一位是uuid1 = {1,2},uuid2 = {a,b},第二位是uuid1 = {3,4},uuid2 = {c}。这些链可能很长。没有交叉点(即1c不存在),所有行都按时间戳排序。
红移是否有办法产生这些独特的"客人"标识符没有创建具有多个连接的巨大查询?
提前致谢!
答案 0 :(得分:0)
创建测试数据表
-- DROP TABLE uuid_test;
CREATE TEMP TABLE uuid_test AS
SELECT 1 row_id, 1::int uuid1, 'a'::char(1) uuid2
UNION ALL SELECT 2 row_id, 1::int uuid1, 'a'::char(1) uuid2
UNION ALL SELECT 3 row_id, 2::int uuid1, 'a'::char(1) uuid2
UNION ALL SELECT 4 row_id, 2::int uuid1, 'b'::char(1) uuid2
UNION ALL SELECT 5 row_id, 3::int uuid1, 'c'::char(1) uuid2
UNION ALL SELECT 6 row_id, 4::int uuid1, 'c'::char(1) uuid2
UNION ALL SELECT 7 row_id, 4::int uuid1, 'd'::char(1) uuid2
UNION ALL SELECT 8 row_id, 5::int uuid1, 'e'::char(1) uuid2
UNION ALL SELECT 9 row_id, 6::int uuid1, 'e'::char(1) uuid2
UNION ALL SELECT 10 row_id, 6::int uuid1, 'f'::char(1) uuid2
UNION ALL SELECT 11 row_id, 7::int uuid1, 'f'::char(1) uuid2
UNION ALL SELECT 12 row_id, 8::int uuid1, 'g'::char(1) uuid2
UNION ALL SELECT 13 row_id, 8::int uuid1, 'h'::char(1) uuid2
;
通过使用严格排序来查找唯一用户更改的每个位置,将其捕获为查找表,然后将其应用于原始数据,解决了实际问题。
-- Create lookup table with a from-to range of IDs for each unique user
WITH unique_user AS (
-- Calculate the end of the id range using LEAD() to look ahead
-- Use an inline MAX() to find the ending ID for the last entry
SELECT row_id AS from_id
, NVL(LEAD(row_id,1) OVER (ORDER BY row_id)-1, (SELECT MAX(row_id) FROM uuid_test) ) AS to_id
, unique_uuid
-- Mark unique user change when there is discontinuity in either UUID
FROM (SELECT row_id
,CASE WHEN NVL(LAG(uuid1,1) OVER (ORDER BY row_id), 0) <> uuid1
AND NVL(LAG(uuid2,1) OVER (ORDER BY row_id), '') <> uuid2
THEN MD5(uuid1||uuid2)
ELSE NULL END unique_uuid
FROM uuid_test) t
WHERE unique_uuid IS NOT NULL
ORDER BY row_id
)
-- Apply the unique user value to each row using a range join to the lookup table
SELECT a.row_id, a.uuid1, a.uuid2, b.unique_uuid
FROM uuid_test AS a
JOIN unique_user AS b
ON a.row_id BETWEEN b.from_id AND b.to_id
ORDER BY a.row_id
;
这是输出
row_id | uuid1 | uuid2 | unique_uuid
--------+-------+-------+----------------------------------
1 | 1 | a | efaa153b0f682ae5170a3184fa0df28c
2 | 1 | a | efaa153b0f682ae5170a3184fa0df28c
3 | 2 | a | efaa153b0f682ae5170a3184fa0df28c
4 | 2 | b | efaa153b0f682ae5170a3184fa0df28c
5 | 3 | c | 5fcfcb7df376059d0075cb892b2cc37f
6 | 4 | c | 5fcfcb7df376059d0075cb892b2cc37f
7 | 4 | d | 5fcfcb7df376059d0075cb892b2cc37f
8 | 5 | e | 18a368e1052b5aa0388ef020dd9a1e20
9 | 6 | e | 18a368e1052b5aa0388ef020dd9a1e20
10 | 6 | f | 18a368e1052b5aa0388ef020dd9a1e20
11 | 7 | f | 18a368e1052b5aa0388ef020dd9a1e20
12 | 8 | g | 321fcc2447163a81d470b9353e394121
13 | 8 | h | 321fcc2447163a81d470b9353e394121