用于用户留存分析的 HPE Vertica 实时聚合投影示例

时间:2017-08-14 13:12:59

标签: sql vertica retention

-- Raw event log: one row per event emitted by a device for a game.
-- NOTE(review): the CHAR(36) columns presumably hold textual UUIDs -- confirm.
CREATE TABLE events (
    id             CHAR(36),
    game_id        VARCHAR(24)  NOT NULL,
    user_device_id CHAR(36)     NOT NULL,
    event_name     VARCHAR(100) NOT NULL,
    -- Instant at which the event was generated (time-zone aware).
    generated_at   TIMESTAMPTZ  NOT NULL,
    PRIMARY KEY (id)
);

-- Day-1 retention per calendar day.
--
-- For every day D (time_stamp) the query reports:
--   new_users      : distinct devices that emitted a 'new_user' event on D
--   returned_users : distinct devices from that same new-user cohort that
--                    emitted any other event on D + 1, in the same game
--   retention      : returned_users / new_users (NULL when D has no new users)
--
-- To compute day-N retention, replace the "+ 1" in the join condition with
-- "+ N" (N between 1 and 7).
--
-- Fixes compared with the previous version:
--   * The join matched rows whose timestamps were exactly 24 hours apart, so
--     practically nothing ever joined. It now matches on calendar days.
--   * returned_users is restricted to the day's new-user cohort, so the ratio
--     cannot exceed 1.
--   * The division is guarded with NULLIF so that days without any 'new_user'
--     event yield NULL instead of a division-by-zero error.
--
-- NOTE(review): the ::DATE casts are presumably evaluated in the session time
-- zone because generated_at is time-zone aware -- confirm the session zone
-- matches the reporting zone.
SELECT
    events.generated_at::DATE AS time_stamp,
    COUNT(DISTINCT
        CASE
            WHEN events.event_name = 'new_user' THEN events.user_device_id
        END
    ) AS new_users,
    COUNT(DISTINCT
        CASE
            WHEN events.event_name = 'new_user'
                AND future_events.event_name <> 'new_user'
                THEN future_events.user_device_id
        END
    ) AS returned_users,
    COUNT(DISTINCT
        CASE
            WHEN events.event_name = 'new_user'
                AND future_events.event_name <> 'new_user'
                THEN future_events.user_device_id
        END
    ) / NULLIF(
        COUNT(DISTINCT
            CASE
                WHEN events.event_name = 'new_user' THEN events.user_device_id
            END
        ),
        0
    )::float AS retention
FROM events
LEFT JOIN events AS future_events
    ON events.user_device_id = future_events.user_device_id
    AND events.game_id = future_events.game_id
    -- Calendar-day match: the follow-up event happened on the next day.
    AND future_events.generated_at::DATE = events.generated_at::DATE + 1
GROUP BY
    time_stamp
ORDER BY
    time_stamp;

我试图通过上面的 SQL 查询获得第 N 天(N 为 1 到 7 之间的任意数字)的用户留存率。由于我是 HPE Vertica 的新手,我写不出最优的聚合投影创建语句,而投影能够显著提升查询的性能。

1 个答案:

答案 0 :(得分:1)

聚合投影无助于连接查询。

您可以创建常规投影,按连接列进行分段和排序,以实现性能提升:

-- Projection tuned for a self-join of events on (game_id, user_device_id).
--
-- * Segmentation uses only the columns that are compared for equality across
--   the join. generated_at is deliberately excluded: the retention join pairs
--   rows from different days, so hashing it would place matching rows on
--   different nodes and force data to be redistributed at query time.
-- * The sort order leads with the join keys so that rows for the same
--   game/device are stored together; generated_at comes last to keep each
--   device's events in time order.
-- * RLE is kept on the leading, repetitive sort columns only. generated_at is
--   close to unique per row, so it is left on the default encoding.
-- NOTE(review): a few extremely active devices could skew the segments --
-- check the data distribution after loading.
CREATE PROJECTION events_p1 (
    id,
    game_id ENCODING RLE,
    user_device_id ENCODING RLE,
    event_name,
    generated_at
) AS
SELECT
    id,
    game_id,
    user_device_id,
    event_name,
    generated_at
FROM events
ORDER BY
    game_id,
    user_device_id,
    generated_at
SEGMENTED BY HASH(game_id, user_device_id) ALL NODES KSAFE 1;